GATHER

Name:

GATHER (LET) Type:

Let Subcommand Purpose:

Extract rows of a variable based on index values contained in another variable. Description:

With the GATHER command, we define a separate variable that contains the specific rows we want to extract.

If you modify the extracted data and want to save these modified values back to the original variable, you can use the SCATTER command. Enter HELP SCATTER for further information.

The first program example shows a simple artificial example to demonstrate the basic syntax. The second and third examples show non-trivial examples of the GATHER command.

Syntax:

Examples:

Default:

None Synonyms:

None Related Commands:

SCATTER	= Save data from a variable to specified rows of another variable based on an index variable.
SEQUENCE	= Generate a patterned sequence of values.
SORT	= Sort a column of numbers.
RANK	= Rank a column of numbers.
CODE	= Code a column of numbers.
SUBSET	= Specifies a subset to be included in a plot, analysis, or LET command.
RETAIN	= Retain specified rows or a subset of a variable.

Applications:

Data Transformation Implementation Date:

2008/11 Program 1:

 
let n = 30
let xseq = sequence 1 1 n
let x = normal rand numb for i = 1 1 n
let iindex = data 10  14  8  23  19
.
let y = gather x iindex
set write decimals 3
print xseq x iindex y

          1.000         -1.073         10.000          0.270
          2.000          0.573         14.000         -0.841
          3.000         -0.873          8.000          0.032
          4.000          0.234         23.000         -1.063
          5.000         -0.455         19.000          0.034
          6.000         -0.525          0.000          0.000
          7.000         -0.706          0.000          0.000
          8.000          0.032          0.000          0.000
          9.000          1.191          0.000          0.000
         10.000          0.270          0.000          0.000
         11.000         -0.149          0.000          0.000
         12.000         -0.197          0.000          0.000
         13.000         -0.243          0.000          0.000
         14.000         -0.841          0.000          0.000
         15.000         -0.104          0.000          0.000
         16.000          0.419          0.000          0.000
         17.000          0.264          0.000          0.000
         18.000          0.898          0.000          0.000
         19.000          0.034          0.000          0.000
         20.000          1.588          0.000          0.000
         21.000          0.389          0.000          0.000
         22.000         -0.470          0.000          0.000
         23.000         -1.063          0.000          0.000
         24.000         -0.027          0.000          0.000
         25.000         -0.464          0.000          0.000
         26.000          0.592          0.000          0.000
         27.000         -0.506          0.000          0.000
         28.000         -0.360          0.000          0.000
         29.000          0.499          0.000          0.000
         30.000          0.243          0.000          0.000

Program 2:

 
.  Purpose: Need to randomly sample bottles from a box of 81 bottles.
.
.           3 temperatures: -20, 5, 25
.           3 times:        1 day, 3 days, 7 days
.                           (7 days also has a temperature of -80)
.           4 levels
.
.           This gives us 2*3*4 + 1*4*4 = 40 combinations.  Generate a
.           sampling plan for 2 replications for each combination.
.
.  Step 0:  Set the random number generator
.
dimension 100 columns
set random number generator fibonacci congruential
seed 55609
set write decimals 0
.
.  Step 1:  Assign the combinations in arbitrary order
.
let temp = data -80 -20 -20 -20 5 5 5 25 25 25
let time = data   7   1   3   7 1 3 7  1  3  7
let ncomb1 = size temp
let nlevel = 4
let temp = combine temp temp temp temp
let time = combine time time time time
let level = sequence 1 ncomb1 1 nlevel
let ncomb2 = size temp
.
.  Step 2:  Randomly permute the combinations
.
let ntemp = size temp
let zperm = random permutation for i = 1 1 ntemp
let x1 = gather temp  zperm
let x2 = gather time  zperm
let x3 = gather level zperm
delete zperm
.
let nrepl = 2
loop for k = 2 1 nrepl
    let zperm = random permutation for i = 1 1 ntemp
    let x1t = gather temp  zperm
    let x2t = gather time  zperm
    let x3t = gather level zperm
    let x1 = combine x1 x1t
    let x2 = combine x2 x2t
    let x3 = combine x3 x3t
end of loop
let n  = size x1
.
.  Step 2:  Now randomize the vial selection
.
let nbottle = 81
let bottleid = sequence 1 1 nbottle
let xperm = random permutation for i = 1 1 nbottle
retain bottleid xperm  subset xperm <= n
.
.  Step 3:  Now match time/temp to bottle id
.
let temp2  = gather x1 xperm
let time2  = gather x2 xperm
let level2 = gather x3 xperm
.
write2 perm.out "Permutation   Temperature   Time   Level   Bottle-ID"
write2 perm.out "----------------------------------------------------"
set write format  F14.0,F11.0,F7.0,F8.0,F12.0
write2 perm.out xperm temp2 time2 level2 bottleid

Permutation   Temperature   Time   Level   Bottle-ID
----------------------------------------------------
           77.        25.     7.      4.          1.
           76.        25.     1.      1.          2.
           31.       -20.     7.      1.          3.
           69.         5.     1.      1.          4.
           51.       -20.     1.      1.          5.
           44.       -20.     3.      4.          6.
           79.        25.     3.      1.          7.
           41.        25.     3.      3.          8.
           54.         5.     3.      2.          9.
            1.       -80.     7.      4.         10.
           18.       -20.     3.      3.         11.
           25.        25.     3.      4.         12.
           10.         5.     3.      1.         13.
            9.        25.     1.      3.         14.
           58.         5.     7.      3.         15.
            4.         5.     3.      3.         16.
           34.        25.     3.      1.         17.
           73.       -80.     7.      1.         18.
           39.         5.     1.      4.         19.
           24.        25.     1.      2.         20.
           52.       -20.     3.      1.         21.
            3.         5.     1.      2.         22.
           49.         5.     3.      1.         23.
           57.        25.     7.      2.         24.
            8.        25.     7.      2.         25.
           38.       -20.     1.      2.         26.
           59.         5.     7.      1.         27.
           74.       -20.     7.      1.         28.
           26.        25.     1.      1.         29.
           72.         5.     1.      4.         30.
           19.       -20.     7.      2.         31.
           63.       -20.     1.      2.         32.
           23.         5.     1.      3.         33.
           75.       -80.     7.      4.         34.
           45.       -20.     7.      4.         35.
           47.         5.     3.      3.         36.
           71.       -20.     1.      4.         37.
           30.         5.     7.      4.         38.
           27.       -20.     1.      3.         39.
            6.         5.     3.      4.         40.
           70.         5.     7.      4.         41.
           29.        25.     3.      3.         42.
           46.       -20.     3.      2.         43.
           12.       -20.     3.      4.         44.
           32.         5.     1.      1.         45.
           66.       -80.     7.      3.         46.
           21.         5.     7.      2.         47.
           33.       -80.     7.      3.         48.
            5.        25.     7.      3.         49.
           65.         5.     1.      3.         50.
           80.       -20.     3.      3.         51.
           61.       -20.     7.      2.         53.
           11.         5.     7.      1.         54.
           60.        25.     3.      4.         55.
           14.         5.     3.      2.         56.
           55.        25.     7.      1.         57.
           16.       -20.     7.      4.         58.
           67.       -20.     7.      3.         59.
           62.       -20.     1.      3.         60.
           13.         5.     7.      3.         61.
            7.        25.     7.      4.         62.
           36.       -20.     1.      4.         63.
           28.       -80.     7.      2.         64.
           22.       -20.     3.      2.         65.
           48.         5.     3.      4.         66.
           20.        25.     1.      4.         67.
           64.        25.     7.      3.         68.
           15.       -20.     3.      1.         69.
           53.        25.     1.      2.         70.
           17.       -20.     7.      3.         71.
           50.       -80.     7.      2.         72.
           43.        25.     1.      3.         73.
           37.       -20.     1.      1.         74.
            2.        25.     7.      1.         75.
           68.        25.     3.      2.         76.
           40.        25.     3.      2.         77.
           42.        25.     1.      4.         78.
           78.         5.     1.      2.         79.
           56.         5.     7.      2.         80.
           35.       -80.     7.      1.         81.

Program 3:

 
. Purpose:  Reproduce the Reliability Analyses given at:
.
.           https://www.itl.nist.gov/div898/handbook/apr/section4/apr46.htm
.
.           This script shows a non-trivial use of the GATHER command
.           that greatly speeded up the analysis.
. Source:   Thanks to Jonathan H. Morgan for submitting this example.
. Description: Perform a weighted sampling with replacement.  This example
.              builds on the reliability analyses discussed in sections
.              8.3.1.5 and 8.4.6 of the NIST/SEMATECH e-Handbook of
.              Statistical Methods
.              (http://www.itl.nist.gov/div898/handbook).  In these
.              analyses, the authors constructed a posterior distribution
.              of Mean Time Between Failures (MTBF).  This is a common
.              approach because samples of equipment failure rates tend
.              to be small, making frequentist approaches impractical in
.              many cases.  Based on these analyses, the authors confirmed
.              that a 600 hour MTBF objective fell within a 80% credible
.              interval.  In addition to summarizing the posterior, it is
.              useful to average over our uncertainty to provide an
.              intuition about model trends.  What might we expect in the
.              future based on what we know of the prior and posterior?
.              We do this by performing a weighted re-sampling from the
.              prior and posterior distributions based on the prior and
.              posterior likelihoods using GATHER.
.
. Step 0:   Define the output devices and dimension the workspace
.
dimension 1000000 rows
.
device 2 close
let string fplot = gather_script.ps
set ipl1na ^fplot
device 2 postscript
.
. Step 1:  Initialize some plot control settings and the random number
.          generator
.
y2frame off; x2frame off
grid thickness 0.01; grid on
set random number generator fibonacci congruential
seed 33497
.
. Step 2:  Generate the Data for Mean Time Between Failures Analysis
.
let prob = sequence 0 0.001 0.999
let ngroup = size prob
let alpha = 4
let beta = 3309
.
. Step 3: Calculate the MTBF Posterior
.
let post = igappf(prob,alpha,0, beta) for i = 1 1 ngroup
retain post prob for i = 1 1 1000
.
.         Flipping Probabilities for the Purpose of Plotting
.
LET prob = REVERSE prob
.
.         Calculating Intervals for Plotting Purposes
.
let p09 = igappf(0.9,alpha,0, beta)
let p08 = igappf(0.8,alpha,0,beta)
let p05 = igappf(0.5,alpha,0,beta)
let p01 = igappf(0.1,alpha,0,beta)
.
. Step 4: Calculate the MTBF Posterior
.
.         Plotting to Confirm Results
.
x1label Mean Time Between Failures Objective
y1label Probability of Exceeding Objective
label size 2
y1label displacement 10
title Estimated Product Reliability
line color brown
.
plot prob post
line color blue
drawdata p09 0 p09 0.1
drawdata p05 0 p05 0.5
drawdata p08 0 p08 0.2
drawdata p01 0 p01 0.9
case lower
font complex
move 5 4
text alph()' = 2 + 2 fails, beta()' = 1400 + 1909 hours
move 63 4
text lamb() =
case asis
move 68 4
text 1/GSUP()-1
move 75 4
text (1/a, 4, 1/3309)

.
. Step 5: Sampling from the Posterior Distribution with Replacement Using
.         GATHER
.
.          Notes: The routine performs weighted samples by comparing the
.                 weights to values generated from a uniform random
.                 distribution scaled to encapsulate the range of the 
.                 weights, in this case the posterior values.  The
.                 routine's scale parameters reduce the number of maxloop
.                 draws necessary to generate the sample.  Random draws
.                 are included in the random sample if the sample's weight
.                 is greater than the random draw from the comparison
.                 distribution.  A macro of the routine such as
.                 weighted_macro facilitates performing weighted sampling
.                 with replacement.
.
.         Returning Probabilities Back to their Original Order
.
let prob = reverse prob
.
.         Specifying the Iteration and Location Parameters for Sampling
.         the Posterior
.
let n_samp = 10000
let maxloop = 10*n_samp
let loc = minimum post
let scale = maximum post
let i_loc = 1
let i_scale = size prob
.
.         Generate Two Sets of Uniform Random Numbers for Selecting Based
.         on Likelihood
.
let i_value = uniform random numbers for I = 1 1 maxloop
let i_value = loc + scale*i_value
.
let ind = uniform random numbers for i = 1 1 maxloop
let ind = i_loc + i_scale*ind
let index = ceil(ind) - 1
.
.         Now draw weighted samples from the posterior: post_sam
.
let index2 = sequence 1 1 maxloop
let val_i  = gather i_value index2
let val    = gather post index
.
let tag = 0 for i = 1 1 maxloop
let diff = val - val_i
let tag = 1 subset diff > 0
retain index subset tag = 1
retain index for i = 1 1 n_samp
let post_sam = gather prob index
.
delete loc scale i_loc i_scale i_value index ind index2 val val_i tag diff
.
.         Estimating Prior MTBF
.
let prior = igappf(prob, 2, 0, 1400) for i = 1 1 ngroup 
.
.         Specifying the iteration and location parameters for sampling
.         the prior
.
let maxloop = 1000000
let loc = minimum prior
let scale = maximum prior
let i_loc = 1
let i_scale = size prob
.
.         Generate two sets of uniform random numbers for selecting
.         based on likelihood
.
let i_value = uniform random numbers for i = 1 1 maxloop
let i_value = loc + scale*i_value
.
let ind = uniform random numbers for i = 1 1 maxloop
let ind = i_loc + i_scale*ind
let index = ceil(ind) - 1
.
.         Now draw weighted samples from the prior: pri_samp
.
let index2 = sequence 1 1 maxloop
let val_i  = gather i_value index2
let val    = gather post index
.
let tag = 0 for i = 1 1 maxloop
let diff = val - val_i
let tag = 1 subset diff > 0
retain index subset tag = 1
retain index for i = 1 1 n_samp
let pri_samp = gather prob index
delete loc scale i_loc i_scale i_value index ind index2 val val_i tag diff
.
.         Assessing the model by generating prior and posterior
.         predicative distributions
.
.         Generating predictions based on weighted samples of the
.         prior likelihoods
.
let n_sim = size pri_samp 
let pri_val = igappf(pri_samp, 2, 0, 1400) for i = 1 1 n_sim
.
.         Generating predictions based on weighted samples of the
.         posterior likelihoods
.
let n_sim = size post_sam 
let pred_val = igappf(post_sam, alpha, 0, beta) for i = 1 1 n_sim
.
.         Plotting Predicative Distributions
.
x1label Mean Time Between Failures Objective
y1label Density
x1limit 0 6000
y1limit 0 0.001
y1label displacement 12
major xtic mark number 5
major ytic mark number 3
title Weighted Predicative Distributions
line color brown blue
multiple kernel density plot pred_val pri_val
.
line thickness 0.2
line color blue
drawdata 3000 0.001 3500 0.001
movedata 3550 0.000985
hw 2 1
text Prior 
.
line color brown
drawdata 3000 0.00085 3500 0.00085
movedata 3550 0.00084
text Posterior 
line thickness 0.1
.