*! 1.2.0  10dec96  (Jeroen Weesie/ICS)  STB-37 dm46
* -- added clustered sampling (options -cluster-, -any-, -all-)
* -- added option -keep-
* 1.1.0  12nov95 (StataCorp)
*
* sample2 -- draw a random sample of observations, optionally by cluster
*
* Syntax
*     sample2 # [if exp] [in range]
*             [, by(groupvars) cluster(clustervars) keep(newvar) any all]
*
*     #             percentage to retain, 0 <= # <= 100
*     if / in       restrict the observations (or clusters) that are subject
*                   to sampling; observations outside if/in are never dropped
*     by()          sample # percent within each by-group; not allowed with in
*     cluster()     sample whole clusters: one record per cluster is drawn and
*                   the outcome is applied to every record of that cluster
*     any           with cluster() and if/in: a cluster is subject to sampling
*                   when at least one of its records satisfies if/in
*     all           with cluster() and if/in: a cluster is subject to sampling
*                   only when its first record after sorting on the if/in
*                   marker satisfies if/in, i.e. when every record does
*     keep()        do not drop anything; create newvar = 1 for observations
*                   that would be kept and 0 for those that would be dropped
*
* Errors
*     198   both any and all given, # outside 0-100, by() variables not
*           constant within clusters, or if/in splitting a cluster without
*           any or all
*     190   in combined with by()
*
* Notes
*     The sort order of the data is changed.
*     keep() is honoured on the early-exit paths as well (# equal to 100, or
*     no observation subject to sampling): newvar is then created equal to 1.
program define sample2
    * remember the version of the caller so that the matching random number
    * function can be chosen further down
    quiet version
    local caller = _result(1)        /* ensure uniform0 is used */
    version 5.0

    * old-style command line parsing; the "=" prefix makes the leading
    * percentage parse as an expression without a prefix
    local if "opt"
    local in "opt"
    local exp "req noprefix"
    local options "any all by(string) Cluster(str) Keep(str)"
    parse "=`*'"

    if "`keep'" != "" {
        confirm new var `keep'
    }
    * any and all are three characters each, so a combined length above 3
    * means both were specified
    if length("`any'`all'") > 3 {
        di in re "at most one of the options { any | all } may be specified"
        exit 198
    }
    if "`by'" != "" {
        unabbrev `by'
        local by "$S_1"
        if "`in'" != "" {
            di in red "-in- may not be combined with -by-"
            exit 190
        }
        local sort "sort `by'"
        local byp "by `by':"
    }
    confirm number `exp'
    if `exp' < 0 | `exp' > 100 {
        error 198
    }
    if `exp' == 100 {
        * nothing will be dropped, but the marker requested through keep()
        * must still exist afterwards
        if "`keep'" != "" {
            gen byte `keep' = 1
        }
        exit
    }

    tempvar cid N rand tokeep touse
    if "`cluster'" != "" {
        confirm var `cluster'

        * ensure that -by- are constant within clusters
        if "`by'" != "" {
            parse "`by'", p(" ")
            while "`1'" != "" {
                sort `cluster' `1'
                qui capt by `cluster': assert `1'[1]==`1'[_N]
                if _rc {
                    di in re "by(groupvars) should be constant in clusters"
                    exit 198
                }
                mac shift
            }
        }

        * select unique record within clusters
        sort `cluster'
        qui by `cluster' : gen byte `cid' = _n==1

        * maintain cluster integrity with if/in
        if "`if'`in'" != "" {
            mark `touse' `if' `in'
            sort `cluster' `touse'
            if "`any'" != "" {
                * last record has the largest marker value in the cluster
                qui by `cluster': replace `touse' = `touse'[_N]
            }
            else if "`all'" != "" {
                * first record has the smallest marker value in the cluster
                qui by `cluster': replace `touse' = `touse'[1]
            }
            else {
                * -touse- should be constant over clusters
                qui capture by `cluster': assert `touse'[1]==`touse'[_N]
                if _rc {
                    di in re "if/in should select complete clusters " /*
                    */ "(but see options -any- and -all-)"
                    exit 198
                }
            }
            sort `cluster' `touse'
        }
        else {
            gen byte `touse' = 1
        }

        * adopt if/in: only the representative record of each selected
        * cluster takes part in the draw
        local if "if `touse'==1 & `cid'==1"
        local in
    }
    else {
        if "`any'" != "" {
            di in bl "option -any- ignored"
        }
        if "`all'" != "" {
            di in bl "option -all- ignored"
        }
    }

    * "re-cycling" of StataCorp code
    * tokeep is 0 for candidates and missing for everything else
    `sort'
    qui `byp' gen byte `tokeep' = 0 `if' `in'
    qui count if `tokeep'==0
    if _result(1)==0 {
        * no observation is subject to sampling, so every observation is
        * kept; still create the marker requested through keep()
        if "`keep'" != "" {
            gen byte `keep' = 1
        }
        exit
    }
    qui `byp' gen long `N' = sum(`tokeep'==0)
    * observations outside if/in are always retained
    qui replace `tokeep' = 1 if `tokeep'==.
    * number of candidates to retain, rounded to the nearest integer
    qui `byp' replace `N' = int(`N'[_N]*`exp'/100+.5)
    if `caller' < 4.0 {
        gen float `rand' = uniform0()
    }
    else {
        gen float `rand' = uniform()
    }
    * candidates (tokeep equal to 0) sort first, in random order; the first
    * N of them are retained
    sort `by' `tokeep' `rand'
    qui `byp' replace `tokeep'=1 if `tokeep'==0 & _n<=`N'

    * -tokeep- should be constant within clusters
    if "`cluster'" != "" {
        sort `cluster' `tokeep'
        qui by `cluster' : replace `tokeep'=0 if `tokeep'[1]==0
    }

    * drop cases or set keep/variable
    if "`keep'" != "" {
        confirm new var `keep'
        gen byte `keep' = `tokeep'
    }
    else {
        drop if `tokeep'==0
    }
end
exit