// ---------------------------------------------------------------------------
// Simulation: compare survey-weighted estimates obtained from a
// probability-proportional-to-size (pps) sample with those obtained from a
// simple random sample, both drawn without replacement from the same
// simulated population of 25000 observations.
//
// Requires the user-written command -gsample- (see -findit gsample-).
// ---------------------------------------------------------------------------

// Create the simulated data
clear
set obs 25000
local sampleN = 500                 // size of each sample drawn below
set seed 83573                      // fixed seed so the run is reproducible
gen kwh = exp(rnormal(6.4, 0.65))   // kwh usage
gen savepct = -0.61 - 0.00014*kwh + 0.14 * sqrt(kwh)  // looks realistic to me
replace savepct = savepct + rnormal(0,0.5)  // gives r = 0.9 with kwh (author's note)

// Population regression relationship
gen sqrtk = sqrt(kwh)
regress savepct kwh sqrtk           // The true population relationship

//
// Sample the data, pps, and run a regression model
//
// Only r(sum) is used below, so -meanonly- is sufficient (no percentiles needed).
quietly summarize kwh, meanonly
gen pps = `sampleN' * kwh/r(sum)    // sampling prob to get pps and n = 500

// User written -gsample- , see -findit gsample-
gsample `sampleN' [aw = pps], gen(picked_pps) wor
gen pwt = 1/pps                     // weight = inverse of the sampling probability
svyset _n [pweight = pwt]
svy: mean savepct if picked_pps
svy: regress savepct kwh sqrtk if picked_pps

//
// Repeat analysis with simple random sampling
//
svyset, clear
gsample `sampleN', gen(picked_psrs) wor
gen psrs = `sampleN'/`=_N'          // sampling prob
replace pwt = 1/psrs                // reuse pwt: now the (constant) SRS weight
svyset _n [pweight = pwt]
svy: mean savepct if picked_psrs
svy: regress savepct kwh sqrtk if picked_psrs

// Thanks,
// =-=-=-=-=-=-=-=-=-=-=-=-=
// Mike Lacy, Assoc. Prof.
// Soc. Dept., Colo. State. Univ.
// Fort Collins CO 80523 USA

