adopath ++ ~/todo/glm run prolog glmr /* -----------------------------------------------------------------------*/ /* macros affecting this tests behavior */ global DEBUG 1 /* delete this line to not list accuracy */ /* -----------------------------------------------------------------------*/ /* General programs */ program define mateq /* m1 m2 tolerance */ local m1 "`1'" local m2 "`2'" local tol=`3' tempname dif sc mat `dif' = `m1' - `m2' mat `dif' = `dif' ' * `dif' scalar `sc' = trace(`dif') capture assert `sc'<=`tol' if _rc { di in red "assertion is false; observed sq. dif. " `sc' exit 9 } if "$DEBUG" != "" { di "(" `sc' ")" } end /* -----------------------------------------------------------------------*/ /* coefficient and VCE storage and checking routines */ program define sto_reg /* name */ global H__NAME1 "`1'" mat H_b = get(_b) mat H_V = get(VCE) end program define chk_coef tempname b mat `b' = get(_b) di "checking coefficient vector against $H__NAME1" mateq H_b `b' `1' end program define chk_VCE tempname v mat `v' = get(VCE) di "checking VCE matrix against $H__NAME1" mateq H_V `v' `1' end program define chk_cV chk_coef `1' chk_VCE `2' end /* -----------------------------------------------------------------------*/ /* Routines to store macros and do above */ program define sto_E /* name macroname(s) ... */ sto_reg `1' global H__NAME2 "`1'" mac shift while "`1'"!="" { global H_`1' ${S_E_`1'} mac shift } end program define rm_E global H__NAME2 while "`1'"!="" { global H_`1' mac shift } global H__NAME1 capture mat drop H_b capture mat drop H_V end program define chk_E chk_cV `1' `1' mac shift di "checking S_E_ macros against $H__NAME2" while "`1'"!="" { capture assert "${H_`1'}"=="${S_E_`1'}" if _rc { di "S_E_`1' mismatch:" di "orig: ${H_`1'}" di " new: ${S_E_`1'}" exit 7003 } mac shift } end program define forglmr `*' disp k lno pow depv m ber cmd flo link fam /* */ dd dc dev chi2 nobs rdf msg1 msg2 msg3 end /* -----------------------------------------------------------------------*/ /* BEGIN TEST */ use auto regress mpg weight displ foreign sto_reg base_regress glmr mpg weight displ for chk_cV 1e-26 1e-14 * In the following tests, we are estimating on the foreign subsample * case 1, truth, we just keep the foreign subsample. * compare regress and glmr keep if foreign regress mpg weight displ sto_reg regress_on_foreign_sample glmr mpg weight displ chk_cV 1e-26 5e-12 forglmr sto_E glmr_on_foreign_sample * case 2: glmr ... if foreign; should produce identical results use auto, clear glmr mpg weight displ if for forglmr chk_E 0 * case 3: one RHS var set to missing if !for use auto, clear replace weight=. if !foreign glmr mpg weight displ forglmr chk_E 0 * case 3.1: randomize order of data gen u = uniform() set seed 4213267 /* make deterministic */ sort u glmr mpg weight displ forglmr chk_E 1e-26 /* small error because of summing? */ * case 4: make the second variable missing if !for: use auto, clear replace displ=. if !for glmr mpg weight displ forglmr chk_E 0 * case 5: make lhs variable missing if !for: use auto, clear replace mpg=. if !for glmr mpg weight displ forglmr chk_E 0 * case 6: distribute missing among on all variables, LHS and RHS use auto, clear set seed 39487389 /* make deterministic */ gen u = uniform() replace mpg=. if !for & u<=1/3 replace weight=. if !for & u<2/3 & mpg!=. replace displ = . if !for & mpg!=. & weight!=. rcof "assert mpg!=." != 0 rcof "assert weight!=." != 0 rcof "assert displ!=." != 0 assert mpg==. | weight==. | displ==. if !for assert mpg!=. & weight!=. & displ!=. if for glmr mpg weight displ forglmr chk_E 0 * -------------------------------------------------------------------- * analytically weighted regression * case 0: compare estimates for regress and glmr: use auto, clear reg mpg weight displ [aw=mpg] sto_reg regress_aw glmr mpg weight displ [aw=mpg] chk_cV 1e-14 1e-14 * case 1, truth, we just keep the foreign subsample. * compare regress and glmr keep if foreign regress mpg weight displ [aw=mpg] sto_reg regress_aw_foreign_sample glmr mpg weight displ [aw=mpg] chk_cV 1e-17 5e-11 forglmr sto_E glmr_aw_foreign_sample * case 2: glmr ... if foreign; should produce identical results use auto, clear glmr mpg weight displ [aw=mpg] if for forglmr chk_E 0 * case 3: one RHS var set to missing if !for use auto, clear replace weight=. if !foreign glmr mpg weight displ [aw=mpg] forglmr chk_E 0 * case 5: make lhs variable missing if !for: use auto, clear gen w = mpg replace mpg=. if !for glmr mpg weight displ [aw=w] forglmr chk_E 0 * case 5.1: make weight missing if !for use auto, clear gen w = mpg set seed 76635277 /* make deterministic */ replace w = . if !for & uniform()<.5 replace w = 0 if !for & w!=. rcof "assert w==0 if !for" != 0 rcof "assert w==. if !for" != 0 assert w==0 | w==. if !for glmr mpg weight displ [aw=w] forglmr chk_E 0 * case 6: distribute missing among on all variables, LHS and RHS, and weight use auto, clear gen w = mpg set seed 39487389 /* make deterministic */ gen u = uniform() replace mpg=. if !for & u<=1/4 replace weight=. if !for & u<2/4 & mpg!=. replace displ = . if !for & mpg!=. & weight!=. & u<3/4 replace w = . if !for & mpg!=. & weight!=. & displ!=. rcof "assert mpg!=." != 0 rcof "assert weight!=." != 0 rcof "assert displ!=." != 0 rcof "assert w!=." != 0 assert mpg==. | weight==. | displ==. | w==. if !for assert mpg!=. & weight!=. & displ!=. & w!=. if for replace w = 0 if w==. & uniform()>=.5 glmr mpg weight displ [aw=w] forglmr chk_E 0 * --------------------------------------------- * frequency-weighted regression use auto, clear expand rep78 drop if rep78==. glmr mpg weight displ forglmr sto_E glmr_fw_by_expand use auto, clear glmr mpg weight displ [fw=rep78] forglmr chk_E * linear regression case is certified. * --------------------------------------------- * Poisson regression use auto, clear poisson rep78 weight foreign sto_reg poisson glmr rep78 weight foreign, fam(poisson) chk_cV 1e-16 1e-16 set seed 299620891 /* make deterministic */ gen w = int(4*uniform())+1 replace w = 0 if uniform()<.10 replace w = . if uniform()<.10 drop if w==. | w==0 expand w glmr rep78 weight foreign, fam(poisson) forglmr sto_E glmr_poisson_by_expand use auto, clear set seed 299620891 /* same seed as above */ gen w = int(4*uniform())+1 replace w = 0 if uniform()<.10 replace w = . if uniform()<.10 glmr rep78 weight foreign [fw=w], fam(poisson) forglmr chk_E 1e-30 * --------------------------------------------- * logistic regression use auto, clear logit for mpg weight sto_reg logit glmr for mpg weight, fam(bin) chk_cV 1e-15 4e-10 set seed 299620891 /* make deterministic */ gen w = int(4*uniform())+1 logit for mpg weight [aw=w] sto_reg logit_aw glmr for mpg weight [aw=w], fam(bin) chk_cV 1e-13 1e-10 logit for mpg weight [fw=w] sto_reg logit_fw glmr for mpg weight [fw=w], fam(bin) chk_cV 1e-14 1e-10 forglmr sto_E glmr_logit_fw expand w glmr for mpg weight, fam(bin) forglmr chk_E 1e-26 /* logit is now certified, but we are going to poke around and verify once again that missing and 0 weights are handled in the same way: */ use auto, clear set seed 299620891 /* make deterministic */ gen w = int(4*uniform())+1 replace w = 0 if uniform()<.10 replace w = . if uniform()<.10 keep for mpg weight w tempfile sample save `sample' expand w drop if w==0 | w==. glmr for mpg weight, fam(bin) forglmr sto_E glmr_logit_fw_by_expand use `sample', clear glmr for mpg weight [fw=w], fam(bin) forglmr chk_E 1e-27 forglmr sto_E glmr_logit_fw=w /* now save this exact result */ * * we have just saved the exact result obtained from [fw=w]. In w * are 0's and missings. We will * (1) specify [fw=w] if w!=0 & w!=. * (2) use the data, drop w==0, specify [fw=w] * (3) use the data, drop w==., specify [fw=w] * (4) use the data, drop if w==0 | w==., specify [fw=w] * If the code is correct, all results will be EXACTLY the same. * quietly glmr for mpg weight [fw=w] if w!=0 & w!=., fam(bin) forglmr chk_E 0 /* should be exactly the same */ drop if w==0 quietly glmr for mpg weight [fw=w] if w!=0 & w!=., fam(bin) forglmr chk_E 0 /* should be exactly the same */ use `sample', clear drop if w==. quietly glmr for mpg weight [fw=w] if w!=0 & w!=., fam(bin) forglmr chk_E 0 /* should be exactly the same */ drop if w==0 quietly glmr for mpg weight [fw=w] if w!=0 & w!=., fam(bin) forglmr chk_E 0 /* should be exactly the same */ * --------------------------------------------- * probit regression use auto, clear probit for mpg weight sto_reg probit glmr for mpg weight, fam(bin) link(probit) chk_coef 5e-8 /* coefs tolerable */ chk_VCE .02 /* not a bug */ set seed 299620891 /* make deterministic */ gen w = int(4*uniform())+1 probit for mpg weight [aw=w] sto_reg probit_aw glmr for mpg weight [aw=w], fam(bin) link(p) chk_coef 1e-10 /* coefs okay */ chk_VCE .008 /* not a bug */ probit for mpg weight [fw=w] sto_reg probit_fw glmr for mpg weight [fw=w], fam(bin) link(p) chk_coef 1e-10 /* coefs okay */ chk_VCE .002 /* not a bug */ chk_cV 99 99 * --------------------- nbreg drop _all input cohort age_mos deaths exposure 1 1 168 278.4 1 2 48 538.8 1 3 63 794.4 1 4 89 1550.8 1 5 102 3006.0 1 6 81 8743.5 1 7 40 14270.0 2 1 197 403.2 2 2 48 786.0 2 3 62 1165.3 2 4 81 2294.8 2 5 97 4500.5 2 6 103 13201.5 2 7 39 19525.0 3 1 195 495.3 3 2 55 956.7 3 3 58 1381.4 3 4 85 2604.5 3 5 87 4618.5 3 6 70 9814.5 3 7 10 5802.5 end replace age = 0.5 if age==1 replace age = 4.5 if age==3 replace age = 9 if age==4 replace age = 18 if age==5 replace age = 42 if age==6 replace age = 90 if age==7 gen logexp = ln(exposure) quietly tab cohort, gen(coh) nbreg deaths coh2 coh3 mat b = get(_b) local alpha = exp(b[1,4]) mat b = b[1,1..3] mat v = get(VCE) mat v = v[1..3,1..3] mat colnames b = _:coh2 _:coh3 _:_cons mat colnames v = _:coh2 _:coh3 _:_cons mat rownames v = _:coh2 _:coh3 _:_cons mat post b v sto_reg doctored_nbreg glmr deaths coh2 coh3, fam(nb) link(log) k(`alpha') chk_cV 1e-10 1e-12 nbreg deaths coh2 coh3, ex(exposure) nolog mat b = get(_b) local alpha = exp(b[1,4]) mat b = b[1,1..3] mat v = get(VCE) mat v = v[1..3,1..3] mat colnames b = _:coh2 _:coh3 _:_cons mat colnames v = _:coh2 _:coh3 _:_cons mat rownames v = _:coh2 _:coh3 _:_cons mat post b v sto_reg doctored_nbreg_ex glmr deaths coh2 coh3, fam(nb) link(log) k(`alpha') lnoffset(ex) chk_cV 1e-9 .0001 /* looseness on VCE okay */ glmr deaths coh2 coh3, fam(nb) link(log) k(`alpha') offset(logexp) chk_cV 1e-9 .0001 /* looseness on VCE okay */ * ----------------------------------------- * exponential regression use auto, clear gen lw = ln(weight) ereg mpg lw foreign sto_reg ereg glmr mpg lw foreign, fam(gam) link(log) scale(1) nolog chk_coef 1e-10 /* coefficients okay */ chk_VCE 1.5 /* not a bug */ * ------------------------------------------- * minor odds and ends * verify glmr can replay: glmr * verify cannot reply when no estimates: discard rcof "noisily glmr" == 301 * null sample use auto, clear rcof "noisily glmr mpg weight displ if foreign==2" == 2000 * 1 observation rcof "noisily glmr mpg weight displ in 1" == 2001 * if and in do the same thing quietly glmr mpg weight displ if foreign==0 sort for forglmr sto_E glmr_done_with_if count if for==0 local top = _result(1) quietly glmr mpg weight displ in 1/`top' /* it is interesting, but not concerning */ forglmr chk_E 1e-20 /* that this cannot be 0 (exact equality) */ * R^2 = 1 sort for keep if _n==1 | _n==_N glmr mpg weight * ------------------------------------------- * fam(binomial #|name) tests * drop _all input pos pop x pos2 pop2 17 50 3 34 100 3 10 4 30 100 18 25 5 54 100 end blogit pos pop x sto_reg blogit glmr pos x, fam(binomial pop) chk_cV 1e-15 1e-15 blogit pos2 pop2 x sto_reg blogit2 glmr pos2 x, fam(binomial 100) chk_cV 1e-14 1e-10 * same thing, probit bprobit pos pop x sto_reg bprobit glmr pos x, fam(binomial pop) link(probit) chk_cV 1e-13 1e-6 bprobit pos2 pop2 x sto_reg bprobit2 glmr pos2 x, fam(binomial 100) link(probit) chk_cV 1e-13 1e-5 * nonsense cases rcof "noisily glmr pos x, fam(binomial 0)" == 198 rcof "noisily glmr pos x, fam(binomial -1)" == 198 rcof "noisily glmr pos x, fam(binomial .25)" == 198 rcof "noisily glmr pos x, fam(binomial novar)" == 111 gen bad = .1 rcof "noisily glmr pos x, fam(binomial bad)" == 499 replace bad = -2 rcof "noisily glmr pos x, fam(binomial bad)" == 499 replace bad = 0 rcof "noisily glmr pos x, fam(binomial bad)" == 499 replace bad = cond(_n==1, pos-2, pos+5) rcof "noisily glmr pos x, fam(binomial bad)" == 499 rcof "noisily glmr pos x, fam(binomial)" == 499