#delim ; * Certify cendif using the auto data. This certification script uses the following STB additions: jknife (from sg34 in STB-24) *; * Begin certification script *; cscript "cendif: Robust CIs for median and other percentile differences" adofile cendif; * Input auto data set into memory and define additional variables. *; use auto,clear;desc; * Create manuf variable *; gene str18 manuf=ltrim(make); replace manuf=substr(manuf,1,index(manuf," ")-1); replace manuf=make if(index(make," ")<=1); lab var manuf "Manufacturer"; tab manuf,missing; * Create test weight variable *; gene wtest=mod(_n,5); lab var wtest "Test weight variable"; tab wtest,missing; * Define if and in test expressions *; global iftest="if(mod(_n,2))"; global intest="in 33/64"; tab wtest $intest $iftest,missing; * Define programs ci_conf and ci_comp to confirm and compare confidence interval matrices *; capt prog drop ci_conf; program define ci_conf,rclass; args cand; * Check that a candidate CI matrix is indeed a CI matrix (with percents in its left column and CIs in the next 3) *; tempname perc ci pc cimin cimid cimax; local nrow=rowsof(`cand'); scal `perc'=1;scal `ci'=1; local i1=0; while(`i1'<`nrow'){local i1=`i1'+1; scal `pc'=`cand'[`i1',1];scal `cimid'=`cand'[`i1',2]; scal `cimin'=`cand'[`i1',3];scal `cimax'=`cand'[`i1',4]; scal `perc'=`perc'&(0<`pc')&(`pc'<100); scal `ci'=`ci'&(`cimin'<=`cimid')&(`cimid'<=`cimax'); }; * Return results. r(perc) confirms that the first column are all percents r(ci) confirms that columns 2-4 contain CIs *; return scalar perc=`perc';return scalar ci=`ci'; end; capt prog drop ci_comp; program define ci_comp,rclass; args left right; * Check whether two CI matrices contain equal CIs, whether the CIs are centred on the same midpoints, and whether the left CIs are inside the right CIc *; tempname equal eqmid inside lmin lmid lmax rmin rmid rmax; local nrow=rowsof(`left'); scal `equal'=1;scal `eqmid'=1;scal `inside'=1; local i1=0; while(`i1'<`nrow'){local i1=`i1'+1; scal `lmid'=`left'[`i1',2];scal `rmid'=`right'[`i1',2]; scal `lmin'=`left'[`i1',3];scal `rmin'=`right'[`i1',3]; scal `lmax'=`left'[`i1',4];scal `rmax'=`right'[`i1',4]; scal `equal'=`equal'&(`lmid'==`rmid')&(`lmin'==`rmin')&(`lmax'==`rmax'); scal `eqmid'=`eqmid'&(`lmid'==`rmid'); scal `inside'=`inside'&(`lmin'>=`rmin')&(`lmax'<=`rmax'); }; * Return results. r(equal) confirms that the two CIs are equal r(eqmid) confirms that the midpoints are equal r(inside) confirms that the left CIs are inside the right CIs *; return scalar equal=`equal';return scalar eqmid=`eqmid'; return scalar inside=`inside'; end; * Test sequence 1. Failure modes. *; * Invalid using data set *; rcof "noi cendif weight using auto,by(foreign)"==498; * No observations *; rcof "noi cendif weight if(0),by(foreign)"==2000; * Too few groups *; rcof "noi cendif weight if(foreign),by(foreign)"==420; * Too many groups *; rcof "noi cendif weight,by(rep78)"==420; * Invalid transformation for Somers' D *; rcof "noi cendif weight,by(foreign) tr(rho)"==498; * Test sequence 2. Check that cendif X,by(foreign) creates the same median difference as cid X,by(foreign) median unpaired. *; for varlist price mpg rep78 hdroom trunk weight length turn displ gratio foreign: cendif X,by(foreign)\matr ci=r(cimat)\scal meddif1=ci[1,2]\ cid X,by(foreign) median unpaired\scal meddif2=r(p50)\assert meddif1==meddif2; capture program drop cidtes1; program define cidtes1; syntax varname(numeric) [if] [in]; local y `varlist'; * Test cendif against cid for all variables in auto data *; marksample touse; cid `y' if[`touse'],by(foreign) median unpaired; matr def cidmed=J(9,1,0); matr cidmed[1,1]=r(p1);matr cidmed[2,1]=r(p5); matr cidmed[3,1]=r(p10);matr cidmed[4,1]=r(p25); matr cidmed[5,1]=r(p50); matr cidmed[6,1]=r(p75);matr cidmed[7,1]=r(p90); matr cidmed[8,1]=r(p95);matr cidmed[9,1]=r(p99); cendif `y' if[`touse'],by(foreign) ce(1,5,10,25,50,75,90,95,99); matr def cimat=r(cimat);matr def cenmed=cimat[1...,2]; ci_conf cimat;assert r(ci)&r(perc); assert mreldif(cidmed,cenmed)==0; qui cendif `y' if[`touse'],by(foreign) ce(1,5,10,25,50,75,90,95,99) tr(asin); matr def cimat=r(cimat);matr def cenmed=cimat[1...,2]; ci_conf cimat;assert r(ci)&r(perc); assert mreldif(cidmed,cenmed)==0; qui cendif `y' if[`touse'],by(foreign) ce(1,5,10,25,50,75,90,95,99) tr(iden); matr def cimat=r(cimat);matr def cenmed=cimat[1...,2]; ci_conf cimat;assert r(ci)&r(perc); assert mreldif(cidmed,cenmed)==0; qui cendif `y' if[`touse'],by(foreign) ce(1,5,10,25,50,75,90,95,99) tr(z); matr def cimat=r(cimat);matr def cenmed=cimat[1...,2]; ci_conf cimat;assert r(ci)&r(perc); assert mreldif(cidmed,cenmed)==0; end; for varlist price mpg rep78 hdroom trunk weight length turn displ gratio foreign: cidtes1 X\qui cidtes1 X $iftest\qui cidtes1 X $intest\qui cidtes1 X $iftest $intest; * Test sequence 3. Check that in and if work as they should. *; capture prog drop iites1; program define iites1; syntax [fweight iweight pweight] [,CLuster(passthru) TRansf(passthru) TDist]; * In alone *; disp "In-range alone"; disp "Weights: [`weight'`exp']"; cendif mpg [`weight'`exp'] $intest,by(foreign) ce(25 50 75) `cluster' `transf' `tdist'; matr def ci=r(cimat);matr def Ds=r(Dsmat); scal N=r(N);scal N_1=r(N_1);scal N_2=r(N_2); ci_conf ci;assert r(perc)&r(ci); ci_conf Ds;assert r(perc)&r(ci); preserve;keep $intest; qui cendif mpg [`weight'`exp'],by(foreign) ce(25 50 75) `cluster' `transf' `tdist'; assert mreldif(ci,r(cimat))==0;assert mreldif(Ds,r(Dsmat))==0; assert N==r(N);assert N_1==r(N_1);assert N_2==r(N_2); restore; * If alone *; disp "If-expression alone"; disp "Weights: [`weight'`exp']"; cendif mpg [`weight'`exp'] $iftest,by(foreign) ce(25 50 75) `cluster' `transf' `tdist'; matr def ci=r(cimat);matr def Ds=r(Dsmat); scal N=r(N);scal N_1=r(N_1);scal N_2=r(N_2); preserve;keep $iftest; qui cendif mpg [`weight'`exp'],by(foreign) ce(25 50 75) `cluster' `transf' `tdist'; assert mreldif(ci,r(cimat))==0;assert mreldif(Ds,r(Dsmat))==0; assert N==r(N);assert N_1==r(N_1);assert N_2==r(N_2); restore; * In and if *; disp "In-range and if-expression together"; disp "Weights: [`weight'`exp']"; cendif mpg [`weight'`exp'] $intest $iftest,by(foreign) ce(25 50 75) `cluster' `transf' `tdist'; matr def ci=r(cimat);matr def Ds=r(Dsmat); scal N=r(N);scal N_1=r(N_1);scal N_2=r(N_2); preserve;keep $intest $iftest; qui cendif mpg [`weight'`exp'],by(foreign) ce(25 50 75) `cluster' `transf' `tdist'; assert mreldif(ci,r(cimat))==0;assert mreldif(Ds,r(Dsmat))==0; assert N==r(N);assert N_1==r(N_1);assert N_2==r(N_2); restore; end; capture prog drop iites2; program define iites2; syntax [,TRansf(passthru) TDist]; * Call iites1 for various cluster and weight options *; * Unclustered analyses *; iites1,`transf' `tdist'; iites1 [fwei=wtest],`transf' `tdist'; iites1 [iwei=wtest],`transf' `tdist'; iites1 [pwei=wtest],`transf' `tdist'; * Clustered analyses *; iites1,`transf' `tdist'; iites1 [fwei=wtest],cl(manuf) `transf' `tdist'; iites1 [iwei=wtest],cl(manuf) `transf' `tdist'; iites1 [pwei=wtest],cl(manuf) `transf' `tdist'; end; * Call iites2 for different values of transf and tdist *; iites2; iites2,tdist; qui iites2,tr(asin); qui iites2,tr(asin) tdist; qui iites2,tr(iden); qui iites2,tr(iden) tdist; qui iites2,tr(z); qui iites2,tr(z) tdist; * Test sequence 4. Check that normal-based CIs are always inside corresponding CIs based on the t-distribution, and that CIs based on uniformly scaled-up weights are inside the CI based on unscaled weights in the case of unclustered fweights, and equal to the CI based on unscaled weights in the case of all other weights *; capt prog drop instes1; program define instes1; syntax [if] [in] [fweight iweight pweight] [,TRansf(passthru) CLuster(passthru)]; local tiny=1e-8; * Define scaled weighting expression *; local scfac=4; if("`exp'"==""){local scexp=`scfac';};else{local scexp "`exp'*`scfac'";}; disp "Weights: [`weight'`exp']"; cendif weight `in' `if' [`weight'`exp'],by(foreign) ce(20(10)80) `transf' `cluster' tdist; matr def Ds11=r(Dsmat);matr def ci11=r(cimat); scal N_11=r(N);scal N_1_11=r(N_1);scal N_2_11=r(N_2); disp "Weights: [`weight'`exp']"; cendif weight `in' `if' [`weight'`exp'],by(foreign) ce(20(10)80) `transf' `cluster'; matr def Ds12=r(Dsmat);matr def ci12=r(cimat); scal N_12=r(N);scal N_1_12=r(N_1);scal N_2_12=r(N_2); disp "Weights: [`weight'`scexp']"; cendif weight `in' `if' [`weight'`scexp'],by(foreign) ce(20(10)80) `transf' `cluster' tdist; matr def Ds21=r(Dsmat);matr def ci21=r(cimat); scal N_21=r(N);scal N_1_21=r(N_1);scal N_2_21=r(N_2); disp "Weights: [`weight'`scexp']"; cendif weight `in' `if' [`weight'`scexp'],by(foreign) ce(20(10)80) `transf' `cluster'; matr def Ds22=r(Dsmat);matr def ci22=r(cimat); scal N_22=r(N);scal N_1_22=r(N_1);scal N_2_22=r(N_2); * Comparisons between t-distributed and corresponding normal numbers and CIs *; assert N_12==N_11;assert N_22==N_21; assert N_1_12==N_1_11;assert N_1_22==N_1_21; assert N_2_12==N_2_11;assert N_2_22==N_2_21; ci_comp Ds12 Ds11;assert r(inside)&r(eqmid)&(!r(equal)); ci_comp Ds22 Ds21;assert r(inside)&r(eqmid)&(!r(equal)); ci_comp ci12 ci11;assert r(inside)&r(eqmid); ci_comp ci22 ci21;assert r(inside)&r(eqmid); * Comparisons between scaled and unscaled numbers and CIs *; if(("`weight'"=="fweight")&("`cluster'"=="")){ assert N_21==N_11*`scfac';assert N_22==N_12*`scfac'; assert N_1_21==N_1_11*`scfac';assert N_1_22==N_1_12*`scfac'; assert N_2_21==N_2_11*`scfac';assert N_2_22==N_2_12*`scfac'; ci_comp Ds21 Ds11;assert r(inside)&r(eqmid)&(!r(equal)); ci_comp Ds22 Ds12;assert r(inside)&r(eqmid)&(!r(equal)); ci_comp ci21 ci11;assert r(inside)&r(eqmid)&(!r(equal)); ci_comp ci22 ci12;assert r(inside)&r(eqmid)&(!r(equal)); }; else{ * Other kinds of weights or clustered fweights *; if("`weight'"=="fweight"){ * Clustered fweights *; assert N_21==N_11*`scfac';assert N_22==N_12*`scfac'; assert N_1_21==N_1_11*`scfac';assert N_1_22==N_1_12*`scfac'; assert N_2_21==N_2_11*`scfac';assert N_2_22==N_2_12*`scfac'; }; else{ * Clustered or unclustered iweights or pweights *; assert N_21==N_11;assert N_22==N_12; assert N_1_21==N_1_11;assert N_1_22==N_1_12; assert N_2_21==N_2_11;assert N_2_22==N_2_12; }; assert mreldif(Ds11,Ds21)<`tiny'; assert mreldif(Ds12,Ds22)<`tiny'; ci_comp ci11 ci21;assert r(equal); ci_comp ci12 ci22;assert r(equal); }; end; capture program drop instes2; program define instes2; syntax [,TRansf(passthru) CLuster(passthru)]; * Call instes1 with different weights *; instes1 [fwei=1],`cluster' `transf'; instes1 [iwei=1],`cluster' `transf'; instes1 [pwei=1],`cluster' `transf'; qui instes1 [fwei=wtest],`cluster' `transf'; qui instes1 [iwei=wtest],`cluster' `transf'; qui instes1 [pwei=wtest],`cluster' `transf'; end; instes2; instes2,cl(manuf); qui instes2,tr(asin); qui instes2,tr(asin) cl(manuf); qui instes2,tr(iden); qui instes2,tr(iden) cl(manuf); qui instes2,tr(z); qui instes2,tr(z) cl(manuf); * Test sequence 5. Show that an unweighted analysis after expanding by weights gives the same result as a fweighted analysis in all cases, and the same result as an iweighted or pweighted analysis in the clustered case *; capture program drop extes1; program define extes1; * Compare expanded analysis with fweighted analysis and with iweighted and pweighted analyses if not clustered *; syntax [if] [in] [, CLuster(passthru) TRansf(passthru) TDist ]; preserve; if(("`if'"!="")|("`in'"!="")){keep `if' `in';}; keep if(wtest>=1);expand wtest; disp "Subsetting expression: (`if' `in')"; disp "Expanded analysis"; cendif displ,by(foreign) ce(25 50 75) `cluster' `transf' `tdist'; scal N__0=r(N);scal N_1__0=r(N_1);scal N_2__0=r(N_2); matr def ci0=r(cimat);matr def Ds0=r(Dsmat); restore; * fweights *; disp "Subsetting expression: (`if' `in')"; disp "Analysis with fweight=wtest"; cendif displ `if' `in' [fwei=wtest],by(foreign) ce(25 50 75) `cluster' `transf' `tdist'; scal N__1=r(N);scal N_1__1=r(N_1);scal N_2__1=r(N_2); matr def ci1=r(cimat);matr def Ds1=r(Dsmat); assert N__1==N__0; assert N_1__1==N_1__0;assert N_2__1==N_2__0; ci_comp ci0 ci1;assert r(equal); ci_comp Ds0 Ds1;assert r(equal); if("`cluster'"!=""){ * iweights *; disp "Subsetting expression: (`if' `in')"; disp "Analysis with iweight=wtest"; cendif displ `if' `in' [iwei=wtest],by(foreign) ce(25 50 75) `cluster' `transf' `tdist'; scal N__1=r(N);scal N_1__1=r(N_1);scal N_2__1=r(N_2); matr def ci1=r(cimat);matr def Ds1=r(Dsmat); ci_comp ci0 ci1;assert r(equal); ci_comp Ds0 Ds1;assert r(equal); * pweights *; disp "Subsetting expression: (`if' `in')"; disp "Analysis with pweight=wtest"; cendif displ `if' `in' [pwei=wtest],by(foreign) ce(25 50 75) `cluster' `transf' `tdist'; scal N__1=r(N);scal N_1__1=r(N_1);scal N_2__1=r(N_2); matr def ci1=r(cimat);matr def Ds1=r(Dsmat); ci_comp ci0 ci1;assert r(equal); ci_comp Ds0 Ds1;assert r(equal); }; end; capture program drop extes2; program define extes2; * Call extes1 under a variety of settings *; syntax [,TRansf(passthru) TDist]; extes1,`transf' `tdist'; extes1,cl(make) `transf' `tdist'; qui extes1 $intest,`transf' `tdist'; qui extes1 $intest,cl(make) `transf' `tdist'; qui extes1 $iftest,`transf' `tdist'; qui extes1 $iftest,cl(make) `transf' `tdist'; extes1 $intest $iftest,`transf' `tdist'; extes1 $intest $iftest,cl(make) `transf' `tdist'; end; * Call extes2 under a variety of settings *; extes2; qui extes2,tdist; qui extes2,tr(asin); qui extes2,tdist tr(asin); qui extes2,tr(iden); qui extes2,tdist tr(iden); qui extes2,tr(z); qui extes2,tdist tr(z); * Test sequence 6. Show that adding a constant to all of one group changes the percentile differences by the constant, in a direction depending on the group to which the constant is added *; capture program drop contes1; program define contes1; syntax,COnlis(numlist); local nconst:word count `conlis'; cendif weight,by(foreign) ce(25 50 75); matr def ci0=r(cimat);matr def Ds0=r(Dsmat); local i1=0; while(`i1'<`nconst'){local i1=`i1'+1; local const:word `i1' of `conlis'; gene wstar=weight+`const'*(!foreign); lab var wstar "Weight (lbs) + `const'"; matr def ci1=ci0; matr def ci1[1,2]=ci1[1...,2..4]+J(3,3,`const'); cendif wstar,by(foreign) ce(25 50 75); matr def ci2=r(cimat);matr def Ds2=r(Dsmat); ci_comp ci1 ci2;assert r(equal); ci_comp Ds0 Ds2;assert r(equal); drop wstar; }; end; contes1,co(-300.75 -200.50 -100.25 100.25 200.50 300.75); * Test sequence 7. Show that cendif produces infinite confidence limits when required *; capture program drop inftes1; program define inftes1; syntax [,TRansf(passthru) EForm]; * Define infinity *; local infty=1e300; cendif weight if((make=="AMC Concord")|(make=="Audi 5000")), by(foreign) centile(0.00000001 99.99999999) `transf' `eform'; matr cimat=r(cimat);ci_conf cimat;assert r(perc)&r(ci); scal minfty=cimat[1,3]; if("`eform'"==""){assert minfty==-`infty';}; else{assert minfty==0;}; scal pinfty=cimat[2,4];assert pinfty==`infty'; end; inftes1;inftes1,eform; qui inftes1,tr(asin);qui inftes1,tr(asin) eform; qui inftes1,tr(iden);qui inftes1,tr(iden) eform; qui inftes1,tr(z);qui inftes1,tr(z) eform; * Test sequence 8. Show that the eform option produces consistent confidence intervals *; gene logwt=log(weight); capture program drop eftes1; program define eftes1; syntax[,TRansf(passthru) ]; cendif logwt,by(foreign) ce(0.0001 10(10)90 99.9999) eform `transf'; matr cimat=r(cimat);ci_conf cimat;assert r(perc)&r(ci); cendif logwt,by(foreign) ce(0.0001 10(10)90 99.9999) eform level(90) `transf'; matr cimat=r(cimat);ci_conf cimat;assert r(perc)&r(ci); cendif logwt,by(foreign) ce(0.0001 10(10)90 99.9999) eform level(99) `transf'; matr cimat=r(cimat);ci_conf cimat;assert r(perc)&r(ci); end; eftes1; qui eftes1,tr(asin); qui eftes1,tr(iden); qui eftes1,tr(z); drop logwt; exit;