//use "E:\CSTOR_Research\DataSet\NIS\Year_2010\NIS_2010_Core" , clear
set more off

// Benchmarks for various programming techniques
// As presented by Joe Canner at Stata Conference, New Orleans, July 19, 2013
//
// Each benchmark is wrapped in "timer clear N / timer on N ... timer off N /
// timer list N" so that its elapsed time is printed right after it runs.
// The script expects a dataset in memory that provides the string variables
// PR1-PR15 and ECODE1 and the numeric variables AGE and FEMALE (see the
// commented-out "use" line above).

// ---------------------------------------------------------------------------
// Compare different methods of seeing if an ICD9 code is in a list of values
// ---------------------------------------------------------------------------

// foreach value in
timer clear 1
timer on 1
gen FOREACH=0
forvalues x = 1/15 {
    foreach value in "7359" "741" "9955" "640" {
    //foreach value in "7359" "741" "9955" "640" "9904" "8154" "7569" "3893" {
        replace FOREACH=1 if PR`x'=="`value'"
    }
}
timer off 1
timer list 1

// OR
timer clear 2
timer on 2
gen IFOR=0
forvalues x = 1/15 {
    replace IFOR=1 if PR`x'=="7359" | PR`x'=="741" | PR`x'=="9955" | PR`x'=="640"
    //replace IFOR=1 if PR`x'=="7359" | PR`x'=="741" | PR`x'=="9955" | PR`x'=="640" | PR`x'=="9904" | PR`x'=="8154" | PR`x'=="7569" | PR`x'=="3893"
}
timer off 2
timer list 2

//inlist
timer clear 3
timer on 3
gen INLIST=0
forvalues x = 1/15 {
    replace INLIST=1 if inlist(PR`x',"7359","741","9955","640")
    //replace INLIST=1 if inlist(PR`x',"7359","741","9955","640","9904","8154","7569","3893")
}
timer off 3
timer list 3

//foreach instead of forvalues
timer clear 4
timer on 4
gen VARLIST=0
foreach x of varlist PR1-PR15 {
    replace VARLIST=1 if inlist(`x',"7359","741","9955","640")
    //replace VARLIST=1 if inlist(`x',"7359","741","9955","640","9904","8154","7569","3893")
}
timer off 4
timer list 4

// All four flags should have identical distributions
tab1 FOREACH IFOR INLIST VARLIST

// ---------------------------------------------------------------------------
// INLIST vs. RECODE for single ICD9s
// ---------------------------------------------------------------------------

//inlist
timer clear 10
timer on 10
gen INLIST1=0
replace INLIST1=1 if inlist(PR1,"7359","741","9955","640","9904","8154","7569","3893")
timer off 10
timer list 10

//destring+recode
timer clear 9
timer on 9
// ignore("incvl") strips the characters i, n, c, v, l so that the non-numeric
// values "incn" and "invl" (blanked explicitly near the end of this script)
// convert to missing instead of blocking the conversion
destring PR1, gen(tempPR1) ignore("incvl")
recode tempPR1 (7359 741 9955 640 9904 8154 7569 3893 = 1) (else=0), gen(DESTRING_RECODE)
drop tempPR1
timer off 9
timer list 9

//real()+recode
timer clear 14
timer on 14
// real() returns missing for strings that are not numbers
gen tempPR1=real(PR1)
//destring PR1, gen(tempPR1) ignore("incvl")
recode tempPR1 (7359 741 9955 640 9904 8154 7569 3893 = 1) (else=0), gen(REAL_RECODE)
drop tempPR1
timer off 14
timer list 14

tab1 INLIST1 DESTRING_RECODE REAL_RECODE,missing

// ---------------------------------------------------------------------------
// IFOR/INLIST vs RECODE for single ICD9s with ranges
// ---------------------------------------------------------------------------

// split+destring+recode
timer clear 12
timer on 12
// Splitting on "E" leaves the numeric part of the code in nECODE2
split ECODE1, generate(nECODE) parse(E)
destring nECODE2, gen(iECODE1)
drop nECODE1-nECODE2
recode iECODE1 (9200/9209 956 966 986 974 =1) (8800/8869 888 9570/9579 9681 9870 =2) ///
    (9220/9223 9228 9229 9550/9554 9650/9654 9794 9850/9854 970=3) (8100/8199 9585 9685 9885=4), gen(mech1)
// Values not matched above were copied through unchanged by recode; collapse
// all of those remaining codes into category 5
recode mech1 (5/10000 =5)
timer off 12
timer list 12

// substring+real+recode
timer clear 15
timer on 15
// Skip the leading "E" and convert the (up to 4) following characters
gen inECODE1=real(substr(ECODE1,2,4))
recode inECODE1 (9200/9209 956 966 986 974 =1) (8800/8869 888 9570/9579 9681 9870 =2) ///
    (9220/9223 9228 9229 9550/9554 9650/9654 9794 9850/9854 970=3) (8100/8199 9585 9685 9885=4), gen(mech2)
recode mech2 (5/10000 =5)
timer off 15
timer list 15

// Using combination of AND, OR, INLIST
// NOTE(review): the range tests here and in the inrange() variant below are
// string comparisons, not numeric ones -- confirm they select the same codes
// as the numeric recode ranges on the data at hand (see the tab1 below).
timer clear 13
timer on 13
gen mech3=.
replace mech3=1 if (ECODE1>="E9200" & ECODE1<="E9209") | inlist(ECODE1,"E956","E966","E986","E974")
replace mech3=2 if (ECODE1>="E8800" & ECODE1<="E8869") | (ECODE1>="E9570" & ECODE1<="E9579") | ///
    inlist(ECODE1,"E888","E9681","E9870")
replace mech3=3 if (ECODE1>="E9220" & ECODE1<="E9223") | (ECODE1>="E9550" & ECODE1<="E9554") |(ECODE1>="E9650" & ECODE1<="E9654") | ///
    (ECODE1>="E9850" & ECODE1<="E9854") | inlist(ECODE1,"E9228","E9229","E9794","E970")
replace mech3=4 if (ECODE1>="E8100" & ECODE1<="E8199") | inlist(ECODE1,"E9585","E9685","E9885")
// Any other code that starts with "E" goes to category 5
replace mech3=5 if mech3==. & substr(ECODE1,1,1)=="E"
timer off 13
timer list 13

// Using inrange instead of >= & <=
timer clear 16
timer on 16
gen mech4=.
replace mech4=1 if inrange(ECODE1,"E9200","E9209") | inlist(ECODE1,"E956","E966","E986","E974")
replace mech4=2 if inrange(ECODE1,"E8800","E8869") | inrange(ECODE1,"E9570","E9579") | inlist(ECODE1,"E888","E9681","E9870")
replace mech4=3 if inrange(ECODE1,"E9220","E9223") | inrange(ECODE1,"E9550","E9554") | inrange(ECODE1,"E9650","E9654") | ///
    inrange(ECODE1,"E9850","E9854") | inlist(ECODE1,"E9228","E9229","E9794","E970")
replace mech4=4 if inrange(ECODE1,"E8100","E8199") | inlist(ECODE1,"E9585","E9685","E9885")
replace mech4=5 if mech4==. & substr(ECODE1,1,1)=="E"
timer off 16
timer list 16

tab1 mech1 mech2 mech3 mech4, missing

// ---------------------------------------------------------------------------
// Compare the recode statement with a bunch of replaces
// ---------------------------------------------------------------------------

// For categorizing continuous variables
// (timer 4 is reused here; it is cleared first, so the VARLIST timing that was
// already listed above does not leak into this measurement)
timer clear 4
timer on 4
gen AGE1=.
replace AGE1=1 if AGE>=0 & AGE <=9
replace AGE1=2 if AGE>=10 & AGE <=19
replace AGE1=3 if AGE>=20 & AGE <=29
replace AGE1=4 if AGE>=30 & AGE <=39
replace AGE1=5 if AGE>=40 & AGE <=49
replace AGE1=6 if AGE>=50 & AGE <=59
replace AGE1=7 if AGE>=60 & AGE <=69
replace AGE1=8 if AGE>=70 & AGE <=79
replace AGE1=9 if AGE>=80 & AGE <=89
replace AGE1=10 if AGE>=90 & AGE <=120
timer off 4
timer list 4

timer clear 5
timer on 5
// The recode() function labels each group by its upper cut-point
// (9, 19, ..., 120) rather than by 1-10 as AGE1 and AGE3 do
gen AGE2=recode(AGE,9,19,29,39,49,59,69,79,89,120)
timer off 5
timer list 5

timer clear 11
timer on 11
recode AGE (0/9=1) (10/19=2) (20/29=3) (30/39=4) (40/49=5) (50/59=6) (60/69=7) (70/79=8) (80/89=9) (90/120=10), gen(AGE3)
timer off 11
timer list 11

tab AGE AGE1, missing
tab AGE AGE2, missing
tab AGE AGE3, missing

// For reordering
// Build a test variable: FEMALE 0 -> 3, FEMALE 1 -> 2, FEMALE missing -> 1
gen sex=3-FEMALE
replace sex=1 if FEMALE==.
tab FEMALE sex, missing

// In-place swap: because each replace tests the variable being changed,
// 1 and 2 have to pass through the temporary values 4 and 5
gen sex_new=sex
timer clear 6
timer on 6
replace sex_new=0 if sex_new==3
replace sex_new=5 if sex_new==2
replace sex_new=4 if sex_new==1
replace sex_new=1 if sex_new==5
replace sex_new=2 if sex_new==4
timer off 6
timer list 6

timer clear 7
timer on 7
recode sex (3=0) (1=2) (2=1), gen(sex_new1)
timer off 7
timer list 7

// Conditions test the untouched source variable, so no temporaries are needed
gen sex_new2=sex
timer clear 8
timer on 8
replace sex_new2=0 if sex==3
replace sex_new2=2 if sex==1
replace sex_new2=1 if sex==2
timer off 8
timer list 8

tab sex sex_new
tab sex sex_new1
tab sex sex_new2

// ---------------------------------------------------------------------------
// Simulate MRN number situation and compare destring with real(subinstr())
// ---------------------------------------------------------------------------
replace PR1="" if inlist(PR1,"incn","invl")
replace PR2="" if inlist(PR2,"incn","invl")
replace PR3="" if inlist(PR3,"incn","invl")
// Fake MRN of the form "code-code-code"; only built when all three parts exist
gen MRN=PR1+"-"+PR2+"-"+PR3 if PR1!="" & PR2!="" & PR3!=""

timer clear 20
timer on 20
destring MRN, ignore("-") gen(newMRN1)
timer off 20
timer list 20

timer clear 21
timer on 21
// Stored as double, not long: three concatenated codes can run to 12 digits,
// which exceeds the largest value a long can hold (2,147,483,620) and would
// be stored as missing. A double holds integers of this size exactly, so the
// result matches what destring produces above.
gen double newMRN2=real(subinstr(MRN,"-","",.))
timer off 21
timer list 21