

// SESSION SETUP
// =============
// Start from an empty session, pin the interpreter to version 16 so the
// commands below keep their version-16 behaviour, and fix the random seed.
clear all
set more off
version 16
set seed 16

// Define global macros for graph dimensions.
// They are passed to width()/height() of graph export further down;
// each one is twice the nominal 1920x1080 (16:9) or 1440x1080 (4:3) size.
global Width16x9 = 1920*2
global Height16x9 = 1080*2
global Width4x3 = 1440*2
global Height4x3 = 1080*2

// The global macros "$GoogleDriveLaptop" and "$GoogleDriveWork"
// are defined in my profile here:  c:\ado\profile.do
// NOTE(review): "$Presentations" is presumably defined there too - confirm.
// Every cd is wrapped in capture on purpose: a location that does not exist
// on the current machine is skipped instead of stopping the script.
capture cd "$GoogleDriveWork"
capture cd "$GoogleDriveLaptop"
capture cd "$Presentations"
// Forward slashes are accepted by Stata on Windows, macOS and Linux alike;
// the previous backslash form only resolved on Windows and failed silently
// (because of capture) everywhere else.
capture cd "./Talks/AllTalks/01_DataManagement2/examples"

// Close any log left open by an earlier run, then start a fresh one.
capture log close
log using DataManagement2, replace



// DOWNLOAD THE 2013-2014 NHANES DEMOGRAPHIC FILE AND KEEP A LOCAL COPY
// ====================================================================
// The clear option empties memory as part of the import itself.
import sasxport5 "https://wwwn.cdc.gov/Nchs/Nhanes/2013-2014/DEMO_H.XPT", clear

// Inspect the four variables used in this talk.
describe  seqn ridageyr riagendr ridreth1
summarize seqn ridageyr riagendr ridreth1

// Store the file ordered by seqn, overwriting any earlier copy.
sort seqn
save "DEMO_2013_2014.dta", replace

browse seqn ridageyr riagendr ridreth1



// STACK TWO NHANES COHORTS: 2013-2014 FOLLOWED BY 2015-2016
// =========================================================

// Step 1: download the 2013-2014 demographic file and save it sorted by seqn.
import sasxport5 "https://wwwn.cdc.gov/Nchs/Nhanes/2013-2014/DEMO_H.XPT", clear
describe  seqn ridageyr riagendr ridreth1
summarize seqn ridageyr riagendr ridreth1
sort seqn
save "DEMO_2013_2014.dta", replace
browse seqn ridageyr riagendr ridreth1

// Step 2: same treatment for the 2015-2016 demographic file.
import sasxport5 "https://wwwn.cdc.gov/Nchs/Nhanes/2015-2016/DEMO_I.XPT", clear
describe  seqn ridageyr riagendr ridreth1
summarize seqn ridageyr riagendr ridreth1
sort seqn
save "DEMO_2015_2016.dta", replace
browse seqn ridageyr riagendr ridreth1

// Step 3: load the first cohort, add the rows of the second one beneath it,
// re-sort the combined rows by seqn and save the result.
use "DEMO_2013_2014.dta", clear
append using "DEMO_2015_2016.dta"
sort seqn
save "DEMO_2013_2016.dta", replace



// COMBINE TWO 2015-2016 NHANES FILES SIDE BY SIDE WITH merge
// ==========================================================

// Body-measurement file (BMX_I): download, inspect, save sorted by seqn.
import sasxport5 "https://wwwn.cdc.gov/Nchs/Nhanes/2015-2016/BMX_I.XPT", clear
describe  seqn bmxht bmxwt bmxbmi
summarize seqn bmxht bmxwt bmxbmi
sort seqn
save "BMX_I.dta", replace
browse seqn bmxht bmxwt bmxbmi

// Demographic file (DEMO_I): download, inspect, save sorted by seqn.
import sasxport5 "https://wwwn.cdc.gov/Nchs/Nhanes/2015-2016/DEMO_I.XPT", clear
describe  seqn ridageyr riagendr ridreth1
summarize seqn ridageyr riagendr ridreth1
sort seqn
save "DEMO_I.dta", replace
browse ridageyr riagendr ridreth1 seqn

// One-to-one match on seqn: DEMO_I is the master data, BMX_I the using data.
// merge adds the variable _merge, which records where each row came from.
use "DEMO_I.dta", clear
merge 1:1 seqn using "BMX_I.dta"
browse seqn _merge ridageyr riagendr ridreth1 bmxht bmxwt bmxbmi



// HOLD TWO DATASETS IN MEMORY AT ONCE AND LINK THEM WITH frames
// =============================================================
// (cwf and pwf are the short synonyms of "frame change" and "frame pwf".)
frames reset

// Frame 1: the demographic data.
frame create demographic
cwf demographic
use "DEMO_I.dta"
describe seqn ridageyr riagendr ridreth1

// Frame 2: the body-measurement data.
frame create anthro
cwf anthro
use "BMX_I.dta"
describe seqn bmxht bmxwt bmxbmi

// List every frame in memory, then show which one is current.
frame dir
pwf

// Back in the demographic frame: build a one-to-one link on seqn to the
// anthro frame, then copy the three body-measurement variables across it.
cwf demographic
frlink 1:1 seqn, frame(anthro)
frget bmxht bmxwt bmxbmi, from(anthro)
describe seqn ridageyr riagendr ridreth1 bmxht bmxwt bmxbmi



// USE frames WITH margins TO PLOT CONTINUOUS-BY-CONTINUOUS INTERACTIONS
// =====================================================================
// The analysis data live in frame "nhanes"; the predictions saved by margins
// are loaded into a second frame, "contour", so the raw data stay in memory.
clear
frame reset
frame create nhanes
frame change nhanes
webuse nhanes2
rename bpsystol sbp
describe sex age race bmi sbp

// Model sbp on age, bmi and their interaction, then save the predicted
// values over a grid of age (20 to 60 by 10) and bmi (10 to 40 by 5)
// to the dataset "predictions".
regress sbp c.age##c.bmi
quietly margins, at(age=(20(10)60) bmi=(10(5)40)) saving(predictions, replace)
frame create contour
frame change contour
use predictions

describe _at1 _at2 _margin
list _at1 _at2 _margin in 1/5

// Give the generic margins variables readable names before plotting.
rename _at1 age
rename _at2 bmi
rename _margin pr_sbp
describe age bmi pr_sbp
list age bmi pr_sbp in 1/5

twoway (contour pr_sbp bmi age, ccuts(90(10)160)),            ///
        xlabel(20(10)60)  ylabel(10(5)40, angle(horizontal))  ///
        xtitle("Age (years)") ytitle("BMI")                   ///
        ztitle("Predicted Systolic Blood Pressure")           ///
        title("Predicted SBP by Age and BMI")

// graph export does not create missing folders; make sure ./graphs exists.
// capture swallows the "already exists" error on repeat runs.
capture mkdir graphs
graph export ./graphs/contour.png, as(png) width($Width4x3) height($Height4x3) replace



// USE frames AND margins TO COMPARE UNADJUSTED AND ADJUSTED MODELS
// ================================================================
// Two regressions of sbp on age are fitted, their margins are saved to disk,
// each saved file is loaded into its own frame, and the two frames are linked
// on age so both sets of predictions can be drawn in one graph.
clear
frame reset
frame create nhanes
frame change nhanes
webuse nhanes2
rename bpsystol sbp

// graph export does not create missing folders; make sure ./graphs exists.
// capture swallows the "already exists" error on repeat runs.
capture mkdir graphs

// Unadjusted model: age and age squared only.
regress sbp c.age##c.age
quietly margins, at(age=(20(10)60)) saving(unadjusted, replace)
marginsplot, ytitle("Predicted Systolic Blood Pressure")            ///
             ylabel(, angle(horizontal) format(%9.0f))              ///
             xtitle("Age (years)")                                  ///
             title("Unadjusted Model for SBP and Age")
graph export ./graphs/unadjusted.png, as(png) width($Width4x3) height($Height4x3) replace

// Adjusted model: adds bmi, the age-by-bmi interaction, sex, race and hlthstat.
regress sbp c.age##c.age bmi c.age#c.bmi i.sex i.race i.hlthstat
quietly margins, at(age=(20(10)60)) saving(adjusted, replace)
marginsplot, ytitle("Predicted Systolic Blood Pressure")                          ///
             ylabel(, angle(horizontal) format(%9.0f))                            ///
             xtitle("Age (years)")                                                ///
             title("Adjusted* Model for SBP and Age")                             ///
             note("*Adjusted for body mass index, sex, race, and health status")
graph export ./graphs/adjusted.png, as(png) width($Width4x3) height($Height4x3) replace

// Load the unadjusted predictions into frame "unadj" and rename the generic
// margins variables so they cannot clash with the adjusted ones.
frame create unadj
frame change unadj
use unadjusted
describe _at1 _margin _ci_lb _ci_ub

rename _at1 age
rename _margin unadj_pr
rename _ci_lb unadj_lb
rename _ci_ub unadj_ub
list age unadj_pr unadj_lb unadj_ub

// Same for the adjusted predictions, in frame "adj".
frame create adj
frame change adj
use adjusted
describe _at1 _margin _ci_lb _ci_ub

rename _at1 age
rename _margin adj_pr
rename _ci_lb adj_lb
rename _ci_ub adj_ub
list age adj_pr adj_lb adj_ub

// Still in frame "adj": link to "unadj" on age and copy its columns over,
// so one frame holds both sets of predictions and interval bounds.
frlink 1:1 age, frame(unadj)
frget unadj_pr unadj_lb unadj_ub, from(unadj)
format %9.1f unadj_pr unadj_lb unadj_ub adj_pr adj_lb adj_ub
list age unadj_pr unadj_lb unadj_ub adj_pr adj_lb adj_ub

// Plots 1 and 3 are the connected lines, which is why the legend keeps
// only those two entries; plots 2 and 4 are the interval caps.
twoway (connected unadj_pr age, lcolor(navy))                               ///
       (rcap unadj_ub unadj_lb age, lcolor(navy))                           ///
       (connected adj_pr age, mcolor(cranberry) lcolor(cranberry))          ///
       (rcap adj_ub adj_lb age, lcolor(cranberry)),                         ///
       ytitle("Predicted Systolic Blood Pressure")                          ///
       ylabel(,angle(horizontal) format(%9.0f))                             ///
       xtitle("Age (years)")                                                ///
       title("Unadjusted and Adjusted Models for SBP and Age")              ///
       note("*Adjusted for body mass index, sex, race, and health status")  ///
       legend(order(1 "Unadjusted" 3 "Adjusted*"))
graph export ./graphs/combined.png, as(png) width($Width4x3) height($Height4x3) replace



// reshape, PART 1: START WIDE, GO LONG, COME BACK
// ===============================================
// reshape1 is loaded and listed, converted to long form with id as the
// identifier and year as the new within-id variable, then converted back.
// A bare "reshape wide" reuses the settings of the preceding reshape.
webuse reshape1, clear
list
reshape long inc ue, j(year) i(id)
list, sepby(id)
reshape wide
list


// reshape, PART 2: START LONG, GO WIDE, COME BACK
// ===============================================
// The mirror image of part 1, starting from reshape6.
webuse reshape6, clear
list, sepby(id)
reshape wide inc ue, j(year) i(id)
list
reshape long
list, sepby(id)




// USE egen TO CREATE NEW VARIABLES
// ==========================================================
clear
webuse reshape1
// CREATE ROW SUMMARY VARIABLES
// Each egen function below works across inc80, inc81 and inc82 within an
// observation: count of nonmissing values, mean, minimum and maximum.
list id inc*
egen inc_n = rownonmiss(inc80 inc81 inc82)
// rowmean(), not rowmin(): inc_mean previously duplicated inc_min.
egen inc_mean = rowmean(inc80 inc81 inc82)
egen inc_min = rowmin(inc80 inc81 inc82)
egen inc_max = rowmax(inc80 inc81 inc82)
list id inc*

// CREATE A COMBINED "RACE-SEX" VARIABLE
// group() numbers every combination of race and sex; the label option
// attaches value labels to those numbers.
clear
webuse nhanes2
tab race sex
egen race_sex = group(race sex), label
tab race_sex

// CUT AGE INTO 10-YEAR CATEGORIES
// at() lists the left-hand ends of the intervals: 20-29, 30-39, ..., 70-79.
egen agecat = cut(age), at(20,30,40,50,60,70,80) label
tab agecat



// STRING FUNCTIONS
// ===========================================================
// NOTE(review): the substr() offsets below assume birthdate looks like
// "Month D, YYYY" (one space after the comma, one space between month and
// day) - confirm against beatles.dta.
clear
use beatles
list

// USE THE proper() FUNCTION AND CONCATENATE fname AND lname
generate name = proper(fname) + " " + proper(lname)
list fname lname name

// USE THE upper() FUNCTION
replace fname = upper(fname)
replace lname = upper(lname)
list fname lname

// IDENTIFY THE POSITION OF THE COMMA IN birthdate FOR EACH OBSERVATION
generate commapos = strpos(birthdate, ",")
list birthdate commapos

// USE THE COMMA POSITION TO EXTRACT THE YEAR USING substr()
// commapos+2 skips the comma and the character after it.
generate year = substr(birthdate, commapos+2, 4)
list birthdate commapos year

// USE THE COMMA POSITION TO EXTRACT THE MONTH AND DAY USING substr()
// commapos-1 characters = everything before the comma.
generate monthday = substr(birthdate, 1, commapos-1)
list birthdate monthday

// USE THE SPACE POSITION TO EXTRACT THE MONTH FROM monthday
// Stop one character before the space so month carries no trailing blank.
generate month = substr(monthday, 1, strpos(monthday, " ")-1)
list birthdate monthday month

// USE THE SPACE POSITION TO EXTRACT THE DAY FROM monthday
// Start one character after the space so day carries no leading blank;
// a length of . means "through the end of the string".
generate day = substr(monthday, strpos(monthday, " ")+1, .)
list birthdate monthday day

list birthdate month day year



// contract: ONE ROW PER sex-BY-race CELL, WITH COUNTS AND PERCENTAGES
// ===================================================================
// The crosstab is shown first for comparison. contract then replaces the
// data in memory; the three options name the extra variables it creates.
webuse nhanes2, clear
tabulate sex race
contract sex race, cfreq(cfreq) percent(percent) cpercent(cpercent)
list, sepby(sex)


// collapse: ONE ROW PER sex-BY-race CELL, HOLDING CELL MEANS
// ===================================================================
// Overall summaries are shown first. collapse then replaces the data in
// memory with the mean of each listed variable within every sex/race group.
webuse nhanes2, clear
summarize age height weight bmi
collapse (mean) age height weight bmi, by(sex race)
list, sepby(sex)



// SUBGROUP ANALYSES: THE by PREFIX VERSUS THE if QUALIFIER
// ========================================================
// The renamed variables sbp and chol are reused by the sections that follow.
frames reset
webuse nhanes2, clear
rename (bpsystol tcresult) (sbp chol)

// One command, one regression per level of sex (data are sorted by sex first)...
by sex, sort: regress sbp age, noheader

// ...or the same two regressions requested one at a time.
regress sbp age if sex==1, noheader
regress sbp age if sex==2, noheader


// _b, _se, return list, and ereturn list
// ============================================================
// Every line in this section reads results left behind by the regress
// command directly above it, so the order of the commands matters.
// sbp is the renamed variable created earlier in this file.

// _b[age] is the coefficient on age and _se[age] its standard error;
// their ratio is the t statistic shown in the regression table.
regress sbp age, noheader
display _b[age]
display _se[age]
display _b[age]/_se[age]

// return list shows the r() results; r(table) holds the coefficient table.
// Z is its transpose, and the last line prints rows 1-2, columns 1-3 of Z.
regress sbp age
return list
matlist r(table)
matrix Z = r(table)'
matlist Z[1..2,1..3]

// ereturn list shows the e() results; R-squared is rebuilt from the
// model sum of squares e(mss) and the residual sum of squares e(rss).
regress sbp age
ereturn list
display "R-squared = " e(mss) / (e(mss) + e(rss))


// STORE, TABULATE AND COMPARE MODEL RESULTS WITH estimates
// ========================================================
// A single model first: fit it, store it under the name "age", then show
// its coefficient table and its fit statistics.
regress sbp age
estimates store age
estimates table age, b(%7.2f)
estimates stats age

// Three models fitted without output, each stored under its own name
// ("age" is stored again, replacing the copy from above).
quietly {
    regress sbp age
    estimates store age

    regress sbp i.sex
    estimates store sex

    regress sbp c.age##i.sex
    estimates store agesex
}

// Side-by-side coefficients and fit statistics for the three stored models.
estimates table age sex agesex, b(%7.2f)
estimates stats age sex agesex

// Likelihood-ratio test of the interaction model against the age-only model.
lrtest agesex age

// LOOP OVER A LIST OF PREDICTORS WITH foreach
// ===========================================
// One regression of sbp per predictor, each stored under the predictor's
// own name, followed by a model with all predictors together stored as "all".
local predictors age bmi chol
foreach x of local predictors {
    quietly regress sbp `x'
    estimates store `x'
}
quietly regress sbp `predictors'
estimates store all

// The macro expands to the three stored names, so both commands below
// compare the single-predictor models with the combined one.
estimates table `predictors' all
estimates stats `predictors' all



// Finish the SMCL log opened at the top of the script...
log close

// ...and write a plain-text copy of it, 120 characters per line,
// overwriting any copy left by an earlier run.
translate "DataManagement2.smcl" "DataManagement2.log", translator(smcl2log) linesize(120) replace





