*! v 1.0.8 PR 12Feb98. STB-42 sg79 program define gam version 5.0 if "`1'" == "" | substr("`1'",1,1)=="," { if "$S_E_cmd"!="gam" { error 301 } preserve _gamrslt exit } global S_E_gam local varlist "req ex min(1)" local if "opt" local in "opt" local weight "aweight fweight" #delimit ; local options "Family(string) Link(string) DF(string) DEAD(string) MIssing(string) noCONStant BIG LTolerance(real .001) noSTAnd" ; #delimit cr parse "`*'" if "$GAMDIR"=="" { local dir "c:\ado\" } else local dir $GAMDIR if "`big'"!=""{ local gamprog `dir'gambig.exe local maxreal 1000 } else { local gamprog `dir'gamfit.exe local maxreal 70 } cap confirm file `gamprog' local rc=_rc errfile `rc' `gamprog' local f = lower(trim("`family'")) local lf = length("`f'") if "`f'"=="" { local fam "gauss" } else if "`f'"==substr("binomial",1,`lf') { local fam "binom" } else if "`f'"==substr("gamma",1,max(`lf',3)) { local fam "gamma" } else if "`f'"==substr("gaussian",1,max(`lf',3)) { local fam "gauss" } else if "`f'"==substr("poisson",1,`lf') { local fam "poiss" } else if "`f'"==substr("cox",1,`lf') { local fam "cox" } else { di in red "unknown family() `f'" exit 198 } local li = lower(trim("`link'")) local lli = length("`li'") if "`li'"=="" { if "`fam'"=="gauss" { local l "ident" } else if "`fam'"=="binom" { local l "logit" } else if "`fam'"=="poiss" { local l "logar" } else if "`fam'"=="gamma" { local l "inver" } else if "`fam'"=="cox" { local l "cox" } } else if "`li'"==substr("identity",1,max(`lli',3)) { local l "ident" } else if "`li'"==substr("inverse",1,max(`lli',3)) { local l "inver" } else if "`li'"==substr("cox",1,`lli') { local l "cox" } else if "`li'"=="log" { local l "logar" } else if "`li'"==substr("logit",1,`lli') { local l "logit" } else { di in red "invalid link `link'" exit 198 } if "`missing'"=="" { local missing 9999 } confirm num `missing' if "`constan'"!="" { local stand nostand } if "`fam'"=="cox" { if "`dead'"=="" { di in red "dead() required with cox" exit 198 } else { unabbrev "`dead'" local dead "$S_1" local constan noconstant local stand /* always standardize cox */ } } else if "`dead'"!="" { di in red "dead() invalid, not Cox model" exit 198 } if "`constan'"!="noconstant" { local interc "yes" } else local interc "no" parse "`varlist'", parse(" ") local y "`1'" mac shift local x "`*'" local nx 0 local longn while "`1'"!="" { if length("`1'")>6 { local longn "`longn' `1'" } local nx = `nx'+1 local x`nx' `1' mac shift } if "`longn'"!="" { di _n in bl "Length of variable(s)" in ye /* */ "`longn'" in bl " is/are >6 characters." di in bl "There may be problems if any name is not unique to 6 chars." } if `nx'<1 { di in red "insufficient predictors" exit 198 } /* Set up degrees of freedom for smoothers. Default is 1 df (linear) for everything. */ local d 1 local i 1 while `i'<=`nx' { local df`i' `d' local i = `i'+1 } local df_all1 1 if "`df'"!="" { parse "`df'", parse(",") local ncl 0 /* # of comma-delimited clusters */ while "`1'"!="" { if "`1'"!="," { local ncl=`ncl'+1 local clust`ncl' "`1'" } mac shift } if "`clust`ncl''"=="" { local ncl=`ncl'-1 } if `ncl'>`nx' { di in red "too many df() values specified" exit 198 } /* Disentangle each varlist:string cluster */ local i 1 while `i'<=`ncl' { parse "`clust`i''", parse("=:") if "`2'"!=":" & "`2'"!="=" { local 2 ":" local 3 `1' local j 0 local 1 while `j'<`nx' { local j=`j'+1 local 1 `1' `x`j'' } } local dfk `3' cap confirm num `dfk' if _rc { di in red "invalid df() value `dfk'" exit 198 } if `dfk'<1 { di in red "invalid df() value `dfk'" exit 198 } if `dfk'>1 { local df_all1 0 } unabbrev `1' parse "$S_1", parse(" ") while "`1'"!="" { local dfv `1' local k 0 local j 1 while `j'<=`nx' { if "`dfv'"=="`x`j''" { local k `j' local j `nx' } local j = `j'+1 } if `k'==0 { di in red "`dfv' must be one of the predictors" exit 198 } local df`k' `dfk' mac shift } local i = `i'+1 } } if `df_all1' { di in bl "[model is linear, not a GAM---all the df are 1]" } tempvar touse mark `touse' `if' `in' markout `touse' `varlist' `dead' /* Weights */ qui if "`exp'"!="" { tempvar wt gen `wt' `exp' replace `wt'=. if `wt'<=0 replace `touse'=0 if `wt'==. if "`weight'"=="aweight" { sum `wt' if `touse' replace `wt' = `wt'/_result(3) } } /* Estimate storage for gamfit.exe or gambig.exe */ local vsize=`nx'+("`constan'"!="noconstant")+("`dead'"!="")+("`exp'"!="") qui count if `touse' local lworkr=int(.5+_result(1)*(`vsize'^.2)/25) if `lworkr'>`maxreal' { di in bl "[Approximate problem size: " `lworkr' "000 reals. " /* */ "Available:" `maxreal' "000 reals.]" di in red "problem is too big for `gamprog'" exit 2002 } /* Save "index" variable to keep track of data order for later merge */ quietly { sort `x' cap drop _index gen long _index=_n /* Output data and $.mod files for gamfit.exe */ preserve drop if `touse'==0 local stub "$" local i 1 tempvar s gen long `s'=0 while `i'<=`nx' { local xi `x`i'' /* Count df for xi (#distinct values - 1) */ sort `xi' replace `s'=sum(`xi'>`xi'[_n-1]) local GAMd`i'=`s'[_N] if `GAMd`i''<`df`i'' { noi di in red "`df`i'' df for `xi' are too many" /* */ "---only " `GAMd`i''+1 " distinct values" exit 2001 } /* Standardize each x */ global GAMs`i' 1 if `GAMd`i''>=2 & "`stand'"!="nostand" { /* at least 3 distinct values */ sum `xi' global GAMs`i'=sqrt(_result(4)) if ${GAMs`i'}==0 { noi di in red "`xi' has zero variance" exit 2001 } replace `x`i'' = cond(`x`i''==., `missing', /* */ (`xi'-_result(3))/${GAMs`i'}) } else { replace `x`i'' = `missing' if `x`i''==. } local i = `i'+1 } sort _index outfile `y' `x' `wt' `dead' using `stub'.dat, comma replace nolabel keep _index save `stub'.inx, replace * Create model specification file. local i 1 local model`i'=ltrim("'DATA: ', '`stub''") local i=`i'+1 local model`i'=ltrim("'P: '," /* */ +string(1+`nx'+("`fam'"=="cox")+("`exp'"!=""))) local i=`i'+1 local model`i'=ltrim("'N: ', -1") local i=`i'+1 local model`i'=ltrim("'FORMAT: ', 'free'") local i=`i'+1 local model`i'=ltrim("'MISSING-CODE: ',"+string(`missing')) local i=`i'+1 local model`i'=ltrim("'INTERCEPT: ', `interc'") local i=`i'+1 local model`i'=ltrim("'VARIABLE NAME MODE DF'") local i=`i'+1 local model`i'=ltrim("'`y'', 'response', 0") local i=`i'+1 local j 1 while `j'<=`nx' { local model`i'=ltrim("'`x`j''','predictor',"+string(`df`j'')) local j=`j'+1 local i=`i'+1 } if "`exp'"!="" { local model`i'=ltrim("'`wt'', 'weight', 0") local i=`i'+1 } if "`fam'"=="cox" { local model`i'=ltrim("'`dead'', 'censoring', 0") local i=`i'+1 } local model`i'=ltrim("'FAMILY: ', '`fam''") local i=`i'+1 local model`i'=ltrim("'LINK: ', '`l''") local i=`i'+1 /* H & T use .001, .001 as tolerances for convergence of local scoring and backfitting, while suggesting .0005 for latter in program. We use ltolera=.001 (default) and .0005 for greater safety. */ local model`i'=ltrim("'THRESHOLDS: ',"+string(`ltolera')+", .0005") local i=`i'+1 /* H & T use 20, 15 as max iterations for local scoring and backfitting. We use 40, 30 for greater safety. */ local model`i'=ltrim("'MAX ITERS: ', 40, 30") local nspec `i' drop _all set obs `nspec' tempvar model gen str44 `model'="" local blank48 " " local i 0 while `i'<`nspec' { local i=`i'+1 replace `model'=substr("`model`i''`blank48'",1,44) in `i' } outfile `model' using `stub'.mod, replace noquote * !c:\ado\gamfit.exe !`gamprog' erase `stub'.dat erase `stub'.mod /* Read model summary statistics */ drop _all cap infile stats using `stub'.out local rc=_rc errfile `rc' `stub'.out erase `stub'.out local fault=int(stats[1]+.5) /* fault code */ local nobs=int(stats[2]+.5) /* number of observations */ local tdf=stats[3] /* error degrees of freedom */ local devpen=stats[4] /* penalized deviance */ local scale=stats[5] /* estimated scale parameter */ /* Read fit, residuals and predictors */ drop _all /* !! Dropping GAM_sres and GAM_sres because seem fairly useless. !! Anyway, don't know what GAM_res is for Cox models. */ cap infile `y' GAM_mu GAM_res GAM_sres `x' using `stub'.fit local rc=_rc errfile `rc' `stub'.fit drop GAM_res GAM_sres label var GAM_mu "GAM fitted values" * label var GAM_res "GAM residuals" * label var GAM_sres "GAM standardized residuals" erase `stub'.fit tempfile tmp save `tmp' /* Read individual smooths, confidence bands and partial residuals */ local i 0 while `i'<`nx' { local i=`i'+1 local vname=substr("`x`i''",1,6) if `GAMd`i''>1 { drop _all cap infile `x`i'' s_`vname' l_`vname' h_`vname'/* */ r_`vname' using `x`i''.gra local rc=_rc errfile `rc' `x`i''.gra /* SE from confidence band */ gen e_`vname'=(h_`vname'-s_`vname')/1.96 drop `x`i'' h_`vname' l_`vname' lab var s_`vname' "GAM `df`i'' df smooth for `x`i''" lab var e_`vname' "GAM SE of smooth for `x`i''" /* Cox: Standardize each smooth to mean zero */ if "`fam'"=="cox" { drop r_`vname' sum s_`vname' replace s_`vname'=s_`vname'-_result(3) } else lab var r_`vname' "GAM partial residual for `x`i''" tempfile tmp`i' save `tmp`i'' } erase `x`i''.gra } use `tmp', clear local i 0 while `i'<`nx' { local i=`i'+1 if `GAMd`i''>1 { /* >2 distinct values */ merge using `tmp`i'' /* Cox: calc overall PI */ if "`fam'"=="cox" { local vname=substr("`x`i''",1,6) replace GAM_mu=GAM_mu+s_`vname' } erase `tmp`i'' drop _merge } } /* Merge with index values */ compress sort `x' drop `y' `x' merge using `stub'.inx drop _merge sort _index erase `stub'.inx save `tmp', replace global S_E_cons="`constan'"=="" global S_E_dead `dead' global S_E_depv `y' global S_E_dev `devpen' global S_E_disp `scale' global S_E_fam `fam' global S_E_link `l' global S_E_nobs `nobs' global S_E_stub `stub' global S_E_tdf `tdf' global S_E_vl `x' noisily _gamrslt /* Drop variables that might be left over from previous run of gamfit.exe */ restore cap drop GAM_mu local i 0 while `i'<`nx' { local i=`i'+1 local vname=substr("`x`i''",1,6) cap drop s_`vname' cap drop e_`vname' cap drop r_`vname' } /* Merge existing data with data from gamfit run */ sort _index merge _index using `tmp' erase `tmp' drop _merge _index count } di _n in ye _result(1) in gr " records merged." global S_E_cmd gam end program define _gamrslt * GAM results. Needs a preserve command before using it. cap drop _all cap infile dof slope se z gain pvalue using ${S_E_stub}.sum local rc=_rc errfile `rc' ${S_E_stub}.sum local nx: word count $S_E_vl local nv1=`nx'+$S_E_cons if `nv1'!=_N { di in red /* */ "inconsistency found when reading GAM results from file ${S_E_stub}.sum exit 2002 } local flo "family ${S_E_fam}, link ${S_E_link}." di _n in gr "Generalized Additive Model with `flo'" #delimit ; di _n in gr "Model df = " in ye %9.3f $S_E_nobs-$S_E_tdf _col(49) in gr "No. of obs = " in ye %9.0g $S_E_nobs ; di in gr "Deviance = " in ye %9.0g $S_E_dev _col(49) in gr "Dispersion = " in ye %9.0g $S_E_disp ; #delimit cr local skip=9-length("$S_E_depv") di in gr "----------+" _dup(59) "-" di in gr _skip(`skip') /* */ "$S_E_depv | df Lin. Coef. Std. Err. z Gain P>Gain" di in gr "----------+" _dup(59) "-" tempname tg totdf b se scalar `tg'=0 scalar `totdf'=0 local i 0 while `i'<`nv1' { local i=`i'+1 if $S_E_cons & (`i'==`nv1') { local v _cons } else local v: word `i' of $S_E_vl local skip=9-length("`v'") if abs(dof[`i']-1)<1e-6 { local fmt %4.0f _skip(3) } else { local fmt %7.3f scalar `tg'=`tg'+gain[`i'] scalar `totdf'=`totdf'+dof[`i']-1 } scalar `b'=slope[`i'] scalar `se'=se[`i'] if ($S_E_cons & `i'<`nv1') | "$S_E_fam"=="cox" { scalar `b'=`b'/${GAMs`i'} scalar `se'=`se'/${GAMs`i'} } local name=substr("`v'",1,6) global S_`name'=`b' global E_`name'=`se' di in gr _skip(`skip') "`v' |" /* */ in ye `fmt' dof[`i'] " " /* */ in ye %9.0g `b' " " /* */ in ye %9.0g `se' " " /* */ in ye %9.3f z[`i'] " " /* */ in ye %9.3f gain[`i'] " " /* */ in ye %9.4f pvalue[`i'] } di in gr _dup(70) "-" di in gr "Total gain (nonlinearity chisquare) = " in ye %9.3f `tg' /* */ in gr " (" in ye %5.3f `totdf' in gr " df), P = " /* */ in ye %6.4f chiprob(`totdf',`tg') end program define errfile * 1=rc, 2=file giving problems. if `1' { noi di in red "GAMFIT failure, `2' not found" exit `rc' } end