* version 1.1 (27/9/99) R.Wolfe [STB-51: sg118] program define opartchi, rclass version 6 * Check the syntax of the command syntax varname [if] [in] [fweight/] /* */ [, BY(varname) Loglinear OROWS TABle SCore(varname) RSCore(varname)] marksample touse, zeroweight markout `touse' `score' markout `touse' `by', strok * Check the ordinal variable to ensure >2 categories tempvar ordvar quietly { egen `ordvar' = group(`varlist') if `touse' summ `ordvar' if `touse' local ncats = r(max) } if `ncats'<3 { di in red "`varlist' takes 2 or less values in requested sample" exit 498 } * Sum the cell counts. if "`exp'"~="" { local wgted "*`exp'" } tempvar count quietly { gen long `count'=1 if `touse' sort `touse' `by' `ordvar' by `touse' `by' `ordvar': replace `count'=sum(`count'`wgted') if `touse' by `touse' `by' `ordvar': replace `count'=. if `touse' & _n~=_N } * Drop from data all but 1 record per cell of the table preserve quietly{ drop if `count'==. fillin `by' `ordvar' replace `count'=0 if `count'==. } * Output table if requested if "`table'"~="" { tab `by' `varlist' [fweight=`count'], row } * Define names of temporary variables tempvar N_xj n_ix E_ij n p_j mu V_1i g1_j V_2i g2_j c_sc c_rsc tempvar byvar scvar rscvar mu2 mu3 mu4 add temp work temp2 temp3 temp4 * Convert grouping variable (string or numeric) to increasing integer values sort `by' quietly { gen long `byvar'=0 by `by': replace `byvar' = 1 if _n==1 replace `byvar' = sum(`byvar') summ `byvar' local nrows = r(max) if `nrows'<2 { di in red "Need at least two rows to do comparisons" exit 498 } } * Check that user-supplied row scores are valid if "`rscore'"=="" { qui gen double `rscvar' = `byvar' } else { quietly { _Chk_scr `rscore' `byvar' Row `by' by `byvar': replace `rscore' = `rscore'[1] if `rscore'==. gen double `rscvar' = `rscore' } if "`orows'"=="" { local orows "orows" } } qui summ `rscvar' qui gen `c_rsc'=`rscvar'-r(mean) * Check that user-supplied category scores are valid. if "`score'"=="" { qui gen double `scvar' = `ordvar' } else { quietly { _Chk_scr `score' `ordvar' Column `varlist' by `ordvar': replace `score' = `score'[1] if `score'==. gen double `scvar' = `score' } } qui summ `scvar' qui gen `c_sc'=`scvar'-r(mean) * Part 1 : Calculate Pearson chi-sq (optionally do equivalent for loglinear model) * quietly { egen long `N_xj'=sum(`count'), by(`ordvar') egen long `n_ix'=sum(`count'), by(`byvar') egen long `n'=sum(`count') gen double `E_ij'=`n_ix'*`N_xj'/`n' gen double `temp'=sum((`count'-`E_ij')^2/`E_ij') global chi2_i=`temp'[_N] drop `temp' global df1_ind=(`nrows'-1)*(`ncats'-1) } if "`logline'"~="" { qui cap xi: glm `count' i.`byvar' i.`ordvar', fam(poisson) link(log) if _rc~=0 { local outind "failed" } global D_i=e(deviance) global df0_ind=e(df_pear) } * Part 2 : Calculate location and dispersion components of Pearson's chi-sq * (optioanlly do equivalent calculations for loglinear model) * quietly { gen double `p_j'=`N_xj'/`n' egen double `mu'=sum((`scvar')*`p_j'), by(`byvar') gen double `temp2'=(`scvar'-`mu')^2 egen double `mu2'=sum(`p_j'*`temp2'), by(`byvar') gen double `temp3'=(`scvar'-`mu')^3 egen double `mu3'=sum(`p_j'*`temp3'), by(`byvar') gen double `temp4'=(`scvar'-`mu')^4 egen double `mu4'=sum(`p_j'*`temp4'), by(`byvar') gen double `work'=`mu4'-((`mu3'^2)/`mu2')-`mu2'^2 gen double `temp'=1/sqrt(`work') drop `work' `temp3' `temp4' sort `byvar' `ordvar' gen double `work'=sqrt(`mu2') gen double `g1_j'=(`scvar'-`mu')/`work' gen double `g2_j'=`temp'*(`temp2' - `mu3'*(`scvar'-`mu')/`mu2' - `mu2') drop `temp' `temp2' egen double `V_1i'=sum(`count'*`g1_j'/sqrt(`n_ix')), by(`byvar') egen double `V_2i'=sum(`count'*`g2_j'/sqrt(`n_ix')), by(`byvar') sort `byvar' `ordvar' by `byvar': replace `V_1i'=. if _n~=_N by `byvar': replace `V_2i'=. if _n~=_N gen double `temp'=sum(`V_1i'^2) global chi2_l=`temp'[_N] drop `temp' gen double `temp'=sum(`V_2i'^2) global chi2_d =`temp'[_N] drop `temp' global df1_loc=`nrows'-1 } if "`logline'"~="" & "`orows'"~="" { quietly { gen double `temp'=`c_rsc'*`c_sc' capture xi: glm `count' i.`byvar' i.`ordvar' `temp', fam(poisson) link(log) drop `temp' } if _rc~=0 { local outtrd ""No convergence"" } else { global D_tr=$D_i-e(deviance) global df_lloc=$df0_ind-e(df_pear) local outtrd " %10.4g $D_tr _col(72) %6.4f chiprob($df_lloc,$D_tr) " return scalar D_tr=$D_tr } } if "`logline'"~="" { qui cap xi: glm `count' i.`byvar'*`c_sc' i.`ordvar' , fam(poisson) link(log) if _rc~=0 { local outloc "failed" } global D_l=$D_i-e(deviance) global df0_loc=$df0_ind-e(df_pear) qui gen `temp'=(`c_sc')^2 qui cap xi: glm `count' i.`byvar'*`c_sc' i.`byvar'*`temp' i.`ordvar' , /* */ fam(poisson) link(log) drop `temp' if _rc~=0 { local outdis "failed" } global D_d=$D_i-$D_l-e(deviance) global df0_dis=$df0_ind-$df0_loc-e(df_pear) return scalar D_i=$D_i return scalar D_l=$D_l return scalar D_d=$D_d } * Part 3 : Perform chi-square tests on test statistics * if "`logline'"~="" { local out1 "Log-linear models" local out2 "Deviance P-value" if "`outind'"=="" { local outind " %10.4g $D_i _col(72) %6.4f chiprob($df0_ind,$D_i) " } else { local outind ""No convergence"" } if "`outloc'"=="" { local outloc " %10.4g $D_l _col(72) %6.4f chiprob($df0_loc,$D_l) " } else { local outloc ""No convergence"" } if "`outdis'"=="" { local outdis " %10.4g $D_d _col(72) %6.4f chiprob($df0_dis,$D_d) " } else { local outdis ""No convergence"" } local outdup ""-"" local outcom " (/ deviance)" } * Present final output in tabular format. * #delimit ; di in green _n(2) _col(37) "Chi-square tests" _col(60) "`out1'" _n; di in green _col(28) "df" _col(35) "Chi-square" _col(47) "P-value" _col(60) "`out2'" ; di in green "Independence " _col(25) in yellow %5.0f $df1_ind _col(35) %10.4g $chi2_i _col(48) %6.4f chiprob($df1_ind,$chi2_i) _col(58) `outind' ; di in green _dup(55) "-" _dup(22) `outdup'; di in green _col(25) "Components of independence test `outcom'" ; di in green "Location" _col(25) in yellow %5.0f $df1_loc _col(35) %10.4g $chi2_l _col(48) %6.4f chiprob($df1_loc,$chi2_l) _col(58) `outloc' ; di in green "Dispersion" _col(25) in yellow %5.0f $df1_loc _col(35) %10.4g $chi2_d _col(48) %6.4f chiprob($df1_loc,$chi2_d) _col(58) `outdis' ; if "`orows'"~="" { di in green _col(25) "Ordinal X Ordinal 2-way table" ; qui gen double `temp'=sum(`V_1i') ; qui gen double `add'=`temp'[_N]^2/`nrows' ; qui anova `V_1i' `rscvar', cont(`rscvar') ; di in green "Trend in location effect" _col(25) in yellow %5.0f 1 _col(35) %10.4g e(mss)+`add'[_N] _col(48) %6.4f chiprob(1,e(mss)+`add'[_N]) _col(58) `outtrd' ; return scalar chi2_tr=e(mss)+`add'[_N] ; } ; #delimit cr * Store results in output indicators return scalar df_i=$df1_ind return scalar df=$df1_loc return scalar chi2_i=$chi2_i return scalar chi2_l=$chi2_l return scalar chi2_d=$chi2_d end * Program to check scores are valid program define _Chk_scr args score sortvar margin name sort `sortvar' `score' quietly capture assert `score'>=`score'[_n-1] if `score'[_n-1]~=. if _rc~=0 { di in red "`margin' scores must increase with `name'" exit 498 } cap by `sortvar': assert `score' == `score'[1] if `score'~=. if _rc~=0 { di in red "`score' varies within categories of `name'" exit 498 } end