*! version 2.1.0. 3 Jan 1997 STB-36 sg68 *! 2.1.0 (Jan 03, 1997) Jeroen Weesie/ICS * -- added -n- option * -- added -by-, -saving-, -nolist * -- added cochran-style counts for small cells * 2.0.1 (Oct 20, 1996) Jeroen Weesie/ICS * -- checked consistency with Stata 5.0 * 2.0.0 (Sept, 1996) Jeroen Weesie/ICS * -- full rewrite to syntax Stata 4.0 (ado, tempvars, new macro syntax, ..) * -- added -plot- * -- numlist support * -- help file *! 1.0.0 1991 Albert Verbeek/ICS capture program drop gof program define gof version 5.0 local varlist "req ex min(2) max(2)" local if "opt" local in "opt" #del ; local options "df(int 0) x2 cr lr g2 ft kl x2n LAmbda(str) by(str) LIst n(str) Plot SAving(str) SCale" ; #del cr parse "`*'" confirm integer number `df' if `df' < 0 { di in re "negative df is too advanced for me ..." exit 2000 } if "`n'" ~= "" { confirm number `n' if `n' <= 0 { di in re "non-positive population size invalid" exit 2000 } } tempname ncell nexp nobs gof la p tempvar exp obs stat touse * ------------------------------------------------------------------------ * check obs and exp are valid * ------------------------------------------------------------------------ mark `touse' `if' `in' parse "`varlist'", parse(" ") quiet gen double `obs' = `1' if `touse' quiet gen double `exp' = `2' if `touse' markout `touse' `obs' `exp' * check obs>=0, exp>=0 capture assert `obs' >= 0 if `touse' if _rc { di in re "Negative observed values not permitted" exit 2000 } capture assert `exp' >= 0 if `touse' if _rc { di in re "Negative expected values not permitted" exit 2000 } capture assert `obs' == 0 if `exp' == 0 if _rc { di in bl "observed > 0, while expected = 0." list `obs' if `obs' > 0 & `obs' != . & `exp' == 0 } * obs is approximately integer-valued (if -n- not specified) if "`n'" == "" { capture assert abs(`obs'-round(`obs',1)) < 1e-6 if _rc { di in bl "observed is not integer-valued" } } * ------------------------------------------------------------------------ * -by- aggregate observations * ------------------------------------------------------------------------ if "`by'" != "" { quiet count if `touse' local ncell0 = _result(1) unabbrev `by' local by "$S_1" sort `by' `touse' quiet by `by' `touse' : replace `obs' = sum(`obs') quiet by `by' `touse' : replace `exp' = sum(`exp') quiet by `by' `touse' : replace `touse' = 0 if _n<_N } * ------------------------------------------------------------------------ * ensure that sum(obs) = sum(exp) * ------------------------------------------------------------------------ quiet summ `obs' if `touse', meanonly scalar `nobs' = _result(18) scalar `ncell' = _result(1) quiet summ `exp' if `touse', meanonly scalar `nexp' = _result(18) if "`n'" != "" { if abs(`nobs'-1) > 0.0001 { di in bl "observed does not sum to 1" } if abs(`nexp'-1) > 0.0001 { di in bl "expected does not sum to 1" } replace `obs' = `obs' * (`n'/`nobs') if `touse' replace `exp' = `exp' * (`n'/`nexp') if `touse' scalar `nobs' = `n' scalar `nexp' = `n' } else { if abs(`nobs'/`nexp' - 1) > 0.001 { if "`scale'" == "" { di in re "-obs- and -exp- do not have the same sum" exit 2000 } else di in bl "-exp- is scaled to the same sum as -obs-" } quiet replace `exp' = `exp' * (`nobs'/`nexp') if `touse' } if `df' > `ncell' { di in re "df > #cells is impossible" exit 2000 } if `df' == 0 { local df "." } * ------------------------------------------------------------------------ * header * ------------------------------------------------------------------------ di in gr "Cressie-Read multinomial goodness-of-fit (" /* */ in ye `df' in gr " df; " in ye `ncell' _c if "`ncell0'" != "" { di in gr "/" in ye `ncell0' in gr " cells)" } else di in gr " cells)" * ------------------------------------------------------------------------ * Cochran-type warning messages for small cell counts * ------------------------------------------------------------------------ quiet count if `exp' < 1 & `touse' local nexp1 = _result(1) quiet count if `exp' < 5 & `touse' local nexp5 = _result(1) quiet count if `obs' < 1 & `touse' local nobs1 = _result(1) quiet count if `obs' < 5 & `touse' local nobs5 = _result(1) di _n _col(4) in gr "%cells(exp<1) = " in ye %6.3f `nexp1'/`ncell' /* */ _col(30) in gr "%cells(exp<5) = " in ye %6.3f `nexp5'/`ncell' /* */ _n _col(4) in gr "%cells(obs<1) = " in ye %6.3f `nobs1'/`ncell' /* */ _col(30) in gr "%cells(obs<5) = " in ye %6.3f `nobs5'/`ncell' * ------------------------------------------------------------------------ * process powers * ------------------------------------------------------------------------ if "`lambda'" != "" { numlist `lambda', real local lambda "$S_1" } if "`x2n'" != "" { local lambda "`lambda' -2.0" } if "`kl'" != "" { local lambda "`lambda' -1.0" } if "`ft'" != "" { local lambda "`lambda' -0.5" } if "`g2'" != "" | "`lr'" != "" { local lambda "`lambda' 0" } if "`cr'" != "" { local lambda "`lambda' 0.667" } if "`x2'" != "" { local lambda "`lambda' 1" } if "`lambda'" == "" { local lambda "-2 -1 -.5 0 .667 1" } * ------------------------------------------------------------------------ * prepare for plotting using -postfile- * ------------------------------------------------------------------------ if "`plot'" != "" { tempname fhandle tempfile goffile postfile `fhandle' lambda gof using `goffile' local list } else local list "list" * ------------------------------------------------------------------------ * loop over the powers lambda * ------------------------------------------------------------------------ di parse "`lambda'", parse(" ") while "`1'" != "" { scalar `la' = `1' if `la' == 0 { quiet gen double `stat' = 2 * `obs' * log(`obs'/`exp') if `touse' quiet summ `stat' if `touse', meanonly scalar `gof' = _result(18) } else if `la' == -1 { quiet gen double `stat' = 2 * `exp' * log(`exp'/`obs') if `touse' quiet summ `stat' if `touse', meanonly scalar `gof' = _result(18) } else if `la' != 0 & `la' != -1 { quiet gen double `stat' = 2 * `obs'*((`obs'/`exp')^(`la') - 1) if `touse' quiet summ `stat' if `touse', meanonly scalar `gof' = _result(18) / (`la'*(`la'+1)) } else if `la' >= 0 { quiet count if `obs' >= 0 & `exp' > 0 & `touse' scalar `gof' = _result(1) } else if `la' < 0 { quiet count if `obs' > 0 & `exp' >= 0 & `touse' scalar `gof' = _result(1) } if "`list'" != "" { local name " " if abs(`la'+2) < .001 { local name " Neyman's X2" } if abs(`la'+1) < .001 { local name "Kullbach's KL" } if abs(`la'+.5) < .001 { local name "Freeman-Tukey" } if abs(`la') < .001 { local name " LR" } if abs(`la'-2/3) < .001 { local name " Cressie-Read" } if abs(`la'-1) < .001 { local name " Pearson's X2" } scalar `p' = chiprob(`df',`gof') di in gr "l(" in ye %5.2f `la' in gr ") `name' = " /* */ in ye %9.3f `gof' in gr " p = " in ye %6.4f `p' } if "`plot'" != "" { post `fhandle' `la' `gof' } drop `stat' mac shift } global S_1 `gof' * ------------------------------------------------------------------------ * plot gof x lambda * ------------------------------------------------------------------------ if "`plot'" != "" { postclose `fhandle' preserve use `goffile', clear if `df' != 0 { local p90 = 2*invgammap(`df'/2, 0.90) local p95 = 2*invgammap(`df'/2, 0.95) local p99 = 2*invgammap(`df'/2, 0.99) local yl "yline(`p90',`p95',`p99')" local dftext "; `df' df)" } if "`saving'" != "" { local saving "saving(`saving')" } local nc = `ncell' graph gof lambda, s(.) c(l) xlab ylab `yl' bor gap(3) `saving' /* */ t1("Cressie-Read goodness-of-fit statistics (`nc' cells`dftext')") /* */ t2("(horizontal lines at .90, .95 and .99 critical values)") restore } end exit