* DICHOTOMOUS DISCRIMINANT ANALYSIS
* Algorithm Based on OLS Regression
* Joseph Hilbe STB 12-7-91
*
* Syntax, as declared by the parse templates in the program below:
*     discrim groupvar indepvars [if exp] [in range] [, Detail Graph Anova]
*
* The first variable is the group variable. Observations with group==0 are
* counted as group 0 and every remaining observation is counted as group 1;
* the regression dummy is only filled in for group==0 and group==1.
*
* The data in memory are saved to _U_S_E_R.dta on entry and reloaded on exit,
* so none of the working variables created here survive the call.
capture program drop discrim
program define discrim
    version 2.1
    if "%_*"=="" {
        di in red "invalid syntax or conditions - see help discrim"
        exit 198
    }
    mac def _varlist "req ex"
    mac def _options "Detail Graph Anova"
    mac def _in "opt"
    mac def _if "opt"
    parse "%_*"
    parse "%_varlist", parse(" ")
    /* Remember the current dataset name. The original stored the literal  */
    /* text S_FN here, so the restore at the bottom wrote that literal     */
    /* back instead of the real file name.                                 */
    mac def _sfn "%S_FN"
    qui save _U_S_E_R, replace
    /* Apply the optional if/in restriction to the working copy.           */
    /* NOTE(review): the backquote between the two macros is kept exactly  */
    /* as found; confirm whether it is intentional.                        */
    capture {
        if "%_if`%_in"~="" {
            keep %_if %_in
        }
    }
    /* Everything below runs under capture so that the saved data are      */
    /* always reloaded, whatever the return code.                          */
    cap {
        cap drop _dummy
        cap drop _*
        cap drop logprob
        cap drop dscore
        cap drop dfun
        cap drop grpred
        /* An argument-less cap drop that preceded this call was removed.  */
        qui dropmiss %_varlist
        mac def _group %_1
        mac shift
        /* Group sizes. */
        qui count
        mac def _obs=_result(1)
        qui count if %_group==0
        mac def _obs0=_result(1)
        mac def _obs1=%_obs-%_obs0
        /* Dummy response: n1/N for group 0 and -n0/N for group 1. */
        mac def _c0=%_obs1/%_obs
        mac def _c1=-(%_obs0/%_obs)
        gen _prob0=%_obs1/%_obs
        qui gen _dummy=%_c0 if %_group==0
        replace _dummy=%_c1 if %_group==1
        qui reg _dummy %_*
        cap drop _dummy
        /* Stored as the regression R-square (see the R-square display).   */
        mac def _rsquare=_result(7)
        mac def _mahala=(%_rsquare/(1-%_rsquare))*((%_obs*(%_obs-2))/(%_obs0*%_obs1))
        mac def _prop=%_rsquare/%_mahala
        gen _D=sqrt(%_mahala)
        /* One _cof and one _disc variable per independent variable; the   */
        /* discriminant coefficient is the OLS coefficient over _prop.     */
        mac def _i=0
        while "%_1"!= "" {
            mac def _i=%_i+1
            gen _cof%_i=_b[%_1]
            gen _disc%_i=_cof%_i/%_prop
            mac shift
        }
        qui gen _nvar=%_i
        _mncalc %_varlist
        _kcalc %_varlist
        drop _prob0
        /* NOTE(review): _tsquare and _F are computed but never displayed  */
        /* anywhere in this file.                                          */
        mac def _tsquare=%_mahala*((%_obs0*%_obs1)/%_obs)
        mac def _F = %_tsquare*(%_obs-_nvar)/((_nvar-1)*(%_obs-2))
        mac def _CNT0=-_D*(%_obs1/%_obs)
        mac def _CNT1=_D*(%_obs0/%_obs)
        di " "
        di in gr _col(20) "Dichotomous Discriminant Analysis"
        di " "
        /* The value shown as Grand Cntd weights each centroid by the      */
        /* size of the other group.                                        */
        #delimit ;
        di in gr "Observations = " in ye %_obs
           in gr _col(50) "Obs Group 0 = " in ye %9.0g %_obs0;
        di in gr "Indep variables = " in ye _nvar
           in gr _col(50) "Obs Group 1 = " in ye %9.0g %_obs1;
        di " ";
        di in gr "Centroid 0 = " in ye %9.4f %_CNT0
           in gr _col(50) "R-square = " in ye %9.4f %_rsquare;
        di in gr "Centroid 1 = " in ye %9.4f %_CNT1
           in gr _col(50) "Mahalanobis = " in ye %9.4f %_mahala;
        di in gr "Grand Cntd = " in ye %9.4f ((%_obs1*%_CNT0)+(%_obs0*%_CNT1))/%_obs;
        #delimit cr
        cap drop _cof*
        _usdsc %_varlist
        _usds %_varlist
        /* NOTE(review): _result(2) and _result(4) are presumably the      */
        /* between-group and within-group sums of squares from oneway;     */
        /* confirm against the oneway documentation.                       */
        oneway dscore %_group
        mac def _EIGEN=_result(2)/_result(4)
        mac def _CCOR=sqrt(_result(2)/(_result(2)+_result(4)))
        mac def _LAMBDA=_result(4)/(_result(2)+_result(4))
        mac def _CHISQ=log(%_LAMBDA)* -((%_obs-(_nvar+2)/2)-1)
        di " "
        #delimit ;
        di in gr "Eigenvalue = " in ye %9.4f %_EIGEN
           in gr _col(50) "Wilk's Lambda = " in ye %7.4f %_LAMBDA;
        di in gr "Canon. Corr. = " in ye %9.4f %_CCOR
           in gr _col(50) "Chi-square = " in ye %7.4f %_CHISQ;
        di in gr "Eta Squared = " in ye %9.4f %_CCOR^2
           in gr _col(50) "Sign Chi2 = " in ye %7.4f chiprob(_nvar,%_CHISQ);
        #delimit cr
        /* Classification: logprob is 1/(1+exp(dfun)); an observation is   */
        /* predicted as group 1 when logprob is at least .5.               */
        gen logprob=1/(1+exp(dfun))
        gen grpred=1 if logprob>=.5
        replace grpred=0 if logprob<.5
        /* Cell counts of the actual-by-predicted table:                   */
        /*   _A actual 0 predicted 0     _B actual 0 predicted 1           */
        /*   _C actual 1 predicted 0     _D actual 1 predicted 1           */
        /* The macro _D defined here is distinct from the variable _D.     */
        gen _CELL=1
        summ _CELL if %_group==0 & logprob<.5
        mac def _A=_result(2)
        summ _CELL if %_group==0 & logprob>=.5
        mac def _B=_result(2)
        summ _CELL if %_group==1 & logprob<.5
        mac def _C=_result(2)
        summ _CELL if %_group==1 & logprob>=.5
        mac def _D=_result(2)
        mac def _TOT=%_A+%_B+%_C+%_D
        /* NOTE(review): the rule lines put their first + at column 22     */
        /* while the row labels put | at column 21, so the literals look   */
        /* as if they have lost padding; strings are left untouched.       */
        di " "
        di in gr _col(25) "----- Predicted -----"
        di " "
        di in gr _col(13) "Actual | Group 0 Group 1 | Total"
        di in gr _col(13) "---------+" _dup(26) "-" "+--------"
        di in gr _col(13) "Group 0 |" in ye _col(26) %6.0g %_A /*
            */ _col(40) %6.0g %_B in gr _col(49) "|" in ye _col(52) %6.0g %_A+%_B
        di in gr _col(13) "Group 1 |" in ye _col(26) %6.0g %_C /*
            */ _col(40) %6.0g %_D in gr _col(49) "|" in ye _col(52) %6.0g %_C+%_D
        di in gr _col(13) "---------+" _dup(26) "-" "+--------"
        di in gr _col(13) "Total | " in ye _col(26) %6.0g %_A+%_C /*
            */ _col(40) %6.0g %_B+%_D in gr _col(49) "|" in ye _col(52) /*
            */ %6.0g %_TOT
        di in gr _col(13) "---------+" _dup(26) "-" "+--------"
        di " "
        di in gr _col(21) "Correctly predicted = " in ye /*
            */ %6.2f (%_A+%_D)/(%_TOT) * 100 " %"
        di in gr _col(21) "Model sensitivity = " in ye /*
            */ %6.2f (%_A/(%_A+%_B)) * 100 " %"
        di in gr _col(21) "Model specificity = " in ye /*
            */ %6.2f (%_D/(%_C+%_D)) * 100 " %"
        di in gr _col(21) "False Positive (G1) = " in ye /*
            */ %6.2f (%_C/(%_A+%_C)) * 100 " %"
        di in gr _col(21) "False negative (G0) = " in ye /*
            */ %6.2f (%_B/(%_B+%_D)) * 100 " %"
        _dspstat %_varlist
        /* Display names used by the optional listing and graph. */
        rename grpred PRED
        rename dscore DscScore
        rename dfun DscIndex
        rename logprob LnProb1
        /* Option Anova: one-way table of discriminant scores by group. */
        if ("%_anova"!="") {
            di _n(2)
            di in gr _col(18) "Discriminant Scores v Group Variable"
            noisily oneway DscScore %_group
        }
        /* Option Detail: list every observation, flagging misclassified   */
        /* ones in DIFF.                                                   */
        if ("%_detail"!="") {
            di _n(2)
            gen str2 DIFF=" *" if %_group~=PRED
            format DIFF %2s
            format DscIndex DscScore LnProb1 %9.4f
            di in gr _dup(63) "-"
            di in gr " PRED = Predicted Group DIFF = Misclassification"
            di in gr " LnProb1 = Probability Gr 1 DscScore = Discriminant Score"
            di in gr " DscIndex = Discriminant Index"
            di in gr _dup(63) "-"
            di " "
            noisily l %_group PRED DIFF LnProb1 DscIndex DscScore, nol
            cap drop DIFF
        }
        /* Option Graph: probability against discriminant index, with      */
        /* correctly classified and misclassified points as two series.    */
        if ("%_graph"!="") {
            gen _LnP=LnProb1 if (LnProb1>=.5 & %_group==1) | (LnProb1<.5 & %_group==0)
            gen _LnM=LnProb1 if _LnP==.
            label variable _LnP "Classified"
            label variable _LnM "Misclassified"
            label variable DscIndex "Discriminant Index"
            gr _LnP _LnM DscIndex, s(.p) xlab ylab(.1,.3,.4,.5,.6,.7,.9) /*
                */ border yline(.5) /*
                */ ti(" Probability of Classification")
        }
        cap drop mean*
        cap drop mn*
        cap drop CASE
        cap drop PRED
        cap drop LnProb1
        cap drop DscIndex
        cap drop DscScore
        cap drop _nvar
        cap drop _*
    }
    /* Keep the return code of the captured block, put the saved data      */
    /* back, remove the scratch file and restore the dataset name.         */
    mac def _rc = _rc
    qui use _U_S_E_R, clear
    qui erase _U_S_E_R.dta
    mac def S_FN "%_sfn"
    if %_rc==0 {exit}
    exit %_rc
end

* dropmiss varlist
* Drops every observation that has a missing value in any listed variable.
capture program drop dropmiss
program define dropmiss
    version 2.1
    mac def _varlist "req ex"
    parse "%_*"
    parse "%_varlist", parse(" ")
    while "%_1"~="" {
        drop if %_1==.
        mac shift
    }
end

* _mncalc groupvar indepvars
* For the j-th independent variable creates mean<j> (overall mean) and
* mndff<j> (group-0 mean plus group-1 mean; despite the name it is a sum).
* The per-group means mn0<j> and mn1<j> are temporary and dropped on exit.
capture program drop _mncalc
program define _mncalc
    version 2.1
    qui {
        mac def _varlist "req ex"
        parse "%_*"
        parse "%_varlist", parse(" ")
        gen _gv=%_1
        mac shift
        /* The counter is a macro. The original created a data variable    */
        /* named _j here and relied on the undefined macro expanding to    */
        /* nothing on the first increment.                                 */
        mac def _j=0
        while "%_1"~="" {
            mac def _j=%_j+1
            summ %_1
            gen mean%_j=_result(3)
            summ %_1 if _gv==0
            gen mn0%_j=_result(3)
            summ %_1 if _gv==1
            gen mn1%_j=_result(3)
            gen mndff%_j=mn0%_j + mn1%_j
            mac shift
        }
        cap drop _gv
        cap drop mn0*
        cap drop mn1*
    }
end

* _kcalc varlist
* Builds _konst, the constant of the discriminant function:
* -.5 times the sum over variables of _disc<i> * mndff<i>.
* Requires _nvar, _disc<i> and mndff<i> to exist already.
capture program drop _kcalc
program define _kcalc
    version 2.1
    qui {
        mac def _varlist "req ex"
        parse "%_*"
        parse "%_varlist", parse(" ")
        mac def _i=0
        gen _konst=0
        while %_i < _nvar {
            mac def _i=%_i+1
            replace _konst=_disc%_i* mndff%_i + _konst
            mac shift
        }
        replace _konst=_konst*-.5
    }
end

* _usdsc varlist
* Builds the unstandardized coefficients _usdf<i> = _disc<i> / -_D and their
* constant _usdfk = minus the sum of _usdf<i> * mean<i>.
* Requires _nvar, _D, _disc<i> and mean<i> to exist already.
capture program drop _usdsc
program define _usdsc
    version 2.1
    qui {
        mac def _varlist "req ex"
        parse "%_*"
        parse "%_varlist", parse(" ")
        mac def _i=0
        gen _usdfk=0
        while %_i < _nvar {
            mac def _i=%_i+1
            gen _usdf%_i=_disc%_i/-_D
            replace _usdfk=_usdf%_i * mean%_i + _usdfk
            mac shift
        }
        replace _usdfk=_usdfk*-1
    }
end

* _usds groupvar indepvars
* Creates dscore (sum of _usdf<i> * x<i>, plus _usdfk) and dfun (sum of
* _disc<i> * x<i>, plus _konst) for every observation.
* The group variable is only skipped; it is not used in the calculation.
capture program drop _usds
program define _usds
    version 2.1
    qui {
        mac def _varlist "req ex"
        parse "%_*"
        parse "%_varlist", parse(" ")
        mac shift
        mac def _i=0
        gen _dscorep=0
        gen dscore=0
        gen _dsFp=0
        gen dfun=0
        while "%_1"~="" {
            mac def _i=%_i+1
            replace _dscorep=_usdf%_i*%_1
            replace dscore=dscore+_dscorep
            replace _dsFp=_disc%_i*%_1
            replace dfun=dfun+_dsFp
            mac shift
        }
        replace dscore=dscore+_usdfk
        replace dfun=dfun+_konst
        cap drop _dscorep
        cap drop _dsFp
    }
end

* _dspstat groupvar indepvars
* Prints the coefficient table: one row per independent variable showing
* _disc<i> and _usdf<i>, then a Constant row showing _konst and _usdfk.
capture program drop _dspstat
program define _dspstat
    version 2.1
    qui {
        mac def _varlist "req ex"
        parse "%_*"
        parse "%_varlist", parse(" ")
        mac shift
        mac def _i=0
        di _n(1)
        #delimit ;
        di in gr _col(26) "Discrim Function" _col(46) "Unstandardized";
        di in gr _col(11) "Variable" _col(28) "Coefficients" _col(48) "Coefficients";
        di in gr _col(11) _dup(49) "-";
        #delimit cr
        while %_i < _nvar {
            mac def _i=%_i+1
            di in gr _col(11) "%_1" in ye _col(28) %9.4f _disc%_i _col(51) %9.4f _usdf%_i
            mac shift
        }
        di in gr _col(11) "Constant" in ye _col(28) %9.4f _konst _col(51) %9.4f _usdfk
    }
end