* Stata 2.1 version 1.0.0 JPR 03.11.91. * If Symm option is used, program finds lambda for * sign(data-mean)*abs(data-mean) by min kurtosis. program define boxcoxr version 2.1 /* Global macros used, other than S_*: exp, ropt, rhs, lmax, rms. */ set adosize 20 mac def S_1 . /* Lambda */ mac def S_2 . /* Lower confidence limit */ mac def S_3 . /* Upper confidence limit */ mac def S_4 . /* Log likelihood for raw data */ mac def S_5 . /* Log likelihood for transformed data */ mac def _varlist "req ex min(1)" mac def _if "opt" mac def _in "opt" mac def _exp "opt" mac def _options "Zero(real .001) DELta(real .01) Ci(real 0) Iter(integer 0) Lstart(real 1e20) GEN(string) Ropt(string) Anova Symm Quiet Detail" parse "%_*" cap drop _x cap drop _lx cap drop _ll cap drop _s cap drop _xtsf capture { parse "%_varlist", parse(" ") mac def exp "%_exp" mac def ropt "%_ropt" mac def _lhs "%_1" mac shift mac def rhs "%_*" gen _x = %_lhs %_if %_in summ _x mac def _obs = _result(1) if %_obs < 3 { noisily error 2001 } if _result(5) == _result(6) { di in red "%_lhs has no variance" exit 198 } if (%_zero == 0) & (%_ci > 0) { di in red "no confidence interval when zero = 0" exit 198 } if %_iter <= 0 { mac def _iter = 10 } mac def _noisy = ("%_detail" ~= "") /* Calc geometric mean of _x or its transform */ if "%_symm" ~= "" { mac def _sym = 1 gen _s = sign(_x-_result(3)) replace _x = abs(_x-_result(3))+1 gen _xtsf = log(_x) summ _xtsf mac def gm = exp(_result(3)) } else { mac def _sym = 0 gen _s = 1 gen _xtsf = log(_x) summ _xtsf mac def gm = exp(_result(3)) } /* Calculate LL for untransformed data. */ mac def _aov = ("%_anova" ~= "") _loglik1 _x _s 1 %_aov 1 %rhs mac def _lraw = %lmax if (%_lstart < .999e20) { mac def _k = %_lstart } else { mac def _k = 1 } if %_noisy { di in gr "(Note: iterations performed using zero =" /* */ in ye %_zero in gr ")" _n gen _lx = . gen _ll = . mac def ix = 0 } mac def _kl = . mac def _kh = . mac def _target = 0 mac def _deriv = 1 mac def S_1 = 0 mac def S_5 = 0 _root4 %_k %_zero %_delta %_target %_iter %_sym %_noisy %_deriv %_anova mac def _k = %S_1 /* lambda */ mac def _ltsf = %S_5 /* maximized log likelihood */ if (%S_2 == -1) & ("%_quiet" == "") { di in red "Lambda: no convergence after " %_iter " iterations" } else { if "%_gen" ~= "" { capture drop %_gen if %_k { gen %_gen = (_x^%_k-1)/%_k label var %_gen "(%_lhs^%_k-1)/%_k" } else { gen %_gen = log(_x) label var _xtsf "ln(%_lhs)" } } } mac def _x2 = 0 if (%_ci > 0) & (%_k ~= .) { /* Find support interval for lambda. First calculate LL for untransformed data. */ mac def _x2 = 0.5*invnorm(0.5+0.5*%_ci)^2 /* Lower confidence limit (if it exists). If not, it is taken as missing (minus infinity). */ mac def _target = -%_x2 mac def _deriv = 0 mac def S_1 = %_k /* Starting value for lower limit for lambda is lambda-0.1. Pass max LL via S_5 and lambda via S_1. */ mac def _kl = %_k-.5 _root4 %_kl %_zero %_delta %_target %_iter %_sym %_noisy %_deriv %_anova mac def _kl = %S_1 if (%S_2 == -1) & ("%_quiet" == "") { di in red "Lower limit: no convergence after " %_iter " iterations" } mac def _target = -%_target mac def S_1 = %_k mac def S_5 = %_ltsf mac def _kh = %_k+0.5 _root4 %_kh %_zero %_delta %_target %_iter %_sym %_noisy %_deriv %_anova mac def _kh = %S_1 if (%S_2 == -1) & ("%_quiet" == "") { di in red "Upper limit: no convergence after " %_iter " iterations" } } } if %_noisy { lab var _ll "Log likelihood" lab var _lx "Lambda" if (%_ltsf ~= .) & (%_x2 > 0) { mac def _yl = %_ltsf-%_x2 graph _ll _lx, c(s) s(O) xla yla yline(%_yl) sort } else { graph _ll _lx, c(s) s(O) xla yla sort } } if ("%_quiet" == "") { #delimit ; di in gr _n " Variable | Obs" _col(25) "Lambda" %3.0f 100*%_ci "% confidence interval" _col(58) "LL(raw)" _col(66) "LL(x^lambda)" _n " ---------+" _dup(66) "-" ; mac def _skip=9-length("%_lhs") ; di in gr _skip(%_skip) "%_lhs |" in ye %7.0f %_obs " " %8.3f %_k " " %8.3f %_kl " " %8.3f %_kh " " %11.3f %_lraw " " %11.3f %_ltsf ; #delimit cr } mac def S_1 %_k mac def S_2 %_kl mac def S_3 %_kh mac def S_4 %_lraw mac def S_5 %_ltsf cap drop _xtsf cap drop _x cap drop _s end program define _root4 version 2.1 /* Args: 1=lambda, 2=%_zero, 3=%_delta, 3=target LL, 4=max iterations, 5=%_sym, 6=1 for noisy, 7=derivative, 8=anova if non-null, else regrn. Right-hand side variables for regression or anova passed in %rhs. Returns lambda in %S_1, #iterations in %S_2, max LL in %S_5. If zero is 0, perform calculations just once. */ mac def _k0 = %S_1 /* ML est of lambda, if already calculated */ mac def _ltsf = %S_5 /* Max log likelihood, if already calculated */ mac def _k = %_1 /* Starting value for lambda */ mac def _zero = %_2 mac def _delta = %_3 mac def _target = %_4 mac def _itmax = %_5 mac def _sym = %_6 mac def _noisy = %_7 mac def _deriv = %_8 mac def _aov = 0 if "%_9" ~= "" { mac def _aov = 1 } mac def _iter = 0 /* Transformation is standardized Box-Cox (Atkinson p.87) */ _loglik1 _x _s %_k %_aov %gm %rhs mac def _lmax = %lmax mac def _rms = %rms if %_noisy { di in gr "Iter Lambda Zero Variance Lmax" } if %_zero == 0 { mac def _f0 = 0 } else { if %_deriv { mac def _e = %_k+%_delta _loglik1 _x _s %_e %_aov %gm %rhs mac def _f0 = (%lmax-%_lmax)/%_delta } else { mac def _f0 = %_ltsf-%lmax if %_k < %_k0 { mac def _f0 = -%_f0 } mac def _f0 = %_f0-%_target } while (abs(%_f0) > %_zero) & (%_iter < %_itmax) { if %_noisy { di in ye %4.0f %_iter %12.5f %_k %12.5f %_f0 %12.5f %rms %12.5f %lmax mac def ix = %ix+1 replace _lx = %_k in %ix replace _ll = %_lmax in %ix } mac def _iter = %_iter+1 mac def _e = %_k+%_delta _loglik1 _x _s %_e %_aov %gm %rhs if %_deriv { mac def _f1 = %lmax mac def _e = %_e+%_delta _loglik1 _x _s %_e %_aov %gm %rhs mac def _f1 = (%lmax-%_f1)/%_delta } else { mac def _f1 = %_ltsf-%lmax if %_k < %_k0 { mac def _f1 = -%_f1 } mac def _f1 = %_f1-%_target } mac def _m = (%_f1-%_f0)/%_delta if %_m == 0 { noisily di in red /* */ "Convergence problem, doubling the value of delta." mac def _seps = %_seps*2 mac def _delta = %_delta*2 } else { mac def _k = %_k-%_f0/%_m _loglik1 _x _s %_k %_aov %gm %rhs mac def _lmax = %lmax mac def _rms = %rms if %_deriv { mac def _e = %_k+%_delta _loglik1 _x _s %_e %_aov %gm %rhs mac def _f0 = (%lmax-%_lmax)/%_delta } else { mac def _f0 = %_ltsf-%lmax if %_k < %_k0 { mac def _f0 = -%_f0 } mac def _f0 = %_f0-%_target } } if %lmax == . { mac def _iter = %_itmax } } if abs(%_f0) > %_zero { /* Convergence failure - iterations set to -1. */ mac def S_2 -1 mac def _k = . } } if %_noisy { di in ye %4.0f %_iter %12.5f %_k %12.5f %_f0 %12.5f %rms %12.5f %lmax } mac def S_1 %_k mac def S_2 %_iter mac def S_5 %_lmax end program define _loglik1 version 2.1 /* Arg 1 is _x, 2 is multiplier (e.g. sign of _x), 3 is %_k (lambda), 4 is 0 for regression, 1 for anova, 5 is geometric mean of %_1. Either regress _xtsf on %rhs or summarize _xtsf. Calc skewness, returned in _result(14), RMS, in %rms, and partially maximized likelihood in %lmax. */ cap drop _res cap drop _xtsf if %_3 { gen _xtsf = %_2*(%_1^%_3-1)/(%_3*%_5^(%_3-1)) } else { gen _xtsf = %_2*%_5*log(%_1) } if "%rhs" == "" { summ _xtsf %exp, detail mac def _rss = (_result(1)-1)*_result(4) } else { cap drop _res if %_4 == 0 { reg _xtsf %rhs %exp %ropt } else { anova _xtsf %rhs %exp %ropt } mac def _rss = _result(4) } mac def rms = %_rss/_result(1) if %rms == 0 { mac def rms = 1e-7 } mac def lmax = -0.5*_result(1)*log(%rms) end