AbdelRahmen El Lahga <[email protected]> is having trouble writing a
Mata routine that will reproduce the robust variance estimates from -xtreg, fe
robust-:
> I try to write a code estimating fe model for unbalanced data using mata in
> order to practice my mata!! (see the code below) I find the same coefficient
> vector as the command xtreg but my programmation of the robust se vector is
> certainly wrong it give se like 1.37e+15..; (see the last 10 lines of the
> code below) any suggestions
For reference, AbdelRahmen's original Mata code (with indentation) is included
after my signature.
I believe there is a one line hit that will fix AbdelRahmen's code. I believe
the definition for 'xuux' should be:
xuux = cross(x, uhat:^2, x)
This should result in "Robust" variance estimates similar to using -_robust,
minus(0)-.
In order to "check my work" against -xtreg, fe robust- I made some changes to
the original code, so that it would
1) work with unbalanced panels (see -help mata panelsetup()-)
2) work with a dataset other than AbdelRahmen's c:\projets\nicolas\sas\table
I also included the 'q_c' factor in the variance formula (see Methods and
formulas in [R] regress), this is equivalent to -xtreg, fe robust- which uses
-minus(n+k)- (an option of -_robust-). Here 'n' is the number of panels and
'k' is the number of regression coefficients (not including _cons).
My fe.do and fe.mata files are also included at the end of this email.
--Jeff
[email protected]
***** BEGIN: indented version of AbdelRahmen's Mata code
version 9.1
mata:
mata clear
void fe()
{
stata ("drop _all")
stata ("use c:\projets\nicolas\sas\table")
stata("sort idfamt")
y_init = x_init = s_init = .
st_view(y_init, ., "f_week_hours")
st_view(x_init, ., ("f_lnwage", "m_lnwage", "assetinc" , "kids01" ,
"kids2_4", "kids5p" , ///
"pchange" , "puls_1" , "puls0" , "puls1" , "f_age"))
st_view(s_init, ., "s")
tmax = 21 // tmax periode maximale d'observation
n = rows(x_init)/tmax // nombre d'individus
nt= rows(x_init) // nombre total des lignes
k = cols(x_init) // nombre de regresseurs
k
y = J(nt,1,0)
x = J(nt,k,0)
s = s_init
for (i=1; i<=nt; i++) {
if (s[i]==1) {
y[i,.] = y_init[i,.]
x[i,.] = x_init[i,.]
}
}
y_within=J(nt, 1, 0)
x_within=J(nt, k, 0)
compt=0
for (i=1;i<=n;i++) {
compt = compt+tmax
rec1 = compt-tmax+1 // numero 1ere ligne d'apparition
rec2 = compt // numero derniere ligne d'apparition,
// la boucle marche jusqu'ici
si = s[|rec1 \ rec2|]
Ti = sum(si)
yi = y[rec1..rec2,.]
xi = x[rec1..rec2,.]
y_mean = colsum(yi)/Ti
x_mean = colsum(xi)/Ti
y_within[rec1..rec2,.] = si:*(yi:-y_mean)
x_within[rec1..rec2,.] = si:*(xi:-x_mean)
xi = si:*x_within[rec1..rec2,.]
yi = si:*y_within[rec1..rec2,.]
x[rec1..rec2,.] = xi
y[rec1..rec2,.] = yi
}
xx = cross(x,x)
xy = cross(x,y)
betaFE=invsym(xx)*xy
betaFE // tres bien Abdel!! courage
/* Matrice de varcoc robuste a l'heteroscedasdicite et
l'autocorrelation */
uhat = y_within-x_within*betaFE // Wooldridge page 275 et page 581
xuux = x'*uhat*uhat'*x
variance = invsym(xx)*xuux*invsym(xx)
ee = diagonal(variance)
ee
se = sqrt(ee)
se
tstat = betaFE:/se
tstat
}
fe()
end
***** END: indented version of AbdelRahmen's Mata code
***** BEGIN: fe.mata
version 9.1
mata:
mata clear
void fe(string b_out, string V_out, string se_out, string tstat_out)
{
// Local macros assumed defined in Stata:
// idvar - panel variable
// touse - sample selection variable
// yvar - dependent variable
// xvars - independent variables
// sort Stata's dataset on the panel variable
stata("sort " + st_local("idvar"))
st_view(id=., ., st_local("idvar"), st_local("touse"))
y = st_data(., st_local("yvar"), st_local("touse"))
x = st_data(., tokens(st_local("xvars")), st_local("touse"))
x_mean = mean(x, 1)
y_mean = mean(y, 1)
// use 'info' to identify the panels, 'id' is assumed already sorted
info = panelsetup(id, 1)
n = rows(info) // nombre d'individus
nt= rows(x) // nombre total des lignes
k = cols(x) // nombre de regresseurs
y_within=J(nt, 1, 0)
x_within=J(nt, k, 0)
compt=0
for (i=1;i<=n;i++) {
// 'yi' contains the depvar's values for panel 'i'
yi = panelsubmatrix(y, i, info)
// 'xi' contains the indepvars' values for panel 'i'
xi = panelsubmatrix(x, i, info)
// 'rec1' in the row number for the beginning of panel 'i'
rec1 = info[i,1]
// 'rec2' in the row number for the end of panel 'i'
rec2 = info[i,2]
// compute the panel means
yi_mean = mean(yi, 1)
xi_mean = mean(xi, 1)
y_within[rec1..rec2,.] = yi :- yi_mean
x_within[rec1..rec2,.] = xi :- xi_mean
}
y_within = y_within :+ y_mean
x_within = x_within :+ x_mean, J(nt,1,1)
xx = quadcross(x_within,x_within)
xy = quadcross(x_within,y_within)
betaFE = cholsolve(xx, xy)
invxx = cholinv(xx)
/* Matrice de varcoc robuste a l'heteroscedasdicite et
l'autocorrelation */
uhat = (y_within-x_within*betaFE):^2 // Wooldridge page 275 et page 581
xuux = quadcross(x_within,uhat,x_within)
q_c = nt / (nt - n - k)
variance = q_c * invxx * xuux * invxx
se = sqrt(diagonal(variance))
tstat = betaFE:/se
// saved results
st_matrix(b_out,betaFE')
st_matrix(V_out,variance)
st_matrix(se_out,se')
st_matrix(tstat_out,tstat')
}
end
* finished fe.mata
***** END:
***** BEGIN: fe.do
cscript
version 9.1
do fe.mata
use nlswork
gen age2 = age^2
generate ttl_exp2 = ttl_exp^2
generate tenure2 = tenure^2
generate byte black = race==2
unab yvar : ln_w
unab xvars : age* ttl_exp* tenure* // not_smsa south
unab idvar : idcode
sort idcode
by idcode: gen n = _N
mark touse if idcode < 21 & n > 2
markout touse `yvar' `xvars' `idvar'
keep if touse
unab touse : touse
xtreg `yvar' `xvars' if `touse', fe i(idcode) robust
matrix eb = e(b)
matrix ev = e(V)
mata: fe("b_fe","v_fe", "se_fe", "tstat_fe")
matrix b0 = eb[1,1..colsof(b_fe)]
matrix v0 = ev[1..colsof(b_fe),1..colsof(b_fe)]
mata:
se0 = sqrt(diagonal(st_matrix("v0")))'
mreldif(st_matrix("b_fe"), st_matrix("b0"))
mreldif(st_matrix("v_fe"), st_matrix("v0"))
mreldif(st_matrix("se_fe"), se0)
end
* finished fe.do
***** END:
*
* For searches and help try:
* http://www.stata.com/support/faqs/res/findit.html
* http://www.stata.com/support/statalist/faq
* http://www.ats.ucla.edu/stat/stata/