Bookmark and Share

Notice: On March 31, it was announced that Statalist is moving from an email list to a forum. The old list will shut down on April 23, and its replacement, statalist.org is already up and running.


[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

st: Mata efficiency: st_views and st_selects


From   László Sándor <sandorl@gmail.com>
To   statalist@hsphsun2.harvard.edu
Subject   st: Mata efficiency: st_views and st_selects
Date   Fri, 27 Apr 2012 17:09:10 -0400

Hi,

For correct standard errors for matching on propensity scores, I
intend to use the following Mata function after a _logit run
estimating the propensity score. It completes in less than 3 seconds
for 5000 observations (on Stata 12.1 MP for mac), which is better than
my expectations (-nnmatch- is notoriously slow, at it was written
before Mata).

However, to ease my final calculations, I do not deal with completely
separate copies of the data into a treated and an untreated subsample,
but I use views (onto tempvars), so the values I set for the
subsamples end up in the "full sample". Now I realize that probably I
could have achieved the same with copying things into matrices only,
and at worst joining the few vectors in the end, as needed. My
question is: Do you expect it's worth the rewrite? In sample of
millions, would the views be much slower than otherwise?

Also, of course I am grateful for any other comment you have on the function.

Thanks!

Laszlo

void function mpsc(string scalar yvar,
					string scalar tvar,
					string scalar xvars,
					real scalar M,
					real scalar L,
					string scalar touse)
{
	string scalar pscdensvar
	real matrix y, t, X, theta, psc, pscd, weightvar, te, sigmabar,
kperm, covhat, mu0hat, mu1hat, xmte
	real matrix y0, y1, X0, X1, xyk, psc0, psc1, te0, te1, kperm0,
kperm1, sb0, sb1, ch0, ch1, mu0hat0, mu0hat1, mu1hat0, mu1hat1, xmte0,
xmte1
	real matrix yki, ykw, logitv, information_matrix, Iinv, tauqmv, tauqmvt
	real rowvector chat, ct2hat, ct1hat, dtaudtheta
	string matrix xv
	string rowvector xvarmat
	real colvector dist
	real scalar i, j, rows0, rows1, n, n1, n0, k
	real scalar sigmatau, sigmat, taun, taunt, sigmahatadj, sigmahatadjt
	st_view(y,.,yvar,touse)
	n = rows(y)
	st_view(t,.,tvar,touse)
	logitv = st_matrix("e(V)")
	theta = st_matrix("e(b)")
	k = length(theta)
	xv = st_matrixcolstripe_split("e(V)",.,0)
	for (i=1; i<rows(xv); i++) {
		xvarmat = xvarmat , xv[i,1]
		for (j=2; j<=cols(xv); j++) {
			xvarmat[i] = xvarmat[i] + xv[i,j]
		}
	}
	X = st_data(.,xvarmat,touse) , J(n,1,1)
	st_view(psc,.,st_addvar("double","pscvar"),touse) // NB to change
these to tempvars, there is no need to leave them hanging
	psc[.] = invlogit(X*theta')
	st_view(pscd,.,st_addvar("double",st_tempname(),1),touse) // NB to
change these to tempvars, there is no need to leave them hanging
	pscd[.] = psc:*(-1:*psc:+1)
	information_matrix = quadcross(X,pscd,X) :/n
	st_view(te,.,st_addvar("double",st_tempname(),1),touse)
	st_view(sigmabar,.,st_addvar("double",st_tempname(),1),touse)
	st_view(kperm,.,st_addvar("double",st_tempname(),1),touse)
	st_view(covhat,.,st_addvar("double",st_tempname(k),1),touse)
	st_view(mu0hat,.,st_addvar("double",st_tempname(),1),touse)
	st_view(mu1hat,.,st_addvar("double",st_tempname(),1),touse)
	st_view(xmte,.,st_addvar("double",st_tempname(),1),touse) // For
"TEs" on covariate matching
	kperm[.,.] = J(n,1,0)
	// Selecting observations for matching among untreated:
	st_select(y0,y,-1*t:+1)
	st_select(X0,X,-1*t:+1)
	st_select(psc0,psc,-1*t:+1)
	st_select(te0,te,-1*t:+1)
	st_select(sb0,sigmabar,-1*t:+1)
	st_select(kperm0,kperm,-1*t:+1)
	st_select(ch0,covhat,-1*t:+1)
	st_select(mu0hat0,mu0hat,-1*t:+1)
	st_select(mu1hat0,mu1hat,-1*t:+1)
	st_select(xmte0,xmte,-1*t:+1)
	// Selecting observations for matching among treated:
	st_select(y1,y,t)
	st_select(X1,X,t)
	st_select(psc1,psc,t)
	st_select(te1,te,t)
	st_select(sb1,sigmabar,t)
	st_select(kperm1,kperm,t)
	st_select(ch1,covhat,t)
	st_select(mu0hat1,mu0hat,t)
	st_select(mu1hat1,mu1hat,t)
	st_select(xmte1,xmte,t)
	n1 = rows(y1)
	n0 = n-n1	
	rows0 = n-n1
	rows1 = n1	
	for (i=1; i<=rows1; i++) {
		dist = abs(psc0:-psc1[i])
		minindex(dist,M,yki,ykw)
		te1[i] = y1[i] - mean(y0[yki])
		kperm0[yki] = kperm0[yki] :+ (1 / rows(yki))
		minindex(dist,M,yki,ykw)
		mu0hat1[i] = mean(y0[yki])
		minindex(abs(psc1:-psc1[i]),L+1,yki,ykw) // NB that it includes the
original too here!
		xyk = quadmeanvariance( (y1[yki],X1[yki,.]) )
		mu1hat1[i] = xyk[1,1]
		sb1[i] = xyk[2,1]
		ch1[i,.] = xyk[|2,2 \ 2,.|]
// Now match on covariates
		minindex(quadrowsum((X0:-X1[i,.]):^2),M,yki,ykw)
		xmte1[i] = y1[i] - mean(y0[yki])		
	}
	for (i=1; i<=rows0; i++) {
		dist = abs(psc1:-psc0[i])
		minindex(dist,M,yki,ykw)
		te0[i] = mean(y1[yki]) - y0[i]
		kperm1[yki] = kperm1[yki] :+ (1 / rows(yki))
		minindex(dist,L + 1,yki,ykw)
		mu1hat0[i] = mean(y1[yki])		
		minindex(abs(psc0:-psc0[i]),L+1,yki,ykw)  // NB that it includes the
original too here!
		xyk = quadmeanvariance( (y0[yki],X0[yki,.]) )
		mu0hat0[i] = xyk[1,1]
		sb0[i] = xyk[2,1]
		ch0[i,.] = xyk[|2,2 \ 2,.|]
// Now match on covariates
		minindex(quadrowsum((X1:-X0[i,.]):^2),M,yki,ykw)
		xmte0[i] = mean(y1[yki])-y0[i]		
	}
	
	tauqmv = quadmeanvariance(te)
	taun = tauqmv[1,1]
	sigmatau = tauqmv[2,1] + mean((kperm:^2 + ((2*M-1)/M) * kperm):*sigmabar)
	tauqmvt = quadmeanvariance(te,t)
	taunt = tauqmvt[1,1]
	sigmat = (n/n1) * tauqmvt[2,1] + (n*n0/n1^2) * mean((kperm0:^2 -
(kperm0 :/ M)):*sb0)
	
	chat = mean(covhat:*pscd:*(t:/psc:^2 + (-1:*t:+1):/(-1:+psc):^2))

	ct2hat =(n/n1) :* mean(covhat:*pscd:*psc:*(t:/(psc:^2) +
(-1:*t:+1):/((-1:+psc):^2)))
	ct1hat =(n/n1) :* mean(X:*pscd:*((mu1hat-mu0hat):-taunt))
	dtaudtheta = (n/n1) :* mean(X:*pscd:*(xmte:-taunt))
	Iinv = invsym(information_matrix)

	sigmahatadj = sigmatau - chat*Iinv*chat'
	sigmahatadjt = sigmat - (ct1hat+ct2hat)*Iinv*(ct1hat+ct2hat)' +
dtaudtheta*Iinv*dtaudtheta'
	
	st_numscalar("e(ate)", taun)
	st_numscalar("e(ate_se)", sqrt(sigmatau/n))
	st_numscalar("e(ate_se_adj)", sqrt(sigmahatadj/n))
	st_numscalar("e(atet)", taunt)
	st_numscalar("e(atet_se)", sqrt(sigmat/n))
	st_numscalar("e(atet_se_adj)", sqrt(sigmahatadjt/n))
	st_numscalar("e(N)", n)
	st_numscalar("e(N1)",n1)
	st_numscalar("e(N0)",n0)
}
mata mosave mpsc(), replace
end
*
*   For searches and help try:
*   http://www.stata.com/help.cgi?search
*   http://www.stata.com/support/statalist/faq
*   http://www.ats.ucla.edu/stat/stata/


© Copyright 1996–2014 StataCorp LP   |   Terms of use   |   Privacy   |   Contact us   |   Site index