Statalist


[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

st: mata programming: trace moptimize evaluator calls & finding embedded functions


From   László Sándor <[email protected]>
To   [email protected]
Subject   st: mata programming: trace moptimize evaluator calls & finding embedded functions
Date   Sat, 2 Jan 2010 17:21:59 +0100

Hi all,

I'm coding up a Latent Class Model estimation routine under Stata 11,
and I tried to use -moptimize- for the M-step in the EM algorithm.
However, I would appreciate some help with the following problems:

— most importantly, my -moptimize- call does not seem to 'find'
feasible initial values, even though my replication of my test case in
an interactive session had no problems (i.e. the given intial values
should be feasible by themselves). Here I mean that I coded up the
same commands that my ado-file should call during my test run, and in
the interactive session -moptimize- ran. If you know how I could dig
deeper into this, this would help a lot. I tried -set trace on-, -set
matadebug on-, -mata: mata set matalnum on-, -mata: mata set
matastrict on-, as well as some -printf()-, -_error()-, and
-moptimize_query(M)- calls, but I cannot squeeze out useful
information from a -moptimize- run called by a Mata function in an ado
file.

— also, if you have any idea what could be the trick for -moptimize-,
I would be very, very grateful for that. I suspect that some
declarations might go awry, even though I went through them multiple
times. I post my longish code below.

— finally, it is a bit annoying that my newly defined Mata function
does not find other Mata function it should use, even if they are
defined in the same ado-file (and Mata call) OR they are saved as .mo
files in my personal folder. The only way it proceeds is if I ran
separate do-files generating the functions and saving the mo files in
the same session I try to run my ado-file. The manual suggests that
this should be much simpler and more robust.

Many thanks for any suggestions.

Laszlo

László Sándor
graduate student
Department of Economics
Harvard University


program emlatentclass, eclass
                version 11
                syntax varlist [if] [in], BY(varname)
                marksample touse
                markout `touse' `varlist' `by'
                tokenize `varlist'
                local lhs `1'
                macro shift
                local rhs `*'
                local k: word count `rhs'
                qui sum `touse'
                local nobs = r(sum)
                cap drop prob_class1
                gen prob_class1 = 0.5
                local mix "prob_class1"
                set matadebug on
                di " `lhs' " " `rhs' " " `mix' " " `by' " " `touse' " `k'
                mata: em("`lhs'","`rhs'","`mix'","`by'","`touse'",`k')
                // ereturn post b V, esample(touse) o(`nobs')
                ereturn display
end

version 11
mata:
mata set matalnum on
mata set matastrict on


void function estep(real rowvector b1, real rowvector b2, real scalar
s1, real scalar s2, string scalar lhs, string scalar rhs, string
scalar mix, string scalar by, string scalar touse) {
            real scalar logli_l, logli_h
            real vector y, id, pi, pi_i
            real matrix X
            st_view(id,.,by,touse)
            info = panelsetup(id, 1)
            st_view(X,.,rhs,touse)
            X = (X,J(rows(X),1,1))
            st_view(y,.,lhs,touse)
            st_view(pi,.,mix,touse)
            index_hi = normalden(y:-X*b1', 0, s1)
            index_li = normalden(y:-X*b2', 0, s2)
            for (i=1;i<=rows(info);i++) {
                index_l = panelsubmatrix(index_li,i,info)
                index_h = panelsubmatrix(index_hi,i,info)
                panelsubview(pi_i,pi,i,info)
                logli_l = quadcolsum(log(index_l))
                logli_h = quadcolsum(log(index_h))
                pi_i[.] =
J(rows(pi_i),1,exp(logli_h)/(exp(logli_h)+exp(logli_l)))
            if (hasmissing(pi)) _error(3351, "E-step produced missing
value, you're screwed.")
            }
}
mata mosave estep(), replace dir(PERSONAL)

void function mstep(transmorphic M,
                 real rowvector b,
                 real colvector lnf)
              {
                      real colvector  p1, p2, s1, s2
                      real colvector  y

                      p1 = moptimize_util_xb(M, b, 1)
                      p2 = moptimize_util_xb(M, b, 2)
                      s1 = moptimize_util_xb(M, b, 3)
                      s2 = moptimize_util_xb(M, b, 4)
                      y = moptimize_util_depvar(M, 1)

st_view(mix,.,moptimize_util_userinfo(M,1),moptimize_util_userinfo(M,2))

                        if (hasmissing(mix)) _error(3351, "mix has
missing value.")
                        if (hasmissing(y)) _error(3351, "y has missing value.")
                        if (hasmissing(p1)) _error(3351, "p1 has
missing value.")
                        if (hasmissing(p2)) _error(3351, "p2 has
missing value.")
                        if (hasmissing(s1)) _error(3351, "s1 has
missing value.")
                        if (hasmissing(s2)) _error(3351, "s2 has
missing value.")

                      lnf = ln(mix:*(normalden(y:-p1, 0,
s1)-normalden(y:-p2, 0, s2))+normalden(y:-p2, 0, s2))
              }
mata mosave mstep(), replace dir(PERSONAL)

void function em(string scalar lhs, string scalar rhs, string scalar
mix, string scalar by, string scalar touse, real scalar k) {
    real vector b1o, b1n, b2o, b2n, s1o, s1n, s2o, s2n
    real scalar d
    b1o = J(1,k+1,1)
    b2o = J(1,k+1,1)
    s1o = 0.5*colmaxabs(st_data(.,lhs,touse))
    s2o = 0.5*colmaxabs(st_data(.,lhs,touse))
    d = 1
while (d>10^(-3)) {
// E-step
    estep(b1o,b2o,s1o,s2o,lhs,rhs,mix,by,touse)
// M-step
    M = moptimize_init()
    moptimize_init_evaluator(M, &mstep())
    moptimize_init_evaluatortype(M,"lf")
    moptimize_init_touse(M, touse)
    moptimize_init_ndepvars(M, 1)
    moptimize_init_depvar(M, 1, lhs)
    moptimize_init_eq_n(M, 4)
    moptimize_init_eq_indepvars(M, 1, rhs)
    moptimize_init_eq_cons(M, 1, "on")
    moptimize_init_eq_indepvars(M, 2, rhs)
    moptimize_init_eq_cons(M, 2, "on")
    moptimize_init_eq_indepvars(M, 3, "")
    moptimize_init_eq_cons(M, 3, "on")
    moptimize_init_eq_indepvars(M, 4, "")
    moptimize_init_eq_cons(M, 4, "on")
    moptimize_init_eq_name(M, 1, "class1")
    moptimize_init_eq_name(M, 2, "class2")
    moptimize_init_eq_name(M, 3, "se1")
    moptimize_init_eq_name(M, 4, "se2")
//    moptimize_init_by(M, by)
    moptimize_init_eq_coefs(M, 1, b1o)
    moptimize_init_eq_coefs(M, 2, b2o)
    moptimize_init_eq_coefs(M, 3, s1o)
    moptimize_init_eq_coefs(M, 4, s2o)
    moptimize_init_search(M, "on")
    moptimize_init_search_random(M, "on")
    moptimize_init_search_bounds(M, 3, (0,.))
    moptimize_init_search_bounds(M, 4, (0,.))
    moptimize_init_valueid(M, "LogL with given mixing")
    moptimize_init_nuserinfo(M, 2)
    moptimize_init_userinfo(M, 1, mix)
    moptimize_init_userinfo(M, 2, touse)
    moptimize_init_tracelevel(M,"hessian")
    moptimize_query(M)
    _moptimize(M)
    printf("The moptimize return code is %5.4g",moptimize_result_returncode(M))
    moptimize_result_post(M)
    moptimize_result_display()
    b1n = moptimize_result_coefs(M,1)
    b2n = moptimize_result_coefs(M,2)
    s1n = moptimize_result_coefs(M,3)
    s2n = moptimize_result_coefs(M,4)
    d = sum(abs((b1n-b1o,b2n-b2o,s1n-s1o,s2n-s2o)))
    b1o = b1n
    b2o = b2n
    s1o = s1n
    s2o = s2n
}
}
mata mosave em(), replace dir(PERSONAL)

end
exit

*
*   For searches and help try:
*   http://www.stata.com/help.cgi?search
*   http://www.stata.com/support/statalist/faq
*   http://www.ats.ucla.edu/stat/stata/



© Copyright 1996–2024 StataCorp LLC   |   Terms of use   |   Privacy   |   Contact us   |   What's new   |   Site index