/************************************************************************* * Maximum Likelihood routines for the Bailey-Makeham model * * * * Commissioned by: R. Clifton Bailey, HCFA * * Henry Krakauer, HCFA HSQB * * Based on code written by * * James P. Summe, Stop 2, NMRI, NNMC. Bethesda, MD. * * Author, Bill Rogers: Rand, Computing Resource Center * *************************************************************************/ #include "bailey.h" #include int idcons = -1; int iter; /* Iteration count */ int maxiter=100; /* Maximum number of iterations */ int parms; /* Number of parameters */ Int2 actparm=0; /* Actual number of parameters */ Int2 *actprm; /* Index numbers of actual parameters */ int reasoniter=1; /* Indicator of convergence */ DOUBLE *means; /* Means of the data */ DOUBLE *variances; /* Variances of the data */ DOUBLE *ntheta; /* Test value of theta (local to calc_ll) */ DOUBLE *adiag; /* Diagonal values of a (used in output) */ DOUBLE *q; /* Values of the first partials */ DOUBLE *qerror; /* Error Bounds for Q */ DOUBLE *a; /* Triangular Second Partial Matrix */ DOUBLE *acp; /* First derivative cross-products diagonal */ DOUBLE *fa; /* Factorization of A */ DOUBLE *b; /* Temporary matrix for diag of 2nd partials*/ DOUBLE *v; /* The new solution */ float *pch; /* Parameter changes */ float *lpch; /* Last parameter changes */ DOUBLE nobs; /* number of observations */ DOUBLE ll0; /* Likelihood of the reference model */ DOUBLE llr0; /* Log likelihood at beginning of step */ DOUBLE llr1; /* Log likelihood at test Marquardt step */ DOUBLE llrlast; /* Log likelihood on the previous step */ DOUBLE rho; /* Marquardt step size */ DOUBLE rho1; /* Trial Marquardt step size */ DOUBLE rhomax=5.0; /* Maximum step size */ DOUBLE llropt; /* Log likelihood--Modified Marq optimum step */ DOUBLE ssexp=-5; /* Sum of squares of exponential ? */ DOUBLE nsf=4; /* Number of signficant figures */ DOUBLE tolerance=0.0; DOUBLE lambda=1.0; DOUBLE lastt; DOUBLE absch; DOUBLE magnitude_largest_change; DOUBLE sum_variances; DOUBLE sp0[NSP]; int found_better_answer; int debug=0; int singularity; int id_largest_change; Int2 error_flag = 0; int in_constant_pass=0; int stopflag=0; DOUBLE llch=0.0; int firstpass=1; /************************************************************************* * msetup() allocate data areas for the maximization part of routine * *************************************************************************/ void msetup() { int i, j; char *spn; parms = NSP*vars; for (i=0; i=0 && !stopflag; ++iter) { if (logf!=stdout) { fclose(logf); logf=fopen(logfile,"a"); } llr0 = ll (q, qerror, a, acp, theta, 2); if (iter==0 && onestep) ll0 = llr0; /* Save the first one */ iter_report(); if (yydebug) { for (i=0; i= pow(10.0,ssexp)) reasoniter=2; else if (2.0*fabs(v[j]) >= fabs(theta[actprm[j]])*pow(10.0,ssexp)) reasoniter=3; } if (iter>maxiter) { spout21("\n Terminating for maximum iterations\n"); reasoniter=0; } if (llch < tolerance && llch > 0.0 && iter>=3 ) reasoniter = -1; if (reasoniter<=0 && !in_constant_pass) { spout21("\n(convergence achieved)\n"); output(a,fa); return 0; } /************************************************************************* * if we are done with the constant portion of the estimation, we must * * reset the "fixed" indicators and adjust the constant terms to reflect * * contributions of the starting values. * * e.g. y = b0 + b1 x <==> y = (b0 - b1*C) + b1(x+C) * * * * Note: assumes that in_constant_pass==1 ==> idcons meaningful * *************************************************************************/ if ((reasoniter<=0 || llch<1e-4) && in_constant_pass && iter>3) { ll0 = llr0; for (i=0; i0.0 && rho+llr0>llr1) { rho *= (0.5/(rho+llr0-llr1)); if (rho>rhomax) rho=rhomax; sprintf(outbuf,"Optimal rho = %12g\n", rho); spout21o(); llropt = calc_ll(rho); } else llropt = llr0; /* Find the local maximum and adjust lambda and rho */ if (debug) { sprintf(outbuf,"Decision: llr0=%15g llr1=%15g llropt=%15g\n", llr0, llr1, llropt); spout21o(); sprintf(outbuf,"Rho = %15g, lambda = %15g\n", rho, lambda); spout21o(); } if (llropt>llr1 && llropt>llr0) { lambda *= pow(10.0,-rho); found_better_answer = 2; llch = llropt-llr0; } else if (llr1>llr0) { lambda /= 10.0; rho=1.0; found_better_answer = 1; llch = llr1 - llr0; } else { /* The local maximum is llr0. Adjust a, increase lambda, and obtain a new v. This is not a better answer, so we need to look more in the steepest descent direction. */ printf("Having trouble taking a good step\n"); if (ridge()) mxerror(1,""); solution(); continue; } } if (debug) printf("Optimal Rho = %22.7e\n", rho); /* Update theta */ id_largest_change = -1; magnitude_largest_change = absch = 0.0; for (j=0; jmagnitude_largest_change) { id_largest_change = j; magnitude_largest_change = t*w; } } /* Is Lambda too small? */ if (lambda<1e-17) lambda = 1e-17; } return 0; } int ridge( ) { int i, j; if (lambda <= 1e16) { lambda *= 10.0; if (debug && actparm<=3) { printf("ridge: lambda = %15g\n",lambda); sprt(a,actparm,"A before ridge"); vprt(b,actparm,"b before ridge"); } for (i=0,j=1; j<=actparm; j++) { i += j; a[i-1] += b[j-1]*lambda*0.9; } if (debug) { sprt(a,actparm,"A after ridge"); } return 0; } return 1; } void solution() { int j; int better; singularity = -1; while ((j=cholsky(a,actparm,fa))>=0){ /* Cholesky factorization of A */ if (j>=0) singularity = actprm[j]; if (debug) printf("Cholesky return was %d\n", j); if (ridge()) mxerror(2,ident[parm_number[actprm[j]%vars]]); if (debug) sprt(a,actparm,"Revised matrix of partials"); if (debug) printf("lambda = %20g, rho = %20g\n", lambda, rho); } solve (fa, v, q, actparm); } /************************************************************************* * calc_ll(rho) calculates the likelihood for the entire dataset * * for a given value of rho. rho is the amount of the stepsize * * indicated in globally known v[] to add to theta[]. No derivatives * * are calculated insofar as this is a trial value. * *************************************************************************/ DOUBLE calc_ll(rho) DOUBLE rho; { DOUBLE logl; int j, jj, i; /* Form the new parameter estimates */ for (jj=j=0; jj || jj==actparm) ntheta[j] = theta[j]; else { ntheta[j] = theta[j]+rho*v[jj]; ++jj; } } /* Output the step size and the Marquardt parameter */ if (debug) printf("rho=%12g, lambda=%12g\n", rho, lambda); logl=ll((DOUBLE *)NULL,(DOUBLE *)NULL,(DOUBLE *)NULL,(DOUBLE *)NULL,ntheta,0); if (debug) { vprt(ntheta,parms,"Trial solution"); sprintf(outbuf," Likelihood = %12g, Rho=%10g\n", logl, rho); spout21o(); } return logl; } /************************************************************************* * ll(q, qerror, a, acp, theta, NeedDerivatives) calculates the * * likelihood function for the dataset given a value of the LOCALLY * * DEFINED parameter theta. Derivatives are calculated according to * * NeedDerivatives * * * * This routine has knowledge of the global data structure and is * * responsible for passing this down to the observation-level lf fcn. * *************************************************************************/ DOUBLE ll(q, qerror, a, acp, theta, NeedDerivatives) DOUBLE *q; /* Derivatives (output) */ DOUBLE *qerror; /* Error bounds (output) */ DOUBLE *a; /* Second partials (output) */ DOUBLE *acp; /* Sum of deriv^2 diag (output) */ DOUBLE *theta; /* Parameters (input) */ int NeedDerivatives; /* 0 = none, 1 = first, 2 = second */ { int i, j, k, m, ii, jj, kk, mm, v, vv; DOUBLE weight; DOUBLE *p; DOUBLE sp[NSP], spd[NSP], spde[NSP], spd2[(NSP*(NSP+1))/2], dv[NDV]; DOUBLE x, xx, LL, t, LLobs; nobs = 0.0; /* zero out accumulations */ if (NeedDerivatives) for (p=a,i=0; i x[%d]=d[%d]=%20g\n", m, k, v, x); */ t = spd[m] * x; q[i] += t; acp[i] += t*t; qerror[i] += spde[m]; /* ??? */ /* printf("q[%d] = %20g\n", i, q[i]); */ if (NeedDerivatives==2) for (ii=0; ii<=i; ++ii) { jj = actprm[ii]; kk = jj%vars; vv = parm_number[kk]; xx = (kk==idcons) ? 1.0 : d[vv]; mm = jj/vars; a[(i*(i+1))/2+ii] += spd2[(m*(m+1))/2+mm]*x*xx; } } } if (firstpass) { sum_variances=0.0; for (i=0; i