Bookmark and Share

Notice: On April 23, 2014, Statalist moved from an email list to a forum, based at statalist.org.


[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

st: SAS macro to do.file


From   [email protected]
To   "[email protected]" <[email protected]>
Subject   st: SAS macro to do.file
Date   Wed, 07 Aug 2013 13:12:47 +0200

Dear All,

I use Stata 13 IC.

I need help for "translate" a SAS macro for NRI for Cox regression in a do.file.

Is anyone interest?

The SAS macro is available at:
http://saswiki.org/images/5/53/16._KSFE_2012_M%C3%BChlenbruch_Makro_reclassification_phreg.sas

I also enclose copy of the macro (text)

************************************************************
Progname: 			reclassification_phreg.sas

Purpose: Creating a macro for the estimation of reclassification statistics (NRI, IDI)
					for the added predictive value of an new parameter

					%nriidi macro by Lars Berglund is extended for cox-regression!!!

Author:				Kristin Mühlenbruch 			[email protected]
					(with the help of W. Bernigau)  [email protected]

Date:				29.03..2011

Last Modification:	25.05.2011

Path:				"____"

Output:				ROC AUC values and comparisons
					ROC Curves
					Cox regression estimates
					Risk Tables
					NRI, continuous NRI
					IDI


************************************************************;
/* Description of macro components ******************************************************************** ***dataset - input dataset which includes all variables for analysis *** ***id - identification variable for each observation *** ***timevar - variable that contains the follow-up time of the study participants *** ***outcome - dependant binary variable for the logistic and cox regression model (Y variable) *** ***model1 - Model1 with all covariates included as independant variables (sparser model) ***
***nvar1   		- Number of variables included in model1											***
***roc_title1	- Title for the ROC curve of model 1												***
***model2 - Model2 with all covariates included as independant variables from model1
				  + added variables (larger model)													***
***nvar2		- Number of variables included in model2											***
***roc_title2	- Title for the ROC curve of model 2												***
***nriskcat		- Number of categories for risk stratification										***
***riskcutoffs	- Cut-off values for separating the risk categories									***
***risk_year - Time risk which will be estimated (5-year risk, 10-year risk, etc.) *** ***base_muster - First Baseline pattern of covariates for determining the survivor function
				  values for the variables are listed with blanks as delimiter						***
***printtable - option for the risk table output by event status (Y=Yes/N=No) ***
******************************************************************************************************/

%macro reclassification_phreg(dataset, id, outcome,timevar, model1,nvar1,roc_title1, model2, nvar2,roc_title2, nriskcat, riskcutoffs, risk_year, base_muster, printtable);

/*Determining the ROC-AUC value for each of the models and Comparison of those (Plots included)*/
	ods graphics on;
	proc logistic data=&dataset plots=roc descending;
		model &outcome= &model2 ;
		roc &roc_title1 &model1;
		roc &roc_title2 &model2;
		roccontrast reference(&roc_title1) / estimate e;
		ods output ROCAssociation=rocass ROCContrastEstimate=rocdiff;
	run;

	%let Anz1=1;
	%let Word=%scan(&model1,%eval(&Anz1+1));
	%do %while (&Word ne);
		%let Anz1=%eval(&Anz1+1);
		%let Word=%scan(&model1,%eval(&Anz1+1));
	%end;

	%do i=1 %to &Anz1;
		%let Mod1&i=%scan(&model1,&i);
	%end;

	/*Estimation of model 1 by cox regression*/
	proc phreg data=&dataset;
   		model &timevar*&outcome(0)= &model1 ;
		ods output ParameterEstimates=out1_1;
	run;

	proc transpose data=out1_1(keep=estimate)  out=out1_2;
	run;


	data Inrisks1;
        %do i=1 %to &Anz1;
			%let base1_wert&i=%scan(&base_muster,&i);
		%end;
		%do i=1 %to &Anz1; &&mod1&i=&&base1_wert&i;
		%end;
		output;
	run;

*Determining the individual risks out of the cox model parameters from model 1;
	proc phreg data=&dataset nosummary;
		model &timevar*&outcome(0)=&model1 / rl;
baseline covariates=Inrisks1 out=riskscore1 survival=s lower=S_lower upper=S_upper/nomean;
	run;
	data riskscore1_1;
set riskscore1(where=(&timevar>(&risk_year-0.01) and &timevar<(&risk_year+0.01)));
		absrisk=1-s;
		LCL=1-S_upper;
		UCL=1-S_lower;
	run;
	data out_1;
		if _N_=1 then set out1_2;
		set &dataset;
	run;
	data pred_1;
		array col col1-col&Anz1.;
		if _N_=1 then set riskscore1_1(keep=absrisk);
		set out_1;
		pred1=.;
		exponent1=(col1*&mod11 %do i=2 %to &Anz1; +col&i*&&Mod1&i %end;);
		pred1=1-(1-absrisk)**exp(exponent1);
	run;

	%let Anz2=1;
	%let Word2=%scan(&model2,%eval(&Anz2+1));
	%do %while (&Word2 ne);
		%let Anz2=%eval(&Anz2+1);
		%let Word2=%scan(&model2,%eval(&Anz2+1));
	%end;

	%do i=1 %to &Anz2;
		%let Mod2&i=%scan(&model2,&i);
	%end;

	/*Estimation of model 2 by cox regression*/
	proc phreg data=&dataset;
   		model &timevar*&outcome(0)=&model2;
		ods output ParameterEstimates=out2_1;
	run;
	proc transpose data=out2_1(keep=estimate)  out=out2_2;
	run;

	data Inrisks2;
        %do i=1 %to &Anz2;
			%let base2_wert&i=%scan(&base_muster,&i);
		%end;
		%do i=1 %to &Anz2; &&mod2&i=&&base2_wert&i;
		%end;
		output;
	run;

*Determining the individual risks out of the cox model parameters from model 2;
	proc phreg data=&dataset nosummary;
		model &timevar*&outcome(0)=&model2/ rl;
baseline covariates=Inrisks2 out=riskscore2 survival=s lower=S_lower upper=S_upper/nomean;
	run;

	data riskscore2_1;
set riskscore2(where=(&timevar>(&risk_year-0.01) and &timevar<(&risk_year+0.01)));
		absrisk=1-s;
		LCL=1-S_upper;
		UCL=1-S_lower;
	run;
	data out_2;
		if _n_=1 then set out2_2;
		set &dataset;
	run;
	data pred_2;
		array col col1-col&Anz2.;
		if _N_=1 then set riskscore2_1(keep=absrisk);
		set out_2;
		exponent2=(col1*&mod21 %do i=2 %to &Anz2; +col&i*&&Mod2&i %end;);
		pred2=1-(1-absrisk)**exp(exponent2);
	run;

	proc sort data=pred_1;
		by &id;
	run;
	proc sort data=pred_2;
		by &id;
	run;

	/*From here onwards using the %nriidi macro by Lars Berglund*/
	data out;
merge pred_1(drop=absrisk col1--col&Anz1.) pred_2(drop=absrisk col1--col&Anz2.);
		by &id;
		predcat1=.;
	    predcat2=.;
		predcat1c='                                               ';
		predcat2c='                                               ';
		label predcat1c='Established risk factors';
		label predcat2c='Established risk factors + new risk factors';
		label &outcome='Cases(1)/Controls(0)=';
	    riskcoh=.;
	    riskcol=.;
		riskcohc='                           ';
	    riskcolc='                           ';
		ic='   ';
		colon=':';
		to='-';
		one='1';
		lt='<';
		gt='>=';
		delim='()';
		modif='oq';
	    length riskco $ 400;
	    riskco=symget('riskcutoffs');
		do i=1 to &nriskcat;
	      j=i-1;
	      if i=1 then do;
		    riskcoh=scan(riskco,i,delim,modif);
			riskcohc=riskcoh;
if pred1 ne . and pred1 lt riskcoh then call cats(predcat1c,one,colon,lt,riskcohc);
	        if pred1 ne . and pred1 lt riskcoh then predcat1=1;
if pred2 ne . and pred2 lt riskcoh then call cats(predcat2c,one,colon,lt,riskcohc);
		    if pred2 ne . and pred2 lt riskcoh then predcat2=1;
		end;
	      if i > 1 and i < &nriskcat then do;
	        riskcoh=scan(riskco,i,delim,modif);
			riskcohc=riskcoh;
		    riskcol=scan(riskco,j,delim,modif);
			riskcolc=riskcol;
	        ic=i;
if pred1 ne . and pred1 ge riskcol and pred1 lt riskcoh then call cats(predcat1c,ic,colon,riskcolc,to,riskcohc);
	 		if pred1 ne . and pred1 ge riskcol  and pred1 lt riskcoh  then predcat1=i;
if pred2 ne . and pred2 ge riskcol and pred2 lt riskcoh then call cats(predcat2c,ic,colon,riskcolc,to,riskcohc); if pred2 ne . and pred2 ge riskcol and pred2 lt riskcoh then predcat2=i;
		  end;
	      if i = &nriskcat then do;
		  	riskcoh=.;
			riskcohc='       ';
	        riskcol=scan(riskco,j,delim,modif);
			riskcolc=riskcol;
			ic=i;
if pred1 ne . and pred1 ge riskcol then predcat1c=ic || colon || gt || left(riskcolc);
			if pred1 ne . and pred1 ge riskcol  then predcat1=i;
if pred2 ne . and pred2 ge riskcol then predcat2c=ic || colon || gt || left(riskcolc);
	        if pred2 ne . and pred2 ge riskcol  then predcat2=i;
		  end;
		output;
	end;
	run;
	data out;
set out(where=(predcat1 ne . and predcat2 ne . and predcat1=i or predcat1 ne . and predcat2 ne . and predcat2=i)); ***Modified;
	    diff_pred=pred2-pred1;
	run;

	  data temp2;
	    do &outcome=0 to 1;
			length &outcome 4;***Modified;
	      do predcat1=1 to &nriskcat;
		    do predcat2=1 to &nriskcat;
		      output;
	        end;
	      end;
	    end;

	  run;

	  proc sort data=temp2;
	    by &outcome predcat1 predcat2;

	  run;
	  ***Adding continuous NRI and prior calculations for that;
	  data temp3;
	  	set out;
		n_up=.;
		n_down=.;
		if diff_pred>0 then up=1;
			else up=0;
		if diff_pred<0 then down=1;
			else down=0;
	  run;

	  proc freq data=temp3 noprint;
	    tables &outcome*up*down/nocol nopercent norow out=outfr_3 sparse;

	  run;
	  data outfr_3;
set outfr_3(where=((up=1 and down=0) or (up=0 and down=1))keep=&outcome up down count);
	  run;
	  proc transpose data=outfr_3 out=outfr_4;
	  run;
	  data outfr_4;
set outfr_4(rename=(col1=n_down_ne col2=n_up_ne col3=n_down_e col4=n_up_e));
		if _name_='COUNT';
	  run;


	  proc freq data=out noprint;
	    tables &outcome*predcat1*predcat2/nocol nopercent norow out=outfr sparse;

	  run;

	  %if %upcase(&printtable) = Y %then %do;

	    proc sort data=out ;
	      by descending &outcome predcat1 predcat2;

	    run;

		proc tabulate data=out format=12.0 order=data;
		  class &outcome predcat1c predcat2c;
		  table &outcome,predcat1c all='Total',predcat2c all='Total' ;
		run;

	  %end;

	  proc sort data=outfr;
	    by &outcome predcat1 predcat2;

	  run;

	  data outfr;
	    merge outfr temp2;
	      by &outcome predcat1 predcat2;

	  run;

	  data outfr;
	    set outfr;
	    drop &outcome percent;

	  run;

	  proc transpose data=outfr out=outfr2;

	  run;

	  data outfr2;
	  	if _N_=1 then set outfr_4;
	    array col col1-col10000;
	    set outfr2;
	      if _name_='COUNT';
		nrc=symget('nriskcat');
		istop=2*nrc**2;
	    do i=1 to istop;
	      if col{i}=. then col{i}=0;
		end;
	    n_nonevents=0;
	    n_events=0;
	    n_up_nonevents=0;
	    n_down_nonevents=0;
	    n_up_events=0;
	    n_down_events=0;
	    do i=1 to istop;
	      if i le istop/2 then n_nonevents=n_nonevents+col{i};
		  if i gt istop/2 then n_events=n_events+col{i};

		end;
	    counter=0;
	    do i=1 to nrc;
	      do j= 1 to nrc;
		    counter=counter+1;
	        if j gt i then n_up_nonevents= n_up_nonevents+col{counter};
		    if j lt i then n_down_nonevents= n_down_nonevents+col{counter};
	 	  end;
	    end;
	    counter=0;
	    do i=1 to nrc;
	      do j= 1 to nrc;
		    counter=counter+1;
	        if j gt i then n_up_events= n_up_events+col{istop/2+counter};
		    if j lt i then n_down_events= n_down_events+col{istop/2+counter};
	      end;
	    end;
	    p_up_nonevents=  n_up_nonevents/n_nonevents;
	    p_down_nonevents=  n_down_nonevents/n_nonevents;
	    p_up_events=  n_up_events/n_events;
	    p_down_events=  n_down_events/n_events;
nri = ( p_up_events - p_down_events ) - ( p_up_nonevents - p_down_nonevents ) ;
		nri_case=( p_up_events - p_down_events );
		nri_noncase=(p_down_nonevents  - p_up_nonevents);
nri_c= ( (n_up_e/n_events)-(n_down_e/n_events))+( (n_down_ne/n_nonevents)-(n_up_ne/n_nonevents));
	    *Under hypothesis;
		/*h_se_nri_case= (( p_up_events + p_down_events ) / n_events)**0.5;
h_se_nri_noncase= (( p_up_nonevents + p_down_nonevents ) / n_nonevents)**0.5; h_se_nri = ( (( p_up_events + p_down_events ) / n_events) + (( p_up_nonevents + p_down_nonevents ) / n_nonevents )) **.5 ;
		h_z_nri_case= nri_case / h_se_nri_case;
		h_z_nri_noncase= nri_noncase / h_se_nri_noncase;
		h_z_nri = nri / h_se_nri ;
	    p_h_nri= 2 * ( 1 - probnorm ( abs ( h_z_nri ) ) ) ;*/
		*General;
se_nri_case= (( p_up_events + p_down_events -(p_up_events - p_down_events)**2) / n_events)**0.5; se_nri_noncase= (( p_up_nonevents + p_down_nonevents -(p_up_nonevents - p_down_nonevents)**2) / n_nonevents)**0.5;
		se_nri = (se_nri_case**2 + se_nri_noncase**2)**.5 ;
		z_nri_case= nri_case / se_nri_case;
		z_nri_noncase= nri_noncase / se_nri_noncase;
	    z_nri = nri / se_nri ;
	    p_nri=     2 * ( 1 - probnorm ( abs ( z_nri ) ) ) ;
	    p_nri_case=    ( 1 - probnorm ( abs ( z_nri_case ) ) ) ;
	    p_nri_noncase= ( 1 - probnorm ( abs ( z_nri_noncase ) ) ) ;
		nri_ucl= nri+(1.96*se_nri);
		nri_lcl= nri-(1.96*se_nri);
		nri_case_ucl= nri_case+(1.96*se_nri_case);
		nri_case_lcl= nri_case-(1.96*se_nri_case);
		nri_noncase_ucl= nri_noncase+(1.96*se_nri_noncase);
		nri_noncase_lcl= nri_noncase-(1.96*se_nri_noncase);
	    i=1;

	  run;

	  proc means data=out noprint;
	    class &outcome;
	    var pred1 pred2 diff_pred;
output out=idi n=n n2 n3 mean=m_pred1 m_pred2 m_diff_pred stderr=a b se_diff_pred;

	  run;

	  data idi;
	    set idi;
	    if &outcome ne .;
label n="n" n2="n2" n3="n3" m_pred1="mean_pred1" m_pred2="mean_pred2" a="a" b="b";

	  run;

	  proc sort data=idi;
	    by &outcome;

	  run;

	  data idi2;
	    do i=1 to 2;
	      set idi;
		  if &outcome=0 then do;
		    m_pred1_nonevents=m_pred1;
		    m_pred2_nonevents=m_pred2;
		    se_diff_nonevents=se_diff_pred;
		    n0=n;
		  end;
		  if &outcome=1 then do;
		    m_pred1_events=m_pred1;
		    m_pred2_events=m_pred2;
		    se_diff_events=se_diff_pred;
		    n1=n;
		  end;
		end;

	  run;

	  data idi2;
	    set idi2;
idi=(m_pred2_events-m_pred1_events)-(m_pred2_nonevents-m_pred1_nonevents);
		rel_idi=((m_pred2_events-m_pred2_nonevents)/(m_pred1_events-m_pred1_nonevents))-1;
	    se_idi= ( se_diff_nonevents**2+se_diff_events**2) ** .5;
	    z_idi=idi/se_idi;
	    p_idi= 2 * ( 1 - probnorm ( abs ( z_idi) ) ) ;
	    i=1;

	  run;

	  data nriidi;
merge outfr2(keep=i nri nri_case nri_noncase nri_c se_nri_case se_nri_noncase z_nri_case z_nri_noncase z_nri p_nri p_nri_case p_nri_noncase nri_ucl nri_lcl nri_case_ucl nri_case_lcl nri_noncase_ucl nri_noncase_lcl) idi2(keep=n0 n1 i idi rel_idi se_idi p_idi);
	      by i;
	    drop i;
	    n=n0+n1;
	    label n='n';

	  run;

	  proc print data=nriidi noobs label double;
	    where nri ne .;
var n nri nri_case nri_noncase nri_c z_nri_case z_nri_noncase z_nri p_nri p_nri_case p_nri_noncase nri_ucl nri_lcl nri_case_ucl nri_case_lcl nri_noncase_ucl nri_noncase_lcl idi rel_idi se_idi p_idi;

	  run;

%mend;
 /*Beispielaufruf
%reclassification_phreg(dataset=gdrs, id=ident, outcome=fall, timevar=fup_zeit, model1=alter_basis groesse taille hypertonie , nvar1=4,roc_title1='Model 2', model2=alter_basis groesse taille hypertonie alkohol_3 aktiv_2 rauch_2 rauch_4 wgrainbread coffee redmeat, nvar2=11,roc_title2='Model 2 plus Modifiable Risk Factors',
				  nriskcat=5, riskcutoffs=(0.0088)(0.0237)(0.0630)(0.1621),
				  risk_year=5,  base_muster=0 0 0 0 0 0 0 0 0 0 0 0 , printtable=Y);*/






--
Mario Petretta
Department of Translational Medical Sciences
Naples University Federico II
Italy





*
*   For searches and help try:
*   http://www.stata.com/help.cgi?search
*   http://www.stata.com/support/faqs/resources/statalist-faq/
*   http://www.ats.ucla.edu/stat/stata/


© Copyright 1996–2018 StataCorp LLC   |   Terms of use   |   Privacy   |   Contact us   |   Site index