/****************************************************************
 *  Programs for reading raw and stata format datasets          *
 *                                                              *
 *  This program is placed in the public domain                 *
 *                                                              *
 *  Globals used:                                               *
 *      double d[]  (export values read from the dataset)       *
 *      int debug   (debugging output flag)                     *
 ************************************ Bill Rogers *** 2/5/91 ****/

/*
 * NOTE(review): this copy of the file looks damaged.  Wherever the
 * original seems to have contained a less-than sign, everything up to
 * the next greater-than sign is missing (system header names, several
 * for-loop conditions, and the heads or tails of some functions).
 * The affected spots are marked below; the code tokens themselves are
 * left exactly as found.  Restore the missing text from a pristine copy
 * before relying on this file.
 */

#include "compiler.h"
#include "ds.h"
/* NOTE(review): the five directives below have lost their header names.
 * The code calls fopen/fscanf/printf, malloc, strcpy/strlen/strncmp,
 * strtod and pow, so stdio.h, stdlib.h, string.h and math.h are
 * presumably among them -- confirm against the original. */
#include
#include
#include
#include
#include

/* Objects defined in other translation units. */
extern char outbuf[];
extern int debug;       /* debugging output flag (see file header) */
extern int vars;
extern int nread;       /* incremented by set_readid() for every name saved */
extern Int2 labheads;   /* when non-zero, rawread() skips the first input line */

double d[MAX_DATA_VALUES];      /* export values read from the dataset */
char *rvec[MAX_DATA_VALUES];    /* rvec[k] points at the k-th name saved by set_readid() */
char *ridbuf;                   /* name storage, allocated in raw_setup() */
char *ridp;                     /* next free byte inside ridbuf */

/* Values held by input_type. */
#define RAW 1
#define STATAIN 2

static int input_type;          /* RAW or STATAIN; selects the reader used by dsread() */
static FILE *f;                 /* the open input file */
static int firstpass=1;         /* not referenced in the visible text */
/* Per-column destinations: data[k] is where the value of column k is stored. */
static struct raw { double *data[MAX_DATA_VALUES]; } mydata;
static short read_number[MAX_DATA_VALUES];  /* not referenced in the visible text */
static char *format;            /* scanf-style format handed to raw_setup() */
static int nmissing=0;          /* reported and cleared by missings() */
static double blackhole;        /* default destination installed by set_readid() */
char misssym[12]="M";           /* not referenced in the visible text */
int nomissing=1;                /* not referenced in the visible text */

/* Fix a bad bug in THINK_C strtod */
#if !defined(THINK_C)
#define nstrtod strtod
#else
/*
 * double nstrtod(s,q) char *s; char **q;
 *
 * Wrapper around strtod(s,q) used only when THINK_C is defined.  The
 * strtod result is returned unchanged unless the text starts with
 * "0.0", "-0.0", ".0" or "-.0".  In that case the result is divided by
 * 10 raised to the number of characters between the decimal point and
 * the first character after it that is not '0'.
 *
 * NOTE(review): if no such character follows the point (e.g. "0.0"),
 * cnz is still NULL when cnz-cp-1 is evaluated.
 */
double nstrtod(s,q)
char *s; char **q;
{
    double dd;
    char *p, *cp, *cnz;     /* cp: last '.' seen; cnz: first non-'0' after a '.' */

    dd = strtod(s,q);
    /* Every strncmp non-zero means none of the affected prefixes matched. */
    if (strncmp(s,"0.0",3) && strncmp(s,"-0.0",4) && strncmp(s,".0",2)
        && strncmp(s,"-.0",3) ) return dd;
    cnz = cp = (char *)NULL;
    for (p=s; *p; ++p) {
        if (*p!='0' && cp!=(char *)NULL && cnz==(char *)NULL) cnz=p;
        if (*p=='.') cp=p;
    }
    return dd/pow(10,cnz-cp-1);
}
#endif

/*********************************************************************
 *  int raw_setup(dsn,form) char *dsn, *form;                        *
 *                                                                   *
 *  Initializes a read of the ASCII dataset "dsn" with the cscanf    *
 *  format "form".                                                   *
 *                                                                   *
 *  --Does not currently handle table input format                   *
 *                                                                   *
 *  Returns 3 if the file cannot be opened, 0 otherwise.             *
 *  Also allocates the name buffer ridbuf (9*MAX_DATA_VALUES bytes)  *
 *  and points ridp at its start.                                    *
 *  NOTE(review): the malloc result is not checked, and the local j  *
 *  is never used.                                                   *
 *********************************************************************/
int raw_setup(dsn,form)
char *dsn, *form;
{
    int j;

    input_type = RAW;
    format = form;
    f = fopen(dsn,"r");
    if (!f) return 3;
    if (debug) printf("Reading from RAW dataset %s\n", dsn);
    ridp = ridbuf = (char *) malloc(9*MAX_DATA_VALUES);
    return 0;
}

/*********************************************************************
 *  int headread()                                                   *
 *                                                                   *
 *  Reads blank/tab separated words from f and passes each one to    *
 *  set_readid().  Returns when a '\r' or '\n' is met while skipping *
 *  separators.                                                      *
 *                                                                   *
 *  NOTE(review): the return statements carry no value although the  *
 *  function is declared int.                                        *
 *  NOTE(review): at EOF both inner loops break without reading, an  *
 *  empty word is registered and the outer loop repeats; this looks  *
 *  like it never terminates -- confirm.                             *
 *  NOTE(review): nothing limits a word to the 128 bytes of vbuf.    *
 *********************************************************************/
int headread()
{
    int c;
    char vbuf[128];
    char *v;

    c = ' ';
    while(1) {
        /* Skip separators; an end-of-line character ends the header. */
        for (;;c=fgetc(f)) {
            if (c==EOF) break;
            if (c=='\t') continue;
            if (c==' ') continue;
            if (c=='\r') return;
            if (c=='\n') return;
            break;
        }
        /* Collect one word up to the next separator, end of line or EOF. */
        v=vbuf;
        for (;;c=fgetc(f)) {
            if (c==EOF) break;
            if (c==' ') break;
            if (c=='\r') break;
            if (c=='\n') break;
            if (c=='\t') break;
            *(v++) = c;
        }
        *v = 0;
        set_readid(vbuf);
    }
}

/*********************************************************************
 *  int rawread()                                                    *
 *                                                                   *
 *  Reads the next observation from a raw dataset                    *
 *  NOTE: THERE IS A BAD!! BUG IN THINK-C.  DOUBLES OF THE FORM      *
 *        0.0x are promoted to 0.x                                   *
 *                                                                   *
 *  With a non-empty format the record is read by a single fscanf    *
 *  call; otherwise the free-format path after "retryit" is taken.   *
 *  NOTE(review): most of the free-format path, the "iseof" label    *
 *  and the end of this function are missing from this copy.         *
 *********************************************************************/
static long nobs = 0L;      /* incremented on the free-format path; ds_reset() zeroes it */
int rawread()
{
    int j;
    char vbuf[128], *v;
    int c;

    /* Nothing read yet (or just reset): start from the top of the file.
     * The origin argument is the literal 0. */
    if (nobs==0L) fseek(f,0L,0);
    if (*format) {
        if (feof(f)) goto iseof;
        /* The struct holding the destination pointers is passed by value as
         * the variadic argument.  NOTE(review): presumably this relies on the
         * pointer array being laid out like a list of separate pointer
         * arguments, which is not portable -- confirm. */
        if(fscanf(f,format,mydata)==EOF) goto iseof;
    }
    else {
retryit:
        ++nobs;
        c = ' ';
        /* With labheads set, the first line is consumed before any data. */
        if (labheads && nobs==1L) {
            while(c!='\n' && c!='\r' && c!=EOF) c=fgetc(f);
        }
        /* NOTE(review): text is missing from here on.  The loop header below
         * is cut off, and what follows it appears to belong to the middle of
         * the Stata setup routine (it uses dsn, header, typlist, varlist,
         * vposn, lrecl, readposn and getbytes, none of which is declared in
         * the visible text). */
        for (j=0; j=3) printf("Reading from STATA dataset %s: %s\n", dsn, header.data_label);
        /* getbytes() is not visible here; it is called with the same address
         * as both pointer arguments and a byte count (2, then 4). */
        getbytes((char *)&header.nvar,(char *)&header.nvar,2);
        getbytes((char *)&header.nobs,(char *)&header.nobs,4);
        /* Code 7 is returned for a release above 104 or a filetype other than 1. */
        if (header.release > 104) return(7);
        if (header.filetype!=1) return(7);
        /* One type byte, one 9-byte name slot and one short per variable. */
        typlist = (unsigned char *) malloc(header.nvar);
        varlist = (char *) malloc(9*header.nvar);
        vposn = (short *) malloc(sizeof(short)*header.nvar);
        if (!typlist || !varlist || !vposn) return(10);     /* allocation failure */
        /* NOTE(review): the fread return values are ignored. */
        fread(typlist,1,header.nvar,f);
        fread(varlist,9,header.nvar,f);
        /* NOTE(review): this loop header is cut off as well; the body that
         * presumably accumulated lrecl per variable is missing. */
        for (lrecl=i=0; i=3) {
            printf("@%3d %3d %c(%2x)=%8s\n", i, lrecl, typlist[i],typlist[i], varlist+9*i);
        }
    }
    /* readposn is set to the current offset plus 50*nvar+2 bytes; datalocate()
     * uses it as the offset of the first record.  NOTE(review): presumably the
     * skipped bytes are per-variable metadata -- confirm against the Stata
     * file format. */
    readposn = ftell(f);
    readposn += 50*header.nvar+2;
    return(0);
}

/*********************************************************************
 *  set_readid(s) char *s;                                           *
 *                                                                   *
 *  Registers the name s as input column number nread: the column's  *
 *  destination is set to blackhole, the name is copied to ridbuf at *
 *  ridp, rvec[nread] is pointed at the copy, nread is incremented   *
 *  and ridp is moved past the copied terminator.                    *
 *                                                                   *
 *  No return type is declared and no value is returned.             *
 *  NOTE(review): neither nread nor the space left in ridbuf is      *
 *  checked before writing.                                          *
 *********************************************************************/
set_readid(s)
char *s;
{
    mydata.data[nread] = &blackhole;
    strcpy(ridp,s);
    rvec[nread++]=ridp;
    if (0) printf("Saving %s in posn %d\n", s, nread-1);    /* disabled trace */
    ridp += strlen(s) + 1;
}

/*********************************************************************
 *  lookup_readid(s) char *s;                                        *
 *                                                                   *
 *  NOTE(review): only the first tokens of this function survive.    *
 *  The rest of the text below appears to be the tail of a different *
 *  routine that binds a name s to a destination pointer dptr; its   *
 *  header and the declarations of j and dptr are missing.           *
 *********************************************************************/
lookup_readid(s)
char *s;
{
    int i;

    for (i=0; i=3) printf("#%d %d %s ==> %p\n", nread, j, s, dptr);
                ++nread;
                return(0);
            }
        }
        return(1);
    }
    if (input_type == RAW) {
        j = lookup_readid(s);
        if (0) printf("#%d %s ==> %p\n", j, s, dptr);      /* disabled trace */
        /* NOTE(review): the store happens before j is tested.  If
         * lookup_readid() can return a negative index, as the j > -1 test
         * suggests, this writes outside mydata.data -- confirm. */
        mydata.data[j] = dptr;
        if (j > -1) return(0);
    }
    return(1);
}

static int firstread=1;         /* cleared after datalocate() has set up its buffer */
static int lastrecordno=-1;     /* not referenced in the visible text */
static int isbig=1;             /* non-zero while mydatabuf holds every record */
static char *mydatabuf;         /* record buffer owned by datalocate() */

/*********************************************************************
 *  char *datalocate(i) long i;                                      *
 *                                                                   *
 *  Returns a pointer to record number i of a Stata dataset.         *
 *                                                                   *
 *  On the first call, if LONGREAD is non-zero, a buffer for all     *
 *  header.nobs records of lrecl bytes is requested and, on success, *
 *  filled with one fread starting at readposn.  If LONGREAD is zero *
 *  or that allocation fails, a one-record buffer is used instead    *
 *  and isbig is cleared; NULL is returned if that fails too.        *
 *  Later calls either index into the big buffer or seek to the      *
 *  record and read it into the one-record buffer.                   *
 *                                                                   *
 *  NOTE(review): the fread return values are ignored, and when      *
 *  input_type is not STATAIN control reaches the end of the         *
 *  function without a return value.                                 *
 *********************************************************************/
char *datalocate(i)
long i;
{
    if (input_type==STATAIN) {
        if (firstread) {
            if (LONGREAD) mydatabuf = (char *) malloc(header.nobs*lrecl);
            if (!LONGREAD || !mydatabuf) {
                /* Fall back to reading one record at a time. */
                mydatabuf = (char *) malloc(lrecl);
                if (!mydatabuf) return(NULL);
                isbig = 0;
            }
            else {
                /* Whole dataset fits: load every record now. */
                fseek(f,readposn,0);
                fread(mydatabuf,lrecl,header.nobs,f);
            }
            firstread=0;
        }
        if (isbig) return (mydatabuf+lrecl*i);
        fseek(f,readposn+lrecl*i,0);
        fread(mydatabuf,1,lrecl,f);
        return(mydatabuf);
    }
}

/*********************************************************************
 *  int stataread()                                                  *
 *                                                                   *
 *  Reads the next observation from a Stata dataset.  Returns 1 and  *
 *  resets the record counter mynobs once mynobs has reached         *
 *  header.nobs; the visible tail returns 0.  A NULL record pointer  *
 *  from datalocate() is reported through mxerror(12,"").            *
 *                                                                   *
 *  NOTE(review): the per-variable decoding loop is missing from     *
 *  this copy; only its first tokens and a debug print survive.      *
 *  mynobs, databuf, header and mxerror are not declared in the      *
 *  visible text.                                                    *
 *********************************************************************/
int stataread()
{
    int i, j;
    short ival;
    char cval;
    long lval;
    float fval;
    DOUBLE dval;

reread:
    if (mynobs>=header.nobs) {mynobs=0L; return(1);}
    databuf = datalocate(mynobs++);
    if (!databuf) mxerror(12,""); /* allocation error */
    for (i=0; i=3) printf("Obs %ld: reading @%15g to %p\n",mynobs,*(mydata.data[i]),mydata.data[i]);
    }
    return 0;
}

/*********************************************************************
 *  void ds_reset()                                                  *
 *                                                                   *
 *  Zeroes the observation counter of the active reader: nobs for    *
 *  RAW input (rawread() seeks to offset 0 when nobs is zero),       *
 *  mynobs for STATAIN input.                                        *
 *********************************************************************/
void ds_reset()
{
    if (input_type==RAW) nobs = 0L;
    if (input_type==STATAIN) mynobs = 0L;
}

/*********************************************************************
 *  int dsread()                                                     *
 *                                                                   *
 *  Reads the next observation with the reader selected by           *
 *  input_type and returns that reader's result.                     *
 *  NOTE(review): no value is returned when input_type is neither    *
 *  RAW nor STATAIN.                                                 *
 *********************************************************************/
int dsread()
{
    if (input_type==RAW) return rawread();
    if (input_type==STATAIN) return stataread();
}

/*********************************************************************
 *  int missings()                                                   *
 *                                                                   *
 *  Returns the current value of nmissing and resets it to zero.     *
 *  The code that increments nmissing is not in the visible text.    *
 *********************************************************************/
int missings()
{
    int i;

    i = nmissing;
    nmissing = 0;
    return i;
}