/*
 * stata2sep.c  This program was written by Phil Goldberg to convert
 *              Stata generated information into the standard, comma
 *              separated information which forms the base data configuration
 *              in our system.  Stata contains an internal function
 *              called 'outfile'.  While this function can output
 *              comma-separated values, it makes them "Nice" by putting
 *              carriage returns at the end of each line.  It does not
 *              mark the end of record in any special fashion.  This
 *              results in our other programs being unable to deal with
 *              the information properly.  To fix this, the following steps
 *              are taken:
 *
 *              1.  Use a .do file/program to write out
 *                  the number of variables contained within the data
 *                  set.
 *
 *                  s2sep.do:
 *                      version 3.0
 *                      capture confirm existence `1'
 *                      if (_rc == 0) {
 *                          quietly{
 *                              describe
 *                              log using StAtA_log
 *                              noisily display _result(2)
 *                              quietly log close
 *                              quietly outfile using StAtA_data
 *                              !stata2sep StAtA_log.log StAtA_data.raw > `1'
 *                              !rm StAtA_data.raw
 *                              !rm StAtA_log.log
 *                          }
 *                      }
 *                      else {
 *                          noisily display "usage requires an output file name"
 *                      }
 *
 *                  usage: (within stata)
 *                      run s2sep outputfile
 *
 *                  limits:
 *                      Notice that subsets of variables are not supported.
 *
 *              2.  If you look at the first '!' command in the .do file,
 *                  you'll notice that it invokes this program.  Remaining
 *                  steps in the .do file are merely for housekeeping.
 *
 *              3.  Using argv[1] as the log file and argv[2] as the data
 *                  file names, open both files.
 *
 *              4.  Parse out the number of variables contained in the
 *                  argv[1] file.
 *
 *              5.  Read in the argv[2] file, replacing CR's with commas
 *                  except for the last in each record.  The easiest way
 *                  to do this is to use '\n' and ' '(*n) as separators.
 *                  Copy from input to output, counting separators as you
 *                  go, and replace '\n' with ',' internally.  When you are
 *                  on the last element, use a '\n' and continue onto the
 *                  next line.
 *
 *              6.  Those of you who are familiar with outfile may recognize
 *                  that the "outfile ...., comma" command is not used.  The
 *                  file outfile writes is separated by runs of blanks, *not* by
 *                  commas.  The reason for this is that when I tried to use
 *                  the comma-separated output (which is smaller than blank-
 *                  separated), I lost variables!  Some records seemed to have
 *                  values missing ... maybe it was too much caffeine.  Whatever
 *                  the cause, runs of blanks separated values works reliably.
 *
 * Dedication:  This program is dedicated to the memory of David
 *              Sellars, a victim of Cystic Fibrosis and a dear
 *              friend.  If you find this software useful, please
 *              send a donation to the Cystic Fibrosis Foundation.
 *
 *                  The Cystic Fibrosis Foundation
 *                  6931 Arlington Road
 *                  Bethesda, MD, 20814
 *                  800-344-4823
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

FILE *logfile;   /* log file holding the variable count (argv[1])        */
FILE *datafile;  /* blank-separated data written by outfile (argv[2])    */
int numvars;     /* number of variables per record, read from logfile    */
int varcount;    /* 1-based position of the next variable in the record  */

int getcount(void);
void reformat(void);
void terminate(void);

/* One input field, kept NUL-terminated and grown on demand. */
struct field {
    char *text;
    size_t len;
    size_t cap;
};

/*
 * field_put() - append one byte to the field, growing the storage as needed
 * so that arbitrarily long tokens cannot overrun the buffer.  Exits with a
 * diagnostic if memory cannot be obtained.
 */
static void field_put(struct field *f, int ch)
{
    if (f->len + 2 > f->cap) {      /* room for this byte plus the '\0' */
        size_t newcap = (f->cap == 0) ? 256 : f->cap * 2;
        char *grown = NULL;

        if (newcap > f->cap)        /* guards against size_t wrap-around */
            grown = realloc(f->text, newcap);
        if (grown == NULL) {
            fprintf(stderr, "Out of memory\n");
            exit(-1);
        }
        f->text = grown;
        f->cap = newcap;
    }
    f->text[f->len++] = (char) ch;
    f->text[f->len] = '\0';
}

/*
 * emit_field() - write one field to stdout followed by its separator.
 * A field consisting solely of "." is written as an empty field, and an
 * empty field (len == 0) writes nothing before the separator.
 */
static void emit_field(const struct field *f)
{
    if (f->len > 0 && strcmp(f->text, ".") != 0)
        fputs(f->text, stdout);
    varcount++;
    terminate();
}

/*
 * main() - open the log file (argv[1]) and the data file (argv[2]), read
 * the variable count, and write the reformatted records to stdout.
 *
 * Diagnostics go to stderr: stdout carries the converted data (s2sep.do
 * redirects it into the output file), so messages must not be mixed in.
 * Exits with status -1 on any failure, 0 on success.
 */
int main(int argc, char *argv[])
{
    if (argc < 3) {
        fprintf(stderr, "Usage: %s log_file data_file\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "stata2sep");
        exit(-1);
    }

    if ((logfile = fopen(argv[1], "r")) == NULL) {
        fprintf(stderr, "Couldn't open %s\n", argv[1]);
        exit(-1);
    }

    if ((datafile = fopen(argv[2], "r")) == NULL) {
        fprintf(stderr, "Couldn't open %s\n", argv[2]);
        exit(-1);
    }

    numvars = getcount();
    reformat();

    if (ferror(datafile)) {
        fprintf(stderr, "Error reading %s\n", argv[2]);
        exit(-1);
    }
    fclose(datafile);
    datafile = NULL;

    /* Surface write failures (e.g. a full disk) instead of ignoring them. */
    if (fflush(stdout) == EOF || ferror(stdout)) {
        fprintf(stderr, "Error writing output\n");
        exit(-1);
    }
    return 0;
} /* main */

/*
 * getcount() - read the leading integer from logfile and close the file.
 *
 * Returns the integer read.  Exits with status -1 if the log does not
 * begin with an integer, rather than returning an indeterminate value.
 */
int getcount(void)
{
    int count = 0;
    int matched = fscanf(logfile, "%d", &count);

    fclose(logfile);
    logfile = NULL;

    if (matched != 1) {
        fprintf(stderr, "Couldn't read the variable count from the log file\n");
        exit(-1);
    }
    return count;
}

/*
 * reformat() - copy datafile to stdout one field at a time.
 *
 * Blanks and newlines between fields are skipped.  A field is either a
 * double-quoted string (everything up to the closing quote, or to end of
 * file if the quote is never closed) or a bare token ending at the next
 * blank, newline or end of file.  After each field, terminate() writes
 * the separator.
 *
 * Characters are read into an int and compared against EOF, so the
 * end-of-file marker is never stored in a field and a data byte of 0xFF
 * is treated like any other byte.
 */
void reformat(void)
{
    struct field f = { NULL, 0, 0 };
    int c;

    varcount = 1;

    while ((c = fgetc(datafile)) != EOF) {
        if (c == ' ' || c == '\n')      /* skip blanks and blank lines */
            continue;

        f.len = 0;

        if (c == '"') {                 /* quoted string */
            while ((c = fgetc(datafile)) != EOF && c != '"')
                field_put(&f, c);
            emit_field(&f);
        } else {                        /* a token - not quoted */
            do {
                field_put(&f, c);
            } while ((c = fgetc(datafile)) != EOF && c != ' ' && c != '\n');

            /* A token starting with a NUL byte is empty as a C string;
               it is ignored and does not count as a variable. */
            if (f.text[0] != '\0')
                emit_field(&f);
        }
    }

    free(f.text);
}

/*
 *
 * terminate() - outputs a comma if we are not on the last variable on the
 *               line, and outputs a '\n' if we are.
 */
void terminate(void)
{
    if (varcount <= numvars) {
        putchar(',');
    } else {
        putchar('\n');
        varcount = 1;
    }
}