/* DTASURV.C By Timothy J. Schmidt 9/8/95 Reads Stata-format data sets and summarizes data contained therein. STB-28: dm35 */ #include #include #include // for findfirst #include // for findfirst #include // for getftime #include void strxtrct(char*, char*, char*); char* getinfo(char*); void findvar(char*, char*); int strcompare(const void *, const void *); int obscompare(const void *, const void *); int varcompare(const void *, const void *); int doubcompare(const void *, const void *); int flocompare(const void *, const void *); int longcompare(const void *, const void *); int intcompare(const void *, const void *); int bytecompare(const void *, const void *); int misscompare(const void *, const void *); int bintodec(int, int); char Buffer[81]; void main(int argc, char *argv[]) { int i = 1, j; char *buff, buffarr[200][81], (*buffptr[200])[81]; struct ffblk dosinfo; if (findfirst("*.dta", &dosinfo, FA_NORMAL)) { printf("no .dta files found in current directory\n"); exit(1); } if (argc == 1 || (argc == 3 && !strcmp(argv[1], "!"))) { buff = getinfo(dosinfo.ff_name); buffptr[0] = strcpy(buffarr[0], buff); } else findvar(argv[1], dosinfo.ff_name); for (i = 1; !findnext(&dosinfo); i++) { if (argc == 1 || (argc == 3 && !strcmp(argv[1], "!"))) { buff = getinfo(dosinfo.ff_name); buffptr[i] = strcpy(buffarr[i], buff); } else findvar(argv[1], dosinfo.ff_name); } if (argc != 2) { if (!strcmp(argv[2], "fname")) qsort((void *) buffptr, (size_t) i, (size_t) sizeof(char *), strcompare); else if (!strcmp(argv[2], "obs")) qsort((void *) buffptr, (size_t) i, (size_t) sizeof(char *), obscompare); else if (!strcmp(argv[2], "var")) qsort((void *) buffptr, (size_t) i, (size_t) sizeof(char *), varcompare); else if (!strcmp(argv[2], "double")) qsort((void *) buffptr, (size_t) i, (size_t) sizeof(char *), doubcompare); else if (!strcmp(argv[2], "float")) qsort((void *) buffptr, (size_t) i, (size_t) sizeof(char *), flocompare); else if (!strcmp(argv[2], "long")) qsort((void *) buffptr, (size_t) i, (size_t) sizeof(char *), longcompare); else if (!strcmp(argv[2], "integer")) qsort((void *) buffptr, (size_t) i, (size_t) sizeof(char *), intcompare); else if (!strcmp(argv[2], "byte")) qsort((void *) buffptr, (size_t) i, (size_t) sizeof(char *), bytecompare); else if (!strcmp(argv[2], "miss")) qsort((void *) buffptr, (size_t) i, (size_t) sizeof(char *), misscompare); printf("Data set Date Time Obs Vars Doub Floa Long" " Int Byte Miss\n"); for (j = 0; j < i; j++) printf("%s", buffptr[j]); } system("pause"); } void strxtrct(char* strbeg, char* strend, char* strnew) { while (strbeg <= strend) *strnew++ = *strbeg++; } int strcompare(const void *str1, const void *str2) { return strncmp(*(char**)str1, *(char**)str2, 13); } int obscompare(const void *str1, const void *str2) { char strvar1[81], strvar2[81], num1str[5], num2str[5]; strncpy(strvar1, *(char**)str1, 81); strncpy(strvar2, *(char**)str2, 81); strxtrct(&strvar1[31], &strvar1[35], num1str); strxtrct(&strvar2[31], &strvar2[35], num2str); return (atoi(num1str) > atoi(num2str))? 1 : -1; } int varcompare(const void *str1, const void *str2) { char strvar1[81], strvar2[81], num1str[4], num2str[4]; strncpy(strvar1, *(char**)str1, 81); strncpy(strvar2, *(char**)str2, 81); strxtrct(&strvar1[38], &strvar1[41], num1str); strxtrct(&strvar2[38], &strvar2[41], num2str); return (atoi(num1str) > atoi(num2str))? 1 : -1; } int doubcompare(const void *str1, const void *str2) { char strvar1[81], strvar2[81], num1str[4], num2str[4]; strncpy(strvar1, *(char**)str1, 81); strncpy(strvar2, *(char**)str2, 81); strxtrct(&strvar1[44], &strvar1[47], num1str); strxtrct(&strvar2[44], &strvar2[47], num2str); return (atoi(num1str) > atoi(num2str))? 1 : -1; } int flocompare(const void *str1, const void *str2) { char strvar1[81], strvar2[81], num1str[4], num2str[4]; strncpy(strvar1, *(char**)str1, 81); strncpy(strvar2, *(char**)str2, 81); strxtrct(&strvar1[50], &strvar1[53], num1str); strxtrct(&strvar2[50], &strvar2[53], num2str); return (atoi(num1str) > atoi(num2str))? 1 : -1; } int longcompare(const void *str1, const void *str2) { char strvar1[81], strvar2[81], num1str[4], num2str[4]; strncpy(strvar1, *(char**)str1, 81); strncpy(strvar2, *(char**)str2, 81); strxtrct(&strvar1[56], &strvar1[59], num1str); strxtrct(&strvar2[56], &strvar2[59], num2str); return (atoi(num1str) > atoi(num2str))? 1 : -1; } int intcompare(const void *str1, const void *str2) { char strvar1[81], strvar2[81], num1str[4], num2str[4]; strncpy(strvar1, *(char**)str1, 81); strncpy(strvar2, *(char**)str2, 81); strxtrct(&strvar1[62], &strvar1[65], num1str); strxtrct(&strvar2[62], &strvar2[65], num2str); return (atoi(num1str) > atoi(num2str))? 1 : -1; } int bytecompare(const void *str1, const void *str2) { char strvar1[81], strvar2[81], num1str[4], num2str[4]; strncpy(strvar1, *(char**)str1, 81); strncpy(strvar2, *(char**)str2, 81); strxtrct(&strvar1[68], &strvar1[71], num1str); strxtrct(&strvar2[68], &strvar2[71], num2str); return (atoi(num1str) > atoi(num2str))? 1 : -1; } int misscompare(const void *str1, const void *str2) { char strvar1[81], strvar2[81], num1str[5], num2str[5]; strncpy(strvar1, *(char**)str1, 81); strncpy(strvar2, *(char**)str2, 81); strxtrct(&strvar1[74], &strvar1[78], num1str); strxtrct(&strvar2[74], &strvar2[78], num2str); return (atoi(num1str) > atoi(num2str))? 1 : -1; } int bintodec(int decnum, int factor) { int binnum = 0; if ((decnum/128) > 1) binnum += 128*factor; if (((decnum-128)/64) > 1) binnum += 64*factor; if (((decnum-192)/32) > 1) binnum += 32*factor; if (((decnum-224)/16) > 1) binnum += 16*factor; if (((decnum-240)/8) > 1) binnum += 8*factor; if (((decnum-248)/4) > 1) binnum += 4*factor; if (((decnum-252)/2) > 1) binnum += 2*factor; if (decnum-254) binnum += factor; return binnum; } char* getinfo(char* filename) { char header[42], datalab[32], type, typelist[3000]; FILE *infile; long i, typebyte1, skiplength, missing = 0; int inhandle, numd = 0, numf = 0, numl = 0, numi = 0, numb = 0, numvar, numvar1, numvar2, numobs, numobs1, numobs2, numobs3, numobs4, j, k, skip = 1, len, byte1, byte2, byteorder; struct ftime dtinfo; if ((infile = fopen(filename, "rb")) == NULL) { printf("could not open %s\n", filename); exit(2); } inhandle = fileno(infile); getftime(inhandle, &dtinfo); fread(header, sizeof(char), 42, infile); // Read dta file header strxtrct(&header[10], &header[41], datalab); // Extract Stata data label fseek(infile, 1L, SEEK_SET); byteorder = fgetc(infile); fgetc(infile); fgetc(infile); numvar1 = fgetc(infile); numvar2 = fgetc(infile); numobs1 = fgetc(infile); numobs2 = fgetc(infile); numobs3 = fgetc(infile); numobs4 = fgetc(infile); numobs = (byteorder == 1)? numobs4 : numobs1; if (numobs2) numobs += (byteorder == 1)? numobs2*65536 : numobs2*256; if (numobs3) numobs += (byteorder == 1)? numobs3*256 : numobs3*65536; if (!numvar1) numvar = numvar2; else numvar = numvar1 + (numvar2*256); if (header[0] >= 105) typebyte1 = 60L; // First byte of type list for Stata version 4.0 else typebyte1 = 42L; // First byte of type list for Stata versions < 4.0 for (i = typebyte1; i < (typebyte1+numvar); i++) { // Count variable types fseek(infile, i, SEEK_SET); type = fgetc(infile); if (!isascii(type)) type -= 0xff7f; typelist[(int)(i-typebyte1)] = type; switch (type) { case 'd' : numd++; break; case 'f' : numf++; break; case 'l' : numl++; break; case 'i' : numi++; break; case 'b' : numb++; break; } } skiplength = (header[0] >= 105)? 9*numvar+2*(numvar+1)+12*numvar+9*numvar+32*numvar : 9*numvar+2*(numvar+1)+7*numvar+9*numvar+32*numvar; fseek(infile, skiplength, SEEK_CUR); // Go to first byte of // expansion field or data if (header[0] >= 105) { while (skip) { // Skip over expansion field fgetc(infile); // if version >= 4.0 skip = fgetc(infile); if (len = fgetc(infile)) skip += bintodec(len, 256); fseek(infile, (long) skip, SEEK_CUR); } } for (k = 1; k <= numobs; k++) { for (j = 0; j < numvar; j++) { if (typelist[j] == 'b') { if (fgetc(infile) == 127) missing++; } else if (typelist[j] == 'i') { if (byteorder != 1) { fseek(infile, 1L, SEEK_CUR); if (fgetc(infile) == 127) missing++; } else { if (fgetc(infile) == 127) missing++; fseek(infile, 1L, SEEK_CUR); } } else if (typelist[j] == 'l' || typelist[j] == 'f') { if (byteorder != 1) { fseek(infile, 3L, SEEK_CUR); if (fgetc(infile) == 127) missing++; } else { if (fgetc(infile) == 127) missing++; fseek(infile, 3L, SEEK_CUR); } } else if (typelist[j] == 'd') { if (byteorder != 1) { fseek(infile, 6L, SEEK_CUR); byte1 = fgetc(infile); byte2 = fgetc(infile); if (byte1 == 192 && byte2 == 84) missing++; } else { byte1 = fgetc(infile); byte2 = fgetc(infile); if (byte1 == 84 && byte2 == 192) missing++; fseek(infile, 6L, SEEK_CUR); } } else fseek(infile, (long) typelist[j], SEEK_CUR); } } fclose(infile); sprintf(Buffer, "%-12s %2d-%2.2d-%d %2d:%2.2d %5d %4d %4d %4d %4d" " %4d %4d %5ld\n", filename, dtinfo.ft_month, dtinfo.ft_day, dtinfo.ft_year+80, dtinfo.ft_hour, dtinfo.ft_min, numobs, numvar, numd, numf, numl, numi, numb, missing); return Buffer; } void findvar(char* vn, char* filename) { FILE *infile; int version, numobs1, numobs2, numobs3, numobs4, numobs, numvar1, numvar2, numvar, i, j, k, flag = 0, numd = 0, numf = 0, numl = 0, numi = 0, numb = 0, skip = 1, len, byte1, byte2, byteorder; long offset, skiplength, missing = 0, x; char varstr[9], *varname, typelist[3000], type; varname = vn; if ((infile = fopen(filename, "rb")) == NULL) { printf("could not open %s\n", filename); exit(2); } fseek(infile, 0L, SEEK_SET); version = fgetc(infile); byteorder = fgetc(infile); fseek(infile, 4L, SEEK_SET); numvar1 = fgetc(infile); numvar2 = fgetc(infile); numobs1 = fgetc(infile); numobs2 = fgetc(infile); numobs3 = fgetc(infile); numobs4 = fgetc(infile); numobs = (byteorder == 1)? numobs4 : numobs1; if (numobs2) numobs += (byteorder == 1)? numobs2*65536 : numobs2*256; if (numobs3) numobs += (byteorder == 1)? numobs3*256 : numobs3*65536; if (!numvar1) numvar = numvar2; else numvar = numvar1 + (numvar2*256); offset = (version >= 105)? numvar+60 : numvar+42; fseek(infile, offset, SEEK_SET); for (i = 1; i <= numvar; i++) { fread(varstr, sizeof(char), 9, infile); if (!strcmp(varname, varstr)) { flag = 1; printf("%-12s contains %s : ", filename, varname); break; } } if (flag) { offset = (version >= 105)? 60L : 42L; // First byte of type list for (x = offset; x < (offset+numvar); x++) { // Count variable types fseek(infile, x, SEEK_SET); type = fgetc(infile); if (!isascii(type)) type -= 0xff7f; typelist[(int)(x-offset)] = type; switch (type) { case 'd' : numd++; break; case 'f' : numf++; break; case 'l' : numl++; break; case 'i' : numi++; break; case 'b' : numb++; break; } if ((x-offset) == (i-1)) { switch (type) { case 'd' : printf("double "); break; case 'f' : printf("float "); break; case 'l' : printf("long "); break; case 'i' : printf("int "); break; case 'b' : printf("byte "); break; default : printf("string "); } } } skiplength = (version >= 105)? 9*numvar+2*(numvar+1)+12*numvar+9*numvar+32*numvar : 9*numvar+2*(numvar+1)+7*numvar+9*numvar+32*numvar; fseek(infile, skiplength, SEEK_CUR); // Go to first byte of // expansion field or data if (version >= 105) { while (skip) { // Skip over expansion field fgetc(infile); // if version >= 4.0 skip = fgetc(infile); if (len = fgetc(infile)) skip += bintodec(len, 256); fseek(infile, (long) skip, SEEK_CUR); } } for (k = 1; k <= numobs; k++) { for (j = 0; j < numvar; j++) { if (typelist[j] == 'b') { if (fgetc(infile) == 127 && j == (i-1)) missing++; } else if (typelist[j] == 'i') { if (byteorder != 1) { fseek(infile, 1L, SEEK_CUR); if (fgetc(infile) == 127 && j == (i-1)) missing++; } else { if (fgetc(infile) == 127 && j == (i-1)) missing++; fseek(infile, 1L, SEEK_CUR); } } else if (typelist[j] == 'l' || typelist[j] == 'f') { if (byteorder != 1) { fseek(infile, 3L, SEEK_CUR); if (fgetc(infile) == 127 && j == (i-1)) missing++; } else { if (fgetc(infile) == 127 && j == (i-1)) missing++; fseek(infile, 3L, SEEK_CUR); } } else if (typelist[j] == 'd') { if (byteorder != 1) { fseek(infile, 6L, SEEK_CUR); byte1 = fgetc(infile); byte2 = fgetc(infile); if (byte1 == 192 && byte2 == 84 && j == (i-1)) missing++; } else { byte1 = fgetc(infile); byte2 = fgetc(infile); if (byte1 == 84 && byte2 == 192 && j == (i-1)) missing++; fseek(infile, 6L, SEEK_CUR); } } else fseek(infile, (long) typelist[j], SEEK_CUR); } } printf("%5d observations %5ld missing\n", numobs, missing); } fclose(infile); }