/*******************************************************************
 * General lexical analyzer.
 *
 * This lexical analyzer is sufficient to support a language like C.
 * It is designed to work with the unix yacc parser.  Note that the
 * #defines for symbol types are required in ytab.h (made by yacc).
 * We need to know (through compiler.h) what some definitions of the
 * variable types are.
 *
 * NOTE(review): this copy of the file appears to have lost text in
 * several places (the two system header names below, and a stretch
 * in the middle of the lookup routines).  Each spot is marked with
 * its own NOTE(review).  Restore from a clean copy before building.
 ****************************************** Bill Rogers *************/

/* On THINK C, route strtod to the replacement named nstrtod. */
#if defined(THINK_C)
#define strtod nstrtod
#endif

/* NOTE(review): the header names of the next two includes are missing
 * in this copy.  The code below uses FILE, fprintf, fgetc, strcmp,
 * strcpy, strchr, strtod, strtol and the isalpha family, so standard
 * I/O, string, stdlib and ctype declarations are needed - confirm
 * which headers were originally listed. */
#include
#include
#include "compiler.h"
#include "ytab.h"

extern char *ident[];
extern int error_flag;             /* set to 99 by the error routines below */

char token[256];                   /* text of the token being scanned */
static int curlevel=0;             /* current include nesting level; indexes infile, lineno, inf */
static char infile[5][64];         /* file name per nesting level (filled in when an include is opened) */
static int lineno[5] = {1, 0, 0, 0, 0};  /* current line number per nesting level */
static int charcount;              /* column counter, reported as col in error messages */
static FILE *f;                    /* stream currently being read */
/* The next four are not referenced in the intact part of this file;
 * dstart shows up only in the damaged region further down. */
static char *litlist, *litlistend;
DOUBLE *dend;
static char *dstart;

/************************************************************************
 * External interface routines
 * Several routines are needed to interface into the specific application.
 *   int yylex() returns token type to external parser (ytab.c)
 ************************************************************************/

/************************************************************************
 * Error routines that know about where the error occurred
 ************************************************************************/

/* Report message s on stderr together with the current file name, line
 * and column, and set error_flag to 99. */
void yyerror(s)
char *s;
{
    fprintf(stderr,"ERROR: %s in %s(line %d, col %d)\n", s, infile[curlevel], lineno[curlevel], charcount);
    error_flag = 99;
}

/* Same as yyerror, but prints two strings (s then s2) ahead of the
 * location. */
void yyerror2(s,s2)
char *s;
char *s2;
{
    fprintf(stderr,"ERROR: %s %s in %s(line %d, col %d)\n", s, s2, infile[curlevel], lineno[curlevel], charcount);
    error_flag = 99;
}

/************************************************************************
 * lookup routine (called by yylex below) knows about keywords
 ************************************************************************/
#if !defined(BAD_ID)
#define BAD_ID (-1)
#endif
int CurrentID=BAD_ID;              /* also used as the loop index inside the lookup code */
int numident=0;
#define MAX_IDENT 256
short *token_val;                  /* indexed by the value LookupID returns (see yylex1) */

/* LookupID(s): yylex1 uses the return value both as yylval and as an
 * index into token_val.
 *
 * NOTE(review): text is missing from this copy starting inside the for
 * header below.  The rest of LookupID, the head of what looks like the
 * number lookup routine (LookupNum is called from yylex1), and the head
 * of the escape conversion routine (cvtchar is called from findstr) are
 * gone, so the statements up to findstr are not well-formed C as they
 * stand.  Names used later without a visible declaration - nextchar,
 * lastchar, sol, inf, quotechar, lextype, islong, dval, longval, intval,
 * floatval, stringlen, ESCAPECHAR, PREPROCESSOR_CH - were presumably
 * declared in the lost text or come from compiler.h; confirm. */
int
LookupID(s)
char *s;
{
    char *p;
    for (CurrentID=0; CurrentID= dstart; (--CurrentID),--p) {
        if (*p == d) return CurrentID;
    }
    dstart -= (sizeof(DOUBLE));
    /* NOTE(review): more text is missing inside the next condition.  What
     * follows is the tail of an octal escape conversion writing through p. */
    if (dstart'7') {*p=c; return;} /* Non-Number */
    *p = c-'0';
    while (1) {
        nextchar = c = fgetc(f);
        if (c==EOF) return;
        /* NOTE(review): the first digit is stored as c-'0' but later digits
         * add the raw character c - looks inconsistent, confirm. */
        if (c>='0' && c<='7') *p = 8 * (*p) + c;
        else return;
    }
}
}

/* findstr copies a string from f to token, and returns its length.
 * The string must exist on a single line.  Returns -1 if string is
 * unterminated (newline or EOF reached before endquote); in that case
 * token is left without a terminating NUL.
 *
 * endquote is the terminating character.  When endquote is a space, a
 * tab or a newline also terminates the string.  A pending character in
 * nextchar is consumed first; otherwise characters are read from f.
 *
 * NOTE(review): nothing here limits the copy to the 256 bytes of token. */
static int findstr(endquote)
char endquote;
{
    char *p;
    int c;
    p=token;
    while(1) {
        /* Use the pending look-ahead character if there is one. */
        if (!nextchar) { nextchar = fgetc(f); ++charcount; }
        c = nextchar;
        nextchar = 0;
        /* Escape sequences are converted in place by cvtchar. */
        if (c == ESCAPECHAR) { cvtchar(p++); continue; }
        if (c=='\n') {++lineno[curlevel]; charcount=1;}
        /* Whitespace-delimited mode: tab and newline act as the delimiter. */
        if (endquote==' ' && (c=='\t' || c=='\n')) c = endquote;
        if (c == endquote) { *p=0; return(p-token); }
        if (c == '\n' || c == EOF) return (-1);
        else *(p++) = c;
    }
}

/* check_name: i is a findstr result.  When it is negative, report s
 * through yyerror, discard input until nextchar is a newline or EOF,
 * then bump the line number, reset the column and clear nextchar.
 * Does nothing when i is not negative. */
static void check_name(i,s)
int i;
char *s;
{
    if (i<0) {
        yyerror(s);
        while(nextchar!='\n' && nextchar!=EOF) nextchar = fgetc(f);
        ++lineno[curlevel];
        charcount=1;
        nextchar = 0;
    }
}

/* mygetc() gets the next character of text to nextchar.  Throws away
 * end-of-line, and processes directives.  Consumes nextchar.
 *
 * A character pushed back in lastchar is delivered before anything is
 * read from f.  A newline is turned into nextchar 0 and sets sol, the
 * start-of-line flag.  At start of line, PREPROCESSOR_CH introduces a
 * directive; only include is recognized.
 *
 * Returns 0 normally, -2 after a read error on the top-level file, and
 * EOF at the end of the top-level file.
 *
 * NOTE(review): the local i is never used. */
static int mygetc()
{
    int i;
    if (lastchar) {nextchar=lastchar; lastchar=0; }
    else { nextchar = fgetc(f); ++charcount; }
    if (nextchar=='\n') {
        ++lineno[curlevel];
        sol=1;
        charcount=1;
        nextchar=0;
        return(nextchar);   /* nextchar was just cleared, so this returns 0 */
    }
    while (sol && nextchar==PREPROCESSOR_CH) {
        nextchar=0;
        /* Directive name: everything up to the next space, tab or newline. */
        check_name(findstr(' '),"No preprocessor directive");
        if (!strcmp(token,"include")) {
            /* NOTE(review): findstr stops at the first double quote it meets,
             * so an opening quote directly after the directive name would give
             * an empty file name - confirm the directive syntax expected here. */
            check_name(findstr('\"'),"No include name");
            /* Save the current stream and descend one level.
             * NOTE(review): curlevel is not checked against the 5 entries of
             * infile and lineno, and strcpy is not limited to the 64 bytes of
             * an infile entry. */
            inf[curlevel++] = f;
            lineno[curlevel] = 1;
            strcpy(infile[curlevel],token);
            f = fopen(token,"r");
            if (!f) {
                /* Open failed: go back to the saved stream, then report the
                 * name that was recorded one level up. */
                f = inf[--curlevel];
                yyerror2("Unable to open include file",infile[curlevel+1]);
            }
        }
        else {
            yyerror2("Misspelled preprocessor directive",token);
            /* Skip the rest of the offending line. */
            while (1) {
                nextchar=fgetc(f);
                if (nextchar=='\n') break;
                if (nextchar==EOF) break;
            }
        }
        /* Fetch the character that follows the directive. */
        mygetc();
    }
    if (nextchar==EOF) {
        /* NOTE(review): streams are not closed in this function when a level
         * is popped, and nextchar stays EOF after popping; yylex1 treats an
         * EOF nextchar as end of input - confirm that is intended. */
        if (ferror(f)) {
            yyerror("error in file");
            if (curlevel) --curlevel;
            else return(-2);
            f = inf[curlevel];
        }
        if (feof(f)) {
            if (curlevel) --curlevel;
            else return(EOF);
            f = inf[curlevel];
        }
    }
    sol=0;
    return 0;
}

/* mygetcp: append nextchar to the string that *pp points into (the
 * pointer is advanced first, then the character is stored and a NUL is
 * written after it), then read the next character with mygetc.
 * Declared without a return type and returns no value. */
static mygetcp(pp)
char **pp;
{
    *(++(*pp)) = nextchar;
    (*pp)[1] = 0;
    mygetc();
}

/* Number classes stored in lextype while scanning a numeric token. */
#define INTEGER 1513
#define LONGNUM 1514
#define FLOATING 1515
#define HEXADEC 1516

/* The purpose of this is to convey the next token */
/* The following code is useful when debugging the parser */

/* yylex: entry point for the parser.  Calls yylex1 and, when yydebug is
 * nonzero, prints the returned token code and the token text. */
yylex()
{
    int i;
    i = yylex1();
    if (yydebug) printf("The return from yylex1 is %d, token='%s'\n",i, token);
    return i;
}

/* yylex1: scan one token from the input and return its code.  The token
 * text is left in token.  Returns LEXEOF when nextchar is EOF or
 * negative. */
yylex1()
{ /* End of debugging code */
    char *p, *q;
    int comment, i;
    for (;;) {
        /* Skip blanks, tabs and cleared characters (newlines arrive as 0). */
        while (!nextchar || nextchar==' ' || nextchar=='\t') mygetc();
        if (nextchar==EOF || nextchar<0) return LEXEOF;

        /* comments */
        if (nextchar=='/') {
            ++charcount;
            if ((nextchar=fgetc(f))=='*') { /* it is a comment */
                nextchar=fgetc(f);
                ++charcount;
                /* Consume input until a star followed by a slash, or EOF. */
                for (comment=1; comment; ) {
                    if (nextchar==EOF) { yyerror("Unterminated comment"); comment=0; }
                    if (nextchar=='*') {
                        ++charcount;
                        if ((nextchar=fgetc(f))=='/') comment=0;
                        else continue;   /* re-test the character just read */
                    }
                    mygetc();
                }
                continue;   /* restart scanning after the comment */
            }
            else {
                lastchar = nextchar; /* need to put one back */
                nextchar = '/';
            }
        }

        /* identifiers and key words. Start with alpha or _ */
        if (isalpha(nextchar) || nextchar=='_') {
            /* NOTE(review): the copy into token is not length-limited. */
            p = token;
            while (isalnum(nextchar) || nextchar=='_') { *(p++)=nextchar; mygetc(); }
            *p = 0;
            i = LookupID(token);
            yylval = (YYSTYPE) i;
            return token_val[i];
        }

        /* potential numbers, signs, and periods */
        if (isdigit(nextchar) || nextchar=='.') {
            p=token;
            *(p++)=nextchar;
            mygetc();
            /* take care of oddities: a period not followed by a digit is
             * returned as the single-character token '.' */
            if (!isdigit(*token) && !isdigit(nextchar)) {
                if (*token=='.') {token[1]=0; return ('.');}
            }
            /* Collect digits, periods, x and e; a minus sign is accepted only
             * directly after an e.
             * NOTE(review): hex digits a-f are not accepted by this loop. */
            while (isdigit(nextchar) || nextchar=='.' || nextchar=='x' || nextchar=='e') {
                *(p++) = nextchar;
                mygetc();
                if (*(p-1)=='e' && nextchar=='-') { *(p++) = nextchar; mygetc(); }
            }
            *p=0;
            /* Classify: integer by default, hex when the second character is x,
             * floating when the text contains a period or an e (these later
             * tests override the hex classification). */
            lextype = INTEGER;
            if (token[1]=='x') lextype = HEXADEC;
            /* A trailing L forces long.
             * NOTE(review): islong is only ever set in the visible code, never
             * cleared - confirm where it is reset. */
            if (nextchar=='L') {islong = 1; mygetc();}
            if (strchr(token,'.')) lextype = FLOATING;
            if (strchr(token,'e')) lextype = FLOATING;
            if (lextype==FLOATING) dval = strtod(token,&q);
            if (lextype==INTEGER) dval = longval = strtol(token,&q,10);
            /* NOTE(review): the hex branch does not assign dval, yet dval is
             * what gets passed to LookupNum below - confirm. */
            if (lextype==HEXADEC) { longval = strtol(token,&q,16); lextype=INTEGER; }
            /* Integers outside the 16-bit signed range are promoted to long. */
            if ( lextype==INTEGER && !islong && (longval<-32768 || longval>32767)) islong=1;
            intval=longval;
            floatval=dval;
            yylval = LookupNum(dval);
            return CONSTANT;
        }

        /* strings */
        if (nextchar==quotechar) {
            nextchar=0;
            stringlen=findstr(quotechar);
            yylval = (YYSTYPE) LookupID(token);
            return STRING;
        }

        /* character constants: the value is the first byte of the text.
         * NOTE(review): yylval is not assigned in this branch. */
        if (nextchar=='\'') {
            nextchar=0;
            stringlen=findstr('\'');
            if (stringlen>1) yyerror("Bad character definition");
            longval = intval = token[0];
            return CONSTANT;
        }

        /* lots of other stuff, one character each */
        p = token;
        *token=nextchar;
        /* A semicolon is returned without reading ahead. */
        if (*token==';') { token[1] = nextchar = 0; return (';'); }
        /* Read one character ahead to recognize two and three character
         * operators; mygetcp appends each accepted character to token. */
        mygetc();
        if (*token == '=') {
            if (nextchar == '=') {mygetcp(&p); return EQL;}
        }
        else if (*token == '<') {
            if (nextchar == '=') {mygetcp(&p); return LESSEQ;}
            if (nextchar == '<') {
                mygetcp(&p);
                if (nextchar == '=') {mygetcp(&p); return LSHIFTEQ; }
                else return LSHIFT;
            }
        }
        else if (*token == '>') {
            if (nextchar == '=') {mygetcp(&p); return GRTEQ;}
            if (nextchar == '>') {
                mygetcp(&p);
                if (nextchar == '=') {mygetcp(&p); return RSHIFTEQ;}
                else return RSHIFT;
            }
        }
        else if (*token == '-') {
            if (nextchar == '-') {mygetcp(&p); return DEC;}
            if (nextchar == '=') {mygetcp(&p); return MINUSEQ;}
            if (nextchar == '>') {mygetcp(&p); return PTR;}
        }
        else if (*token == '+') {
            if (nextchar == '+') {mygetcp(&p); return INC;}
            if (nextchar == '=') {mygetcp(&p); return PLUSEQ;}
        }
        else if (*token == '!') {
            if (nextchar == '=') {mygetcp(&p); return NOTEQ;}
        }
        else if (*token == '/') {
            if (nextchar == '=') {mygetcp(&p); return DIVEQ;}
        }
        else if (*token == '*') {
            if (nextchar == '=') {mygetcp(&p); return TIMESEQ;}
        }
        else if (*token == '%') {
            if (nextchar == '=') {mygetcp(&p); return MODEQ;}
        }
        token[1]=0;
        return token[0]; /* default for most above ifs */
    }
}