要求:
(1) TINY+ 词法分析器以 TINY+ 源代码为输入,输出为识别出的 token 序列;
(2) 词法分析器以最长匹配为原则,例如‘:=’应识别为赋值符号,而非单独的‘:’及‘=’;
(3) Token 以(种别码,属性值)表示,包含如下类型的种别码:
a) KEY 为关键字;
b) SYM 为系统特殊字符;
c) ID 为变量;
d) NUM 为数值常量;
e) STR 为字符串常量。
(4) 识别词法错误。词法分析器能够给出词法错误的行号并打印出对应的出错消息,主要包含如下类型的词法错误:
a) 非法字符。即不属于 TINY+ 字母表的字符,比如 $ 就是一个非法字符;
b) 字符串匹配错误,比如右部引号丢失,如 'scanner;
c) 注释的右部括号丢失或匹配错误,如 {this is an example。
核心代码:
TokenType 定义集:
globals.h
/*
 * TokenType - every kind of token the scanner can hand back.
 * The enumerators are grouped by category; their order (and therefore
 * their numeric values) is significant and must not be changed.
 */
typedef enum {
    /* book-keeping tokens */
    ENDFILE,
    ERROR,

    /* reserved words
     * (NOTE(review): TRUE1/FALSE1/BOOL1 presumably carry the "1" suffix to
     * avoid clashing with TRUE/FALSE-style macros -- confirm) */
    IF,
    THEN,
    ELSE,
    END,
    REPEAT,
    UNTIL,
    READ,
    WRITE,
    TRUE1,
    FALSE1,
    OR,
    AND,
    NOT,
    INT,
    BOOL1,
    STRING,
    FLOAT,
    DOUBLE,
    DO,
    WHILE,

    /* multicharacter tokens */
    ID,
    NUM,
    STR,

    /* special symbols */
    ASSIGN,   /* := */
    EQ,       /* =  */
    LT,       /* <  */
    MT,       /* >  */
    ME,       /* >= */
    LE,       /* <= */
    PLUS,     /* +  */
    MINUS,    /* -  */
    TIMES,    /* *  */
    OVER,     /* /  */
    LPAREN,   /* (  */
    RPAREN,   /* )  */
    SEMI,     /* ;  */
    COMMA,    /* ,  */
    UPDOX,    /* '  */
    PERCENT   /* %  */
} TokenType;
/* Next section -- the scanner: */
scan.c
/*
 * StateType - the states of the scanner's DFA.
 * getToken starts in START and loops until it reaches DONE.
 */
typedef enum {
    START,      /* between tokens; decides which state to enter next */
    INASSIGN,   /* saw ':' and expects '=' */
    INCOMMENT,  /* inside a { ... } comment */
    INNUM,      /* accumulating digits of a number */
    INID,       /* accumulating letters/digits of an identifier */
    DONE,       /* a complete token (or an error) has been recognised */
    INLE,       /* saw '<'; may become "<=" */
    INME,       /* saw '>'; may become ">=" */
    INUPDOX     /* inside a '...' string constant */
} StateType;
/****************************************/ /* the primary function of the scanner */ /****************************************/ /* function getToken returns the * next token in source file */ TokenType getToken(void) { /* index for storing into tokenString */ int tokenStringIndex = 0; /* holds current token to be returned */ TokenType currentToken; /* current state - always begins at START */ StateType state = START; /* flag to indicate save to tokenString */ int save; while (state != DONE) { int c = getNextChar(); save = TRUE; switch (state) { case START: if (isdigit(c)) state = INNUM; else if (isalpha(c)) state = INID; else if (c == ':') state = INASSIGN; else if (c == '>') state = INME; else if (c == '<') state = INLE; else if ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\r')) save = FALSE; else if (c == '{') { save = FALSE; state = INCOMMENT; } else if (c == '\'') { save = FALSE; state = INUPDOX; } else { state = DONE; switch (c) { case EOF: save = FALSE; currentToken = ENDFILE; break; case '=': currentToken = EQ; break; case '+': currentToken = PLUS; break; case '-': currentToken = MINUS; break; case '*': currentToken = TIMES; break; case '/': currentToken = OVER; break; case '(': currentToken = LPAREN; break; case ')': currentToken = RPAREN; break; case ';': currentToken = SEMI; break; case ',': currentToken = COMMA; break; case '%': currentToken = PERCENT; break; default: currentToken = ERROR; break; } } break; case INCOMMENT: save = FALSE; if (c == EOF) { state = DONE; currentToken = ERROR; strcpy(tokenString,"Missing \" } \" !"); tokenStringIndex+=15; } else if (c == '}') state = START; break; case INUPDOX: if (c == '\'') { save = FALSE; state = DONE; currentToken = STR; } else if (!(linepos < bufsize)) { save = FALSE; state = DONE; currentToken = ERROR; strcpy(tokenString,"Missing \" \' \" !"); tokenStringIndex+=15; } break; case INASSIGN: state = DONE; if (c == '=') currentToken = ASSIGN; else { /* backup in the input */ ungetNextChar(); save = 
FALSE; currentToken = ERROR; } break; case INNUM: if (!isdigit(c)) { /* backup in the input */ ungetNextChar(); save = FALSE; state = DONE; currentToken = NUM; } break; case INLE: if (c=='=') { state = DONE; currentToken = LE; } else { /* backup in the input */ ungetNextChar(); save = FALSE; state = DONE; currentToken = LT; } break; case INME: if (c=='=') { state = DONE; currentToken = ME; } else { /* backup in the input */ ungetNextChar(); save = FALSE; state = DONE; currentToken = MT; } break; case INID: if (!(isalpha(c)||isdigit(c))) { /* backup in the input */ ungetNextChar(); save = FALSE; state = DONE; currentToken = ID; } break; case DONE: default: /* should never happen */ fprintf(listing,"Scanner Bug: state= %d\n",state); state = DONE; currentToken = ERROR; break; } if ((save) && (tokenStringIndex <= MAXTOKENLEN)) tokenString[tokenStringIndex++] = (char) c; if (state == DONE) { tokenString[tokenStringIndex] = '\0'; if (currentToken == ID) currentToken = reservedLookup(tokenString); } } if (TraceScan) { fprintf(listing,"\t%d: ",lineno); printToken(currentToken,tokenString); } return currentToken; } /* end getToken */
打印分词得到的token:
util.c
/*
 * printToken - print a token and its lexeme to the listing file.
 *
 * token:       the token kind to describe.
 * tokenString: the lexeme (or error message) associated with the token; only
 *              used for keywords, NUM, ID, STR and ERROR.
 *
 * Keywords are printed as "KEY: <lexeme>", special symbols as "SYM: <symbol>",
 * and the remaining kinds with their own labels.  Output goes to listing.
 */
void printToken(TokenType token, const char *tokenString)
{
    switch (token) {
    case IF:
    case THEN:
    case ELSE:
    case END:
    case REPEAT:
    case UNTIL:
    case READ:
    case WRITE:
    case TRUE1:
    case FALSE1:
    case OR:
    case AND:
    case NOT:
    case INT:
    case BOOL1:
    case FLOAT:
    case STRING:
    case DOUBLE:
    case DO:
    case WHILE:
        fprintf(listing, "KEY: %s\n", tokenString);
        break;
    case ASSIGN:
        fprintf(listing, "SYM: :=\n");
        break;
    case LT:
        fprintf(listing, "SYM: <\n");
        break;
    case MT:
        fprintf(listing, "SYM: >\n");
        break;
    case LE:
        fprintf(listing, "SYM: <=\n");
        break;
    case ME:
        fprintf(listing, "SYM: >=\n");
        break;
    case EQ:
        fprintf(listing, "SYM: =\n");
        break;
    case COMMA:
        fprintf(listing, "SYM: ,\n");
        break;
    case UPDOX:
        fprintf(listing, "SYM: \'\n");
        break;
    case PERCENT:
        /* "%%" prints a literal '%'; a lone '%' in a format string is an
         * invalid conversion specification (undefined behaviour). */
        fprintf(listing, "SYM: %%\n");
        break;
    case LPAREN:
        fprintf(listing, "SYM: (\n");
        break;
    case RPAREN:
        fprintf(listing, "SYM: )\n");
        break;
    case SEMI:
        fprintf(listing, "SYM: ;\n");
        break;
    case PLUS:
        fprintf(listing, "SYM: +\n");
        break;
    case MINUS:
        fprintf(listing, "SYM: -\n");
        break;
    case TIMES:
        fprintf(listing, "SYM: *\n");
        break;
    case OVER:
        fprintf(listing, "SYM: /\n");
        break;
    case ENDFILE:
        fprintf(listing, "EOF\n");
        break;
    case NUM:
        fprintf(listing, "NUM, val= %s\n", tokenString);
        break;
    case ID:
        fprintf(listing, "ID, name= %s\n", tokenString);
        break;
    case STR:
        fprintf(listing, "STR, val= %s\n", tokenString);
        break;
    case ERROR:
        fprintf(listing, "ERROR: %s\n", tokenString);
        break;
    default: /* should never happen */
        fprintf(listing, "Unknown token: %d\n", token);
        break;
    }
}
测试与用例:
错误用例: