TINY+Scanner词法分析程序设计

任务:理解 TINY 语言的词法及词法分析器的实现,并基于该词法分析器,实现扩展语言 TINY+ 的词法分析器。
 

要求:

(1) TINY+ 词法分析器以 TINY+ 源代码为输入,输出为识别出的 token 序列;

(2) 词法分析器以最长匹配为原则,例如‘:=’应识别为赋值符号,而非单独的‘:’及‘=’;

(3) Token 以(种别码,属性值)表示,包含如下类型的种别码:

        a) KEY 为关键字;

        b) SYM 为系统特殊字符;

        c) ID 为变量;

        d) NUM 为数值常量;

        e) STR 为字符串常量。

(4) 识别词法错误。词法分析器能够给出词法错误的行号并打印出对应的出错消息,主要包含如下类型的词法错误:

        a) 非法字符。即不属于 TINY+ 字母表的字符,例如 $ 就是一个非法字符;

        b) 字符串匹配错误,例如右部引号丢失,如 ‘scanner;

        c) 注释的右部括号丢失或匹配错误,如 {this is an example。

核心代码:

TokenType(token 类型)定义:

globals.h
/* TokenType enumerates every token class the scanner can return.
 * The enumerators are grouped by category; their order (and therefore
 * their numeric values) must not change.
 */
typedef enum
   {
    /* book-keeping tokens: end of input and lexical error */
    ENDFILE,
    ERROR,

    /* reserved words (printed as KEY) */
    IF, THEN, ELSE, END,
    REPEAT, UNTIL,
    READ, WRITE,
    TRUE1, FALSE1,
    OR, AND, NOT,
    INT, BOOL1, STRING, FLOAT, DOUBLE,
    DO, WHILE,

    /* multicharacter tokens: identifier, numeric and string constants */
    ID,
    NUM,
    STR,

    /* special symbols (printed as SYM) */
    ASSIGN,            /* :=          */
    EQ,                /* =           */
    LT, MT,            /* <  and  >   */
    ME, LE,            /* >= and  <=  */
    PLUS, MINUS,       /* +  and  -   */
    TIMES, OVER,       /* *  and  /   */
    LPAREN, RPAREN,    /* (  and  )   */
    SEMI,              /* ;           */
    COMMA,             /* ,           */
    UPDOX,             /* '           */
    PERCENT            /* %           */
   } TokenType;
扫描器:
 
scan.c
/* States of the scanner DFA.  START is the initial state and DONE the
 * accepting state; every other state means "inside" a partially read
 * token.
 */
typedef enum
   {
     START,       /* no character of the next token consumed yet */
     INASSIGN,    /* read ':'                                    */
     INCOMMENT,   /* inside '{' ... '}'                          */
     INNUM,       /* inside a run of digits                      */
     INID,        /* inside an identifier or reserved word       */
     DONE,        /* a complete token has been recognised        */
     INLE,        /* read '<'                                    */
     INME,        /* read '>'                                    */
     INUPDOX      /* inside a single-quoted string               */
   }
   StateType;
/****************************************/
/* the primary function of the scanner  */
/****************************************/
/* function getToken returns the 
 * next token in source file
 */
TokenType getToken(void)
{  /* index for storing into tokenString */
   int tokenStringIndex = 0;
   /* holds current token to be returned */
   TokenType currentToken;
   /* current state - always begins at START */
   StateType state = START;
   /* flag to indicate save to tokenString */
   int save;
   while (state != DONE)
   {
	int c = getNextChar();
     save = TRUE;
     switch (state)
     { case START:
         if (isdigit(c))
           state = INNUM;
         else if (isalpha(c))
           state = INID;
         else if (c == ':')
           state = INASSIGN;
	 else if (c == '>')
           state = INME;
	 else if (c == '<')
           state = INLE;
         else if ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\r'))
           save = FALSE;
         else if (c == '{')
         { save = FALSE;
           state = INCOMMENT;
         }
	 else if (c == '\'')
         { save = FALSE;
           state = INUPDOX;
         }
         else
         { state = DONE;
           switch (c)
           { case EOF:
               save = FALSE;
               currentToken = ENDFILE;
               break;
             case '=':
               currentToken = EQ;
               break;
             case '+':
               currentToken = PLUS;
               break;
             case '-':
               currentToken = MINUS;
               break;
             case '*':
               currentToken = TIMES;
               break;
             case '/':
               currentToken = OVER;
               break;
             case '(':
               currentToken = LPAREN;
               break;
             case ')':
               currentToken = RPAREN;
               break;
             case ';':
               currentToken = SEMI;
               break;
	     case ',':
               currentToken = COMMA;
               break;
	     case '%':
               currentToken = PERCENT;
               break;
             default:
               currentToken = ERROR;
               break;
           }
         }
         break;
       case INCOMMENT:
         save = FALSE;
         if (c == EOF)
         { 
	   state = DONE;
           currentToken = ERROR;
	   strcpy(tokenString,"Missing \" } \" !");
	   tokenStringIndex+=15;
         }
         else if (c == '}') 
		state = START;
         break;
       case INUPDOX:
         if (c == '\'') 
	 {
	   save = FALSE;
	   state = DONE;
	   currentToken = STR;
	 }
	 else if (!(linepos < bufsize))
	 {
	   save = FALSE;
	   state = DONE;
	   currentToken = ERROR;
	   strcpy(tokenString,"Missing \" \' \" !");
	   tokenStringIndex+=15;
	 }
         break;
       case INASSIGN:
         state = DONE;
         if (c == '=')
           currentToken = ASSIGN;
         else
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           currentToken = ERROR;
         }
         break;
       case INNUM:
         if (!isdigit(c))
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           state = DONE;
           currentToken = NUM;
         }
         break;
       case INLE:
         if (c=='=')
	 {  state = DONE;
	    currentToken = LE;
	 }
	 else
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           state = DONE;
           currentToken = LT;
         }
         break;
       case INME:
         if (c=='=')
	 {  state = DONE; 
	    currentToken = ME;
	 }
	 else
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           state = DONE;
           currentToken = MT;
         }
         break;
       case INID:
         if (!(isalpha(c)||isdigit(c)))
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           state = DONE;
           currentToken = ID;
         }
         break;
       case DONE:
       default: /* should never happen */
         fprintf(listing,"Scanner Bug: state= %d\n",state);
         state = DONE;
         currentToken = ERROR;
         break;
     }
     if ((save) && (tokenStringIndex <= MAXTOKENLEN))
       tokenString[tokenStringIndex++] = (char) c;
     if (state == DONE)
     { tokenString[tokenStringIndex] = '\0';
       if (currentToken == ID)
         currentToken = reservedLookup(tokenString);
     }
   }
   if (TraceScan) {
     fprintf(listing,"\t%d: ",lineno);
     printToken(currentToken,tokenString);
   }
   return currentToken;
} /* end getToken */


打印分词得到的token:

util.c
/* Procedure printToken prints a token
 * and its lexeme to the listing file.
 *
 *   token       - token class to print
 *   tokenString - lexeme text; printed for reserved words, NUM, ID,
 *                 STR and ERROR, and ignored for symbols and ENDFILE,
 *                 whose spelling is fixed
 *
 * Reserved words are labelled KEY and special symbols SYM.
 */
void printToken( TokenType token, const char* tokenString )
{ switch (token)
  { case IF:
    case THEN:
    case ELSE:
    case END:
    case REPEAT:
    case UNTIL:
    case READ:
    case WRITE:
    case TRUE1:
    case FALSE1:
    case OR:
    case AND:
    case NOT:
    case INT:
    case BOOL1:
    case FLOAT:
    case STRING:
    case DOUBLE:
    case DO:
    case WHILE:
      fprintf(listing,
         "KEY: %s\n",tokenString);
      break;
    case ASSIGN: fprintf(listing,"SYM: :=\n"); break;
    case LT: fprintf(listing,"SYM: <\n"); break;
    case MT: fprintf(listing,"SYM: >\n"); break;
    case LE: fprintf(listing,"SYM: <=\n"); break;
    case ME: fprintf(listing,"SYM: >=\n"); break;
    case EQ: fprintf(listing,"SYM: =\n"); break;
    case COMMA: fprintf(listing,"SYM: ,\n"); break;
    case UPDOX: fprintf(listing,"SYM: \'\n"); break;
    /* a literal percent sign must be written as %% in a format string;
     * a lone '%' is an invalid conversion specification */
    case PERCENT: fprintf(listing,"SYM: %%\n"); break;
    case LPAREN: fprintf(listing,"SYM: (\n"); break;
    case RPAREN: fprintf(listing,"SYM: )\n"); break;
    case SEMI: fprintf(listing,"SYM: ;\n"); break;
    case PLUS: fprintf(listing,"SYM: +\n"); break;
    case MINUS: fprintf(listing,"SYM: -\n"); break;
    case TIMES: fprintf(listing,"SYM: *\n"); break;
    case OVER: fprintf(listing,"SYM: /\n"); break;
    case ENDFILE: fprintf(listing,"EOF\n"); break;
    case NUM:
      fprintf(listing,
          "NUM, val= %s\n",tokenString);
      break;
    case ID:
      fprintf(listing,
          "ID, name= %s\n",tokenString);
      break;
    case STR:
      fprintf(listing,
          "STR, val= %s\n",tokenString);
      break;
    case ERROR:
      fprintf(listing,
          "ERROR: %s\n",tokenString);
      break;
    default: /* should never happen */
      /* cast: %d expects int, and an enum's underlying type may differ */
      fprintf(listing,"Unknown token: %d\n",(int) token);
      break;
  }
}

测试与用例:

错误用例: