这是偶然翻出来的一段大学时的代码——词法分析器,又称扫描器。词法分析是指将我们编写的文本代码流解析为一个一个的记号,分析得到的记号供后续语法分析使用。词法分析器的工作是低级别的分析:将字符或者字符序列转化成记号。在谈论词法分析时,我们使用术语“词法记号”(简称记号)、“模式”和“词法单元”表示特定的含义。
在分析时有两种做法:一种是把词法分析器当成语法分析的一部分,另一种是把词法分析器当成编译程序的独立部分。在前一种情况下,词法分析器不断地被语法分析器调用,每调用一次,词法分析器就从源程序的字符序列中拼出一个单词,并将其Token值返回给语法分析器。后一种情况则不同,词法分析器不是被语法分析器不断地调用,而是一次扫描所有单词,作为编译器独立的一遍来完成。
词法分析器的主要特点是不依靠语法,而只依靠词法,即处理一个单词时不依赖于外部单词的信息,所以词法分析器通常都很简单。当然,对某些语言做词法分析时,在有些情况下不得不往前查看多个字符,有时还要做一些特殊处理,还有一些在词法分析中处理不了的,要留到语法分析中进行处理。
本算法主要利用状态转换图生成一个词法分析器,对输入的程序进行词法分析,并将分析得到的单词造表。其中关键字表和界限符表的大小是由高级语言的子集决定的,可以用数组装载;而标识符表和常数表的大小取决于输入的待分析程序中变量、过程名、常数的个数,所以应当用指针组成动态链表来装载。当然,为了方便,我们也把它们定义成数组处理。
语法分析时,调用词法分析器,根据已知文法利用递归下降分析,检查语法错误。
本程序为类C编译程序,词法分析时能识别各类关键字、标识符、常数、界符、算符,可以剔除注释内容(支持单行注释及多行注释);语法分析时能分析C语言的一个小程序,程序体中可以包含变量声明、简单赋值语句、带表达式的复杂赋值语句等内容。
程序在Microsoft Visual C++ 6.0 中运行通过。
/************************************************************************/
/* Usage: put the code to analyse in a text file in the program's working
   directory (main() opens "003.txt"; an earlier revision of this note
   called the file file.txt).
   The program analyses a simple C program made of header includes and one
   function.  The function body may contain variable declarations,
   assignments, conditional statements and loops; the right-hand side of
   an assignment may be any expression built from the four arithmetic
   operators.
     headers      : #include < stdio.h >      #include " scanner.c "
     declarations : int sum ,num1i,num2i,num ;   int flag ;   bool T ;
     assignments  : sum = num1i + num2i * (num1i / (num2i + num))
                    flag = 1 ;   T = true ;
     conditionals : if(num <= 30) { ... }      if( flag ) { ... }
     loops        : while( true ){ ... }       while( flag ){ ... }
                    while( 1 ){ ... }          while( T ){ ... }
                    while( num == 20 ){ ... }
                    for( num = 0; num < 100; num += 2 ){ ... }
                    for( num = 100; num >= 0; num -- ){ ... }
   Author    : 车水码农
   Date      : 2009.4.24
   2009.5.27 : added conditional and increment statements, renamed some
               functions, replaced some empty statements with return;
               highest error code 143
   2009.5.28 : minor changes
   2009.5.29 : added some comments, extended this description
   2009.5.30 : added the return statement, error code 152               */
/************************************************************************/
#include<stdio.h>
#include<stdlib.h>
#include <ctype.h>
#include<string.h>

FILE *fp;               /* source file being analysed */
int Line = 1;           /* current line number, printed by error() */
int Valuei;             /* value of the constant most recently matched by the parser */
float Valuef;
char *type;             /* type name most recently recognised by the parser */
int err_count = 0;      /* number of errors reported through error() */

/* Identifier table */
struct id_Table
{
    char id_Name[15];   /* function or variable name */
    char *id_type;      /* type (int, void, ...) */
    char *id_Attrib;    /* attribute: function or variable */
    float id_Valuef;
    int id_Valuei;
    int id_Beg;         /* scope start marker */
    int id_End;         /* scope end marker */
}ID[500];

/* Constant table */
struct num_Table
{
    char num[10];       /* text of the constant */
    char *num_Type;     /* data type */
}NUM[100];

char *token = "";       /* word currently being assembled by the lexer */
char *headerFile[10];   /* names of the header files seen so far */

/* Reserved words; reserve() returns the 1-based position in this list */
char *key[] = {"auto","break","case","char","const","continue","default",
               "do","double","else","enum","extern","float","for",
               "goto","if","int","long","register","return","short",
               "signed","static","sizeof","struct","switch","typedef","union",
               "unsigned","void","volatile","while"};
int keyCount = sizeof key / sizeof key[0];  /* number of reserved words */

int opPoint = 0;        /* number of used entries in ID */
int numPoint = 0;       /* number of used entries in NUM */
int headerPoint = 0;    /* number of used entries in headerFile */
char ch;

/* Non-zero when ch is a white-space character.
   The cast keeps negative char values (non-ASCII bytes) out of <ctype.h>,
   whose functions are only defined for unsigned char values and EOF. */
int space(char ch)
{
    return isspace((unsigned char)ch);
}

/* Non-zero when ch is a letter */
int letter(char ch)
{
    return isalpha((unsigned char)ch);
}

/* Non-zero when ch is a decimal digit */
int digit(char ch)
{
    return isdigit((unsigned char)ch);
}

/* Error reporting: counts the error and prints code i, line number and text */
void error(int i,int line,char *str)
{
    err_count++;
    printf(" error:%2d, Line: %2d : %s\n",i,line,str);
}

/* Append one character to the word held in token.
   No capacity is known here: the caller must make sure token points to a
   writable buffer with room for one more character and the terminator. */
void concatennation(char ch)
{
    size_t len = strlen(token);

    token[len] = ch;
    token[len + 1] = '\0';
}

/* Look token up in the reserved-word list.
   Returns its 1-based code, or 0 when token is not a reserved word. */
int reserve(char *token)
{
    for (int i = 0; i < keyCount; i++)
    {
        if (!strcmp(token, key[i]))
            return i + 1;
    }
    return 0;
}

/* Move the file position back by one byte so the character is read again.
   ch is the character being given back; when it compares equal to EOF the
   read consumed nothing, so there is nothing to undo. */
void retract(FILE *fp,char ch)
{
    if (ch == EOF)
        return;
    fseek(fp, -1L, SEEK_CUR);
}

/* Enter token into one of the tables.
     tag 0 : identifier table; an identifier already present is not added again
     tag 1 : constant table; every occurrence is appended
     tag 2 : header-file list; a name already present is not added again
   Returns the position of the entry, or -1 when nothing could be stored
   (unknown tag, table full, identifier too long, out of memory). */
int buildlist(char *token,int tag)
{
    int i;

    switch (tag)
    {
    case 0: /* identifier table */
        for (i = 0; i < opPoint; i++)
        {
            if (!strcmp(ID[i].id_Name, token))
                return i;               /* seen before: report its position */
        }
        if (strlen(token) >= sizeof ID[0].id_Name)
        {
            error(105, Line, "table error : identifier too long");
            return -1;
        }
        if (opPoint >= (int)(sizeof ID / sizeof ID[0]))
        {
            error(106, Line, "table error : identifier table full");
            return -1;
        }
        strcpy(ID[opPoint].id_Name, token);
        return opPoint++;

    case 1: /* constant table */
        if (numPoint >= (int)(sizeof NUM / sizeof NUM[0]))
        {
            error(106, Line, "table error : constant table full");
            return -1;
        }
        if (strlen(token) >= sizeof NUM[0].num)
            error(105, Line, "table error : constant too long, truncated");
        /* bounded copy: always terminated, never writes past num[] */
        snprintf(NUM[numPoint].num, sizeof NUM[0].num, "%s", token);
        return numPoint++;

    case 2: /* header-file list */
        for (i = 0; i < headerPoint; i++)
        {
            if (!strcmp(headerFile[i], token))
                return i;
        }
        if (headerPoint >= (int)(sizeof headerFile / sizeof headerFile[0]))
        {
            error(106, Line, "table error : header table full");
            return -1;
        }
        headerFile[headerPoint] = (char *)malloc(strlen(token) + 1);
        if (headerFile[headerPoint] == NULL)
        {
            error(107, Line, "table error : out of memory");
            return -1;
        }
        strcpy(headerFile[headerPoint], token);
        return headerPoint++;

    default:
        return -1;
    }
}

/************************************************************************/
/* Lexical analysis                                                     */
/************************************************************************/ char *scanner(char ch) { token=(char *)malloc(15); *token = NULL; char chNext; /*剔除空白字符*/ while((ch ==' '|| ch == '\t')&&(ch != EOF)) { ch=fgetc(fp); } /************************************************************************/ /* 头文件名、标识符或保留字 */ /************************************************************************/ //合法标识符一字母或下划线打头 if(letter(ch)||(ch=='_')) { while(letter(ch) || digit(ch)||(ch =='_')) { concatennation(ch); ch = fgetc(fp); } chNext = fgetc(fp); //头文件 if ((ch=='.')&&( (chNext=='h')||(chNext=='c') )) { concatennation(ch); concatennation(chNext); buildlist(token,2); return token; } else { retract(fp,chNext); retract(fp,ch); if (reserve(token)) { //保留字 reserve(token); return token; } else {//标识符 buildlist(token,0); return token; } } } /************************************************************************/ /* 常数数字串 */ /************************************************************************/ else if(digit(ch)) { //数字串 int t = 0; while(digit(ch) || (ch == '.')) { // if ('.' 
== ch) { t ++ ; } concatennation(ch); ch = fgetc(fp); } retract(fp,ch); buildlist(token,1); if (0 == t) { NUM[numPoint-1].num_Type = "int"; } else if (1 == t) { NUM[numPoint-1].num_Type = "float"; } else { error(101,Line,"scanner error : Incrementimal point"); } return token; } /************************************************************************/ /* 运算符'+'系列 */ /************************************************************************/ else if(ch == '+') { chNext = fgetc(fp); if(chNext == '=') { //赋值运算符 return "+="; } else if(chNext == '+') { //自增运算符 return "++"; } else { //双目运算符'+' retract(fp,chNext); return "+"; } } /************************************************************************/ /* 运算符'-'系列 */ /************************************************************************/ else if (ch == '-') { chNext = fgetc(fp); if (ch == '=') { //赋值运算符 return "-="; } else if(chNext == '-') { //自减运算符 return "--"; } else { //双目运算符'-' retract(fp,chNext); return "-"; } } /************************************************************************/ /* 运算符'*'系列 */ /************************************************************************/ else if (ch == '*') { chNext = fgetc(fp); if(chNext == '=') { //赋值运算符 return "*="; } else { retract(fp,chNext);//双目运算符'*' return "*"; } } /************************************************************************/ /* 运算符'/'及注释系列 */ /************************************************************************/ else if (ch == '/') { chNext = fgetc(fp); if(chNext == '=') { //赋值运算符 return "/="; } else if(chNext == '/') { //单行注释 ch = fgetc(fp); while (ch!='\n') { ch = fgetc(fp); } scanner(fgetc(fp)); } else if(chNext == '*') { char chh=' '; ch = fgetc(fp); while((chh!='*')&&(ch!='/')) { //多行注释 chh = ch; ch = fgetc(fp); } scanner(fgetc(fp)); } else { //双目运算符'/' retract(fp,chNext); return "/"; } } /************************************************************************/ /* 赋值运算符、等号 */ 
/************************************************************************/ else if(ch == '=') { chNext = fgetc(fp); if(chNext == '=') { //等号 return "=="; } else { //赋值号 retract(fp,chNext); return "="; } } /************************************************************************/ /* 小于及小于等于 */ /************************************************************************/ else if (ch == '<') { chNext = fgetc(fp); if(chNext == '=') { return "<="; } else { retract(fp,chNext); return "<"; } } /************************************************************************/ /* 大于及大于等于 */ /************************************************************************/ else if (ch == '>') { chNext = fgetc(fp); if(chNext == '=') { return ">="; } else { retract(fp,chNext); return ">"; } } else if (ch == '%') { return "%"; } /************************************************************************/ /* 界符'('、')'、'{'、'}'、'['、']'、'"'、'\''、','、';'等 */ /************************************************************************/ else if (ch == '(') { return "("; } else if (ch == ')') { return ")"; } else if (ch == '{') { return "{"; } else if (ch == '}') { return "}"; } else if (ch == '[') { return "["; } else if (ch == ']') { return "]"; } else if (ch == ',') { return ","; } else if (ch == ';') { return ";"; } else if (ch == '"') { return "\""; } else if (ch == '\'') { return "'"; } /************************************************************************/ /* 其余字符 */ /************************************************************************/ else if (ch == '\n') { return "\n"; } else if (ch == '#') { return "#"; } else if (ch == '&') { return "&"; } else if (ch == '|') { return "|"; } else if (ch == '^') { return "^"; } else if (ch == '%') { return "%"; } else { error(100,Line,"scanner error : "); //scanner(fgetc(fp)); } return "chwen"; } /************************************************************************/ /* 语法分析部分 */ /************************************************************************/ 
void var_List(); //调用词法分析器获取一个单词 char *getNext() { char ch; ch = fgetc(fp); while (ch != EOF) { token = scanner(ch);//获取一个词语 if (!strcmp(token,"\n")) { //若为换行符则行数加加 Line++; getNext(); } return token; } return "chwen"; } //变量定义类型 是返回1,不然返回0 int Type(char *lookhead) { if(!strcmp(lookhead,"int")) { type = "int"; return 1; } else if (!strcmp(lookhead,"float")) { type = "float"; return 1; } else { return 0; } } //函数返回值类型 void re_Type(char*lookhead) { if (!strcmp(lookhead,"void")) { type = "void"; } else { Type(lookhead); } } //函数名 void myFunction_Name(char *lookhead) { for (int i=0;i<opPoint;i++) { if (!strcmp(lookhead,ID[i].id_Name)) { ID[i].id_Attrib = "hanshu"; ID[i].id_Beg = Line; ID[i].id_type = type; } } } //变量声明 void id_name() { char *lookhead = getNext(); for (int i=0;i<opPoint;i++) { if (!strcmp(lookhead,ID[i].id_Name)) { if (i == opPoint-1) { ID[i].id_Attrib = "bianliang"; ID[i].id_Beg = Line; ID[i].id_type = type; } } } } //变量名 不合法返回-1 int id_name1(char*lookhead) { int flag = -1; for (int i=0;i<opPoint;i++) { if (!strcmp(lookhead,ID[i].id_Name)) { flag=i; if (ID[i].id_Beg == 0) { error(110,Line,"syntax error :variable unDeclareared identifier"); } } } return flag; } //变量长定义 void var_List1() { char *lookhead = getNext(); if (!strcmp(lookhead,",")) { var_List();// } else if (!strcmp(token,";")) { ; } else { error(111,Line,"syntax error : missing ';'"); } } //变量列表 void var_List() { id_name();// var_List1();// } //变量声明 void Declare(char *lookhead) { Type(lookhead);// var_List(); // } //常数 不是常数返回 -1 int Const(char *lookhead) { int Flag = -1; for (int i=0;i<numPoint;i++) { if (!strcmp(lookhead,NUM[i].num)) { Flag = i; Valuei = atoi(lookhead); } } return Flag; } //运算符号 是返回1 不然返回0 int opChar(char *lookhead) { if (!strcmp(lookhead,"+") || !strcmp(lookhead,"-") || !strcmp(lookhead,"*") || !strcmp(lookhead,"/")) { return 1; } return 0; } void T(char*); //带括号的表达式 void F(char*lookhead) { if (-1 != id_name1(lookhead)) { ; } else if(!strcmp(lookhead,"(")) { T(getNext()); if 
(strcmp(token,")")) { error(112,Line,"syntax error( : missing ')'"); } } else { error(113,Line,"syntax error( : missing ')'"); } } void T1() { char *lookhead = getNext(); if (opChar(lookhead)) { lookhead = getNext(); F(lookhead); T1();// } else if(!strcmp(lookhead,";")) { ; } else { error(145,Line,"syntax error : missing ';'"); } } //带括号的加减乘除表达式 void T(char*lookhead) { F(lookhead);// T1();// } void Bdshi(char*lookhead) { T(lookhead);// } int test_id(char *lookhead) { int Flag = -1; for (int i=0;i<opPoint;i++) { //该str是否是合法标识符 if (!strcmp(lookhead,ID[i].id_Name) && (ID[i].id_Beg!=0)) { Flag = i; } } return Flag; } //赋值语句 void Fuzhi(char *lookhead,int Flag) { lookhead = getNext(); if (!strcmp(lookhead,"=")) { lookhead = getNext(); if (-1 != Const(lookhead)) { //将常量付给变量 ID[Flag].id_Valuei = Valuei; if(strcmp(getNext(),";")) { error(114,Line,"syntax error : missing ';'"); } } else { Bdshi(lookhead);//表达式付给变量 } } } //条件语句 void Condition() { char * lookhead = getNext(); if(!strcmp(lookhead,"true")||!strcmp(lookhead,"false")||!strcmp(lookhead,"0")||!strcmp(lookhead,"1")) { ; } else if(-1 != id_name1(lookhead)) { lookhead = getNext(); if( (!strcmp(lookhead,"<")) ||(!strcmp(lookhead,"<=")) ||(!strcmp(lookhead,">")) ||(!strcmp(lookhead,">=")) ||(!strcmp(lookhead,"==")) ||(!strcmp(lookhead,"!="))) { lookhead = getNext(); if(-1 == Const(lookhead)) { error(140,Line,"syntax error : ')'"); } } else { retract(fp,ch); } } else { error(141,Line,"syntax error : ')'"); } } //增量语句 void Increment() { char * lookhead = getNext(); if (-1 != id_name1(lookhead)) { lookhead = getNext(); if ((!strcmp(lookhead,"++"))||(!strcmp(lookhead,"--"))) { ; } else if((!strcmp(lookhead,"+=")) ||(!strcmp(lookhead,"-=")) ||(!strcmp(lookhead,"*=")) ||(!strcmp(lookhead,"/="))) { lookhead = getNext(); if(-1 == Const(lookhead)) { error(142,Line,"syntax error : ')'"); } } } else { retract(fp,ch); } } void myFunction(); //if 条件语句 void myIf() { char *lookhead = getNext(); if (!strcmp(lookhead,"(")) { Condition(); 
if (!strcmp(getNext(),")")) { if (!strcmp(getNext(),"{")) { myFunction(); retract(fp,ch); if (strcmp(getNext(),"}")) { error(116,Line,"fatal error : unexpected end of file found"); } } else { error(135,Line,"syntax error : missing ';' before '}'"); } } else { error(136,Line,"syntax error : missing ')' before '{'"); } } else { error(117,Line,"syntax error : missing ';' after 'if'"); } } //while 循环语句 void myWhile() { char *lookhead = getNext(); if (!strcmp(lookhead,"(")) { Condition(); if (!strcmp(getNext(),")")) { if (!strcmp(getNext(),"{")) { myFunction(); retract(fp,ch); if (strcmp(getNext(),"}")) { //改写 error(116,Line,"fatal error : unexpected end of file found"); } } else { error(137,Line,"syntax error : missing ';' before '}'"); } } else { error(138,Line,"syntax error : missing ')' before '{'"); } } else { error(117,Line,"syntax error : missing ';' after 'while'"); } } //for 循环语句 void myFor() { int i=0; char * lookhead = getNext(); if (!strcmp(lookhead,"(")) { lookhead = getNext(); if (-1 != ( i=test_id(lookhead))) { //初值 Fuzhi(lookhead,i); } else { error(131,Line,"unDeclareared identifier"); } Condition();//条件 lookhead = getNext(); if (!strcmp(lookhead,";")) { Increment();//增量 lookhead = getNext(); if (!strcmp(lookhead,")")) { if (!strcmp(getNext(),"{")) { myFunction(); retract(fp,ch); if (strcmp(getNext(),"}")) { error(116,Line,"fatal error : unexpected end of file found"); } } else { error(134,Line,"syntax error : missing ';' before '}'"); } } else { error(133,Line,"syntax error : missing ')' before '{'"); } } else { error(130,Line,"not ';'"); } } } void myReturn () { char *lookhead = getNext(); if (!strcmp(lookhead,"0")) { lookhead = getNext(); if (strcmp(lookhead,";")) { error(151,Line,"syntax error : missing ';' before '}'"); } } else { error(150,Line,"function should return a value; 'void' return type assumed"); } } //函数体语句 void myFunction1(char *str) { int i = 0; if (Type(str)) { Declare(str); return; } else if (-1 != ( i=test_id(str))) { Fuzhi(str,i); 
return; } else if (!strcmp(str,"if")) { myIf(); return; } else if (!strcmp(str,"while")) { myWhile(); return; } else if (!strcmp(str,"for")) { myFor(); return; } else if (!strcmp(str,"return")) { myReturn(); return; } else { ; } } //函数体 void myFunction() { char *str = getNext(); while (strcmp(str,"}")) { myFunction1(str); str = getNext(); } } //参数表 void P() { ; } void headFile() { char *lookhead = getNext(); } //编译预处理头文件 void includ(char*lookhead) { if (!strcmp(lookhead,"#")) { if (!strcmp(getNext(),"include")) { if (!strcmp(getNext(),"<") ) { headFile();// if (strcmp(getNext(),">") ) { error(119,Line,"invalid preprocessor command"); } } else { error(120,Line,"invalid preprocessor command"); } } else { error(121,Line,"invalid preprocessor command"); } } else { error(122,Line,"syntax error : missing ';'"); } } //主函数语句 void myMain(char *lookhead) { re_Type(lookhead);//1 lookhead = getNext(); myFunction_Name(lookhead);//2 lookhead = getNext(); if(!strcmp(lookhead,"(")) { P(); if(!strcmp(getNext(),")")) { if(!strcmp( getNext(),"{")) { myFunction();// retract(fp,ch); lookhead = getNext(); if (!strcmp( lookhead,"}")) { ch = fgetc(fp); while (EOF != ch) { //处理结束符‘}’后还有其余字符 if (!(ch==' ')&&!(ch=='\n')&&!(ch=='\t')) { error(143,Line,"syntax error : missing ';' before '}'"); } ch = fgetc(fp); } } else { error(123,Line,"fatal error : unexpected end of file found or missing '}'"); } } else { error(124,Line,"syntax error : missing '{'"); } } else { error(125,Line,"syntax error : missing ')'"); } } else { error(126,Line,"syntax error : missing '('"); } } //一简单程序(头文件、主函数) int S() { char*lookhead = ""; lookhead = getNext(); while (!strcmp(lookhead,"#")) { includ(lookhead); lookhead = getNext(); } myMain(lookhead); return err_count; } void tttt(int t) { for (int i=0;i<t;i++) { printf("\t"); } } //将源程序格式化输出 void print() { fseek(fp,0,0);int tt = 0; ch = fgetc(fp); while(ch != EOF) { if(';'==ch) { putchar(ch); putchar('\n'); tttt(tt); } else if('\n'==ch) { ; } else if ('{'==ch) { 
tt++; putchar(ch); putchar('\n'); tttt(tt); } else if('}'==ch) { tt--; printf("\b\b\b\b\b\b\b\b"); putchar(ch); putchar('\n'); tttt(tt); } else if('>'==ch) { putchar(ch); putchar('\n'); } else if((' '==ch)||('\t'==ch)) { putchar(' '); while((' '==ch)||('\t'==ch)) { ch = fgetc(fp); } if(EOF!=ch) { retract(fp,ch); } else { break; } } else { putchar(ch); } ch = fgetc(fp); } } void Result() { if (!S()) { printf("\nThe source file: %d error(s)\n",err_count); print(); printf("\n The source file success!\n\n\n"); } else { printf("\n The source file:%d error(s)\n\n\n",err_count); } } /************************************************************************/ /* 主测试函数 */ /************************************************************************/ int main() { if((fp = fopen("003.txt","rt")) == NULL) { printf("cannot open the file!!\n"); exit(0); } Result(); fclose(fp); return 0; }