简易词法分析功能
(1)读取一个 txt 程序文件(最后的 # 作为结束标志,不可省去)
{
    int a, b;
    a = 10;
    if(a>=1){
        b = a + 20;
    }
}
#
(2)词法识别分析表
单词类别 | 单词自身值 | 内部编码 |
---|---|---|
关键字 | int、for、while、do、return、break、continue | 1 |
标识符 | 除关键字外的以字母开头,后跟字母、数字的字符序列 | 2 |
常数 | 无符号整型数 | 3 |
运算符 | +、-、*、/、>、<、=、>=、<=、!= | 4 |
界限符 | ,、;、{、}、(、) | 5 |
换行符 | \n | 6 |
(3)输出结果:
(5,{) (6,\n) (1,int) (2,a) (5,,) (2,b) (5,;) (6,\n) (2,a) (4,=) (3,10) (5,;) (6,\n) (2,if) (5,() (2,a) (4,>=) (3,1) (5,)) (5,{) (6,\n) (2,b) (4,=) (2,a) (4,+) (3,20) (5,;) (6,\n) (5,}) (6,\n) (5,}) (6,\n) (0,#)
并保存成新的 txt 文件
(1)程序文件目录:
(2)Word.java 文件:
package com.java997.analyzer.lexical; /** * <p> * 表示识别后的词实体类 * * @author XiaoPengwei * @since 2019-06-13 */ public class Word { /** * 种别码 */ private int typeNum; /** * 扫描获得的词 */ private String word; public int getTypeNum() { return typeNum; } public void setTypeNum(int typeNum) { this.typeNum = typeNum; } public String getWord() { return word; } public void setWord(String word) { this.word = word; } }
(3)CodeScanner.java 文件:
package com.java997.analyzer.lexical; /** * <p> * 字符扫描 * * @author XiaoPengwei * @since 2019-06-13 */ public class CodeScanner { private static String _KEY_WORD_END = "end string of string"; private int charNum = 0; private Word word; private char[] input = new char[255]; private char[] token = new char[255]; private int p_input = 0; private int p_token = 0; private char ch; /** * 关键字数组 */ private String[] rwtab = {"int", "if", "while", "do", "return", "break", "continue", _KEY_WORD_END}; /** * 逻辑运算数组 */ private String[] logicTab = {"==",">=","<=","!=", _KEY_WORD_END}; public CodeScanner(char[] input) { this.input = input; } /** * 取下一个字符 * * @return */ public char m_getch() { if (p_input < input.length) { ch = input[p_input]; p_input++; } return ch; } /** * 若是是标识符或者空白符就取下一个字符 */ public void getbc() { while ((ch == ' ' || ch == '\t') && p_input < input.length) { ch = input[p_input]; p_input++; } } /** * 把当前字符和原有字符串链接 */ public void concat() { token[p_token] = ch; p_token++; token[p_token] = '\0'; } /** * 回退一个字符 */ public void retract() { p_input--; } /** * 判断是否为字母 * * @return boolean * @author XiaoPengwei */ public boolean isLetter() { return ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z'; } /** * 判断是否为数字 * * @return boolean * @author XiaoPengwei */ public boolean isDigit() { return ch >= '0' && ch <= '9'; } /** * 查看 token 中的字符串是不是关键字,是的话返回关键字种别编码,不然返回 2 * * @return */ public int isKey() { int i = 0; while (rwtab[i].compareTo(_KEY_WORD_END) != 0) { if (rwtab[i].compareTo(new String(token).trim()) == 0) { return i + 1; } i++; } return 2; } /** * 多是逻辑预算字符 * * @return */ public Boolean isLogicChar() { return ch == '>' || ch == '<'|| ch == '='|| ch == '!'; } /** * 查看 token 中的字符串是不是逻辑运算符,是的话返回关键字种别编码,不然返回 2 * * @return */ public int isLogicTab() { int i = 0; while (logicTab[i].compareTo(_KEY_WORD_END) != 0) { if (logicTab[i].compareTo(new String(token).trim()) == 0) { return i + 1; } i++; } return 4; } /** * 可以识别换行,单行注释和多行注释的 * 换行的种别码设置成30 * 多行注释的种别码设置成31 * * @return 
*/ public Word scan() { token = new char[255]; Word myWord = new Word(); myWord.setTypeNum(10); myWord.setWord(""); p_token = 0; m_getch(); getbc(); if (isLetter()) { while (isLetter() || isDigit()) { concat(); m_getch(); } retract(); myWord.setTypeNum(isKey()); myWord.setWord(new String(token).trim()); return myWord; } else if (isLogicChar()) { while (isLogicChar()) { concat(); m_getch(); } retract(); myWord.setTypeNum(4); myWord.setWord(new String(token).trim()); return myWord; } else if (isDigit()) { while (isDigit()) { concat(); m_getch(); } retract(); myWord.setTypeNum(3); myWord.setWord(new String(token).trim()); return myWord; } else { switch (ch) { //5 case ',': myWord.setTypeNum(5); myWord.setWord(","); return myWord; case ';': myWord.setTypeNum(5); myWord.setWord(";"); return myWord; case '{': myWord.setTypeNum(5); myWord.setWord("{"); return myWord; case '}': myWord.setTypeNum(5); myWord.setWord("}"); return myWord; case '(': myWord.setTypeNum(5); myWord.setWord("("); return myWord; case ')': myWord.setTypeNum(5); myWord.setWord(")"); return myWord; //4 case '=': myWord.setTypeNum(4); myWord.setWord("="); return myWord; case '+': myWord.setTypeNum(4); myWord.setWord("+"); return myWord; case '-': myWord.setTypeNum(4); myWord.setWord("-"); return myWord; case '*': myWord.setTypeNum(4); myWord.setWord("*"); return myWord; case '/': myWord.setTypeNum(4); myWord.setWord("/"); return myWord; case '\n': myWord.setTypeNum(6); myWord.setWord("\\n"); return myWord; case '#': myWord.setTypeNum(0); myWord.setWord("#"); return myWord; default: concat(); myWord.setTypeNum(-1); myWord.setWord("ERROR INFO: WORD = \"" + new String(token).trim() + "\""); return myWord; } } } }
(4)MainAnalyzer.java 文件:
package com.java997.analyzer.lexical; import java.io.File; import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.IOException; import java.io.Writer; import java.util.ArrayList; import java.util.Scanner; /** * <p> * 执行主程序 * * @author XiaoPengwei * @since 2019-06-13 */ public class MainAnalyzer { private File inputFile; private File outputFile; private String fileContent; private ArrayList<Word> list = new ArrayList<>(); /** * 构造方法 * * @param input * @param output * @author XiaoPengwei */ public MainAnalyzer(String input, String output) { //实例化输入文件 inputFile = new File(input); //实例化输出文件 outputFile = new File(output); } /** * 从指定的 txt 文件中读取源程序文件内容 * * @return java.lang.String */ public String getContent() { StringBuilder stringBuilder = new StringBuilder(); try (Scanner reader = new Scanner(inputFile)) { while (reader.hasNextLine()) { String line = reader.nextLine(); stringBuilder.append(line + "\n"); System.out.println(line); } System.out.println("Successful reading of files:" + inputFile.getName()); } catch (FileNotFoundException e) { e.printStackTrace(); } return fileContent = stringBuilder.toString(); } /** * 而后扫描程序,在程序结束前将扫描到的词添加到 list 中 * 最后把扫描结果保存到指定的文件中 * * @param fileContent * @return void */ public void analyze(String fileContent) { int over = 1; Word word = new Word(); //调用扫描程序 CodeScanner scanner = new CodeScanner(fileContent.toCharArray()); System.out.println("The result:"); while (over != 0) { word = scanner.scan(); System.out.println("(" + word.getTypeNum() + "," + word.getWord() + ")"); list.add(word); over = word.getTypeNum(); } saveResult(); } /** * 将结果写入到到指定文件中 * 若是文件不存在,则建立一个新的文件 * 用一个 foreach 循环将 list 中的项变成字符串写入到文件中 */ public void saveResult() { //建立文件 if (!outputFile.exists()) { try { outputFile.createNewFile(); } catch (IOException e1) { e1.printStackTrace(); } } //写入文件 try (Writer writer = new FileWriter(outputFile)) { for (Word word : list) { writer.write("(" + word.getTypeNum() + " ," + word.getWord() + ")\n"); } } 
catch (IOException e) { e.printStackTrace(); } } public static void main(String[] args) { //注意输入文件路径/名称必须对, 输出文件能够由程序建立 MainAnalyzer analyzer = new MainAnalyzer("D:\\analyzer\\src\\main\\java\\com\\java997\\analyzer\\lexical\\input.txt", "D:\\analyzer\\src\\main\\java\\com\\java997\\analyzer\\lexical\\output.txt"); analyzer.analyze(analyzer.getContent()); } }
(5)input.txt 文件:
{
    int a, b;
    a = 10;
    if(a>=1){
        b = a + 20;
    }
}
#