llvmir some opt

2025-12-10 17:58:17 +08:00
commit 84827838e2
103 changed files with 5838 additions and 0 deletions
--- a/frontend/lexer/Lexer.java
+++ b/frontend/lexer/Lexer.java
@@ -0,0 +1,245 @@
+package frontend.lexer;
+
+import java.util.ArrayList;
+
+import error.Error;
+import error.ErrorType;
+import error.Errors;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.io.IOException;
+
+/**
+ * Hand-written lexer that turns a source string into a list of {@link Token}s.
+ *
+ * <p>The lexer skips whitespace, {@code //} line comments and block comments,
+ * tracks the current line number (LF, CR and CRLF each count as one line break)
+ * and classifies each lexeme through {@link Token}.
+ */
+public class Lexer {
+    /** Comment-tracking states used by {@link #lex(Errors)}. */
+    private static final int NO_COMMENT = 0;
+    private static final int LINE_COMMENT = 1;
+    private static final int BLOCK_COMMENT = 2;
+
+    private final String input;
+    private final ArrayList<Token> tokens;
+    private int position;
+    private int line;
+    private char currentChar;
+
+    /**
+     * Creates a lexer positioned at the start of {@code input}, on line 1.
+     *
+     * @param input the complete source text to tokenize
+     */
+    public Lexer(String input) {
+        this.input = input;
+        this.tokens = new ArrayList<>();
+        this.position = 0;
+        this.line = 1;
+    }
+
+    /**
+     * Scans the remaining input and appends the recognized tokens to the
+     * internal token list.
+     *
+     * @param errors collector that receives an error for every lone
+     *               {@code &} or {@code |} operator
+     */
+    public void lex(Errors errors) {
+        int commentState = NO_COMMENT;
+        while (this.position < this.input.length()) {
+            currentChar = this.input.charAt(this.position);
+
+            // Line breaks are handled first: they bump the line counter and
+            // terminate a line comment, whatever state we are in.
+            if (currentChar == '\n' || currentChar == '\r') {
+                if (currentChar == '\r' && nextCharIs('\n')) {
+                    // Treat "\r\n" as a single line break.
+                    this.position++;
+                }
+                this.line++;
+                this.position++;
+                if (commentState == LINE_COMMENT) {
+                    commentState = NO_COMMENT;
+                }
+                continue;
+            }
+
+            // Skip every other whitespace character (space, tab, form feed,
+            // vertical tab, ...) so none of them leaks out as a bogus token.
+            if (Character.isWhitespace(currentChar)) {
+                this.position++;
+                continue;
+            }
+
+            if (commentState == BLOCK_COMMENT) {
+                if (currentChar == '*' && nextCharIs('/')) {
+                    commentState = NO_COMMENT;
+                    this.position += 2;
+                } else {
+                    this.position++;
+                }
+                continue;
+            }
+            if (commentState == LINE_COMMENT) {
+                this.position++;
+                continue;
+            }
+
+            // Not inside a comment: look for a comment opener first.
+            if (currentChar == '/') {
+                if (nextCharIs('/')) {
+                    commentState = LINE_COMMENT;
+                    this.position += 2;
+                    continue;
+                }
+                if (nextCharIs('*')) {
+                    commentState = BLOCK_COMMENT;
+                    this.position += 2;
+                    continue;
+                }
+            }
+
+            if (this.isIntCons()) {
+                lexInt();
+            } else if (this.isStrCons()) {
+                lexStr();
+            } else if (this.isIdenfr()) {
+                lexIdenfr();
+            } else {
+                lexOp(errors);
+            }
+        }
+    }
+
+    /** Returns whether the character right after the current position equals {@code expected}. */
+    private boolean nextCharIs(char expected) {
+        int next = this.position + 1;
+        return next < this.input.length() && this.input.charAt(next) == expected;
+    }
+
+    /** Returns whether the current character is a digit. */
+    public boolean isIntCons() {
+        return Character.isDigit(this.currentChar);
+    }
+
+    /** Returns whether the current character is a double quote. */
+    public boolean isStrCons() {
+        return this.currentChar == '"';
+    }
+
+    /** Returns whether the current character is a letter or an underscore. */
+    public boolean isIdenfr() {
+        return Character.isLowerCase(this.currentChar) || Character.isUpperCase(this.currentChar)
+                || this.currentChar == '_';
+    }
+
+    /** Returns whether the current character is not whitespace. */
+    public boolean isNotWp() {
+        return !Character.isWhitespace(this.currentChar);
+    }
+
+    /** Returns whether the current character starts neither a number, a string nor an identifier. */
+    public boolean isOp() {
+        return !this.isIntCons() && !this.isStrCons() && !this.isIdenfr();
+    }
+
+    /** Consumes a run of digits starting at the current position and emits it as one token. */
+    public void lexInt() {
+        StringBuilder sb = new StringBuilder();
+        while (this.position < this.input.length() && this.isIntCons()) {
+            sb.append(this.currentChar);
+            this.position++;
+            updateCurrentChar();
+        }
+        this.tokens.add(new Token(sb.toString(), this.line));
+    }
+
+    /**
+     * Consumes a double-quoted string starting at the current position and
+     * emits it, quotes included, as one token. If the input ends before the
+     * closing quote, the token holds everything up to the end of input.
+     */
+    public void lexStr() {
+        StringBuilder sb = new StringBuilder();
+        // Opening quote.
+        sb.append(this.currentChar);
+        this.position++;
+        updateCurrentChar();
+        while (this.position < this.input.length() && this.currentChar != '"') {
+            sb.append(this.currentChar);
+            this.position++;
+            updateCurrentChar();
+        }
+        // Closing quote, when present.
+        if (this.position < this.input.length() && this.currentChar == '"') {
+            sb.append(this.currentChar);
+            this.position++;
+        }
+        this.tokens.add(new Token(sb.toString(), this.line));
+    }
+
+    /** Consumes a run of letters, underscores and digits and emits it as one token. */
+    public void lexIdenfr() {
+        StringBuilder sb = new StringBuilder();
+        while (this.position < this.input.length() && (this.isIdenfr() || this.isIntCons())) {
+            sb.append(this.currentChar);
+            this.position++;
+            updateCurrentChar();
+        }
+        this.tokens.add(new Token(sb.toString(), this.line));
+    }
+
+    /**
+     * Consumes a one- or two-character operator/punctuation token at the
+     * current position. A lone {@code &} or {@code |} is still emitted as a
+     * token, but an error of type {@code ErrorType.a} is recorded for it.
+     * Returns without consuming anything when positioned on a comment opener.
+     *
+     * @param errors collector that receives the error for a lone {@code &} or {@code |}
+     */
+    public void lexOp(Errors errors) {
+        if (this.currentChar == '/' && (nextCharIs('/') || nextCharIs('*'))) {
+            return;
+        }
+        StringBuilder sb = new StringBuilder();
+        sb.append(this.currentChar);
+        switch (this.currentChar) {
+            case '&':
+                readAnd(sb);
+                break;
+            case '|':
+                readOr(sb);
+                break;
+            case '<':
+            case '>':
+            case '=':
+            case '!':
+                readEq(sb);
+                break;
+            default:
+                break;
+        }
+        this.position++;
+        String op = sb.toString();
+        if (op.equals("&") || op.equals("|")) {
+            errors.addError(new Error(this.line, ErrorType.a));
+        }
+        this.tokens.add(new Token(op, this.line));
+    }
+
+    /** Extends {@code sb} with a second {@code &} when the next character is one. */
+    public void readAnd(StringBuilder sb) {
+        appendNextIf('&', sb);
+    }
+
+    /** Extends {@code sb} with a second {@code |} when the next character is one. */
+    public void readOr(StringBuilder sb) {
+        appendNextIf('|', sb);
+    }
+
+    /** Extends {@code sb} with {@code =} when the next character is one. */
+    public void readEq(StringBuilder sb) {
+        appendNextIf('=', sb);
+    }
+
+    /** Advances onto the next character and appends it to {@code sb} when it equals {@code expected}. */
+    private void appendNextIf(char expected, StringBuilder sb) {
+        if (nextCharIs(expected)) {
+            this.position++;
+            currentChar = this.input.charAt(this.position);
+            sb.append(currentChar);
+        }
+    }
+
+    /** Reloads the current character from the input; leaves it unchanged at end of input. */
+    public void updateCurrentChar() {
+        if (this.position < this.input.length()) {
+            currentChar = this.input.charAt(this.position);
+        }
+    }
+
+    /** Prints every token as "type value line" to standard output. */
+    public void printTokens() {
+        for (Token token : this.tokens) {
+            token.adjustType();
+            System.out.println(token.getType() + " " + token.getValue() + " " + token.getLine());
+        }
+    }
+
+    /**
+     * Writes every token, one per line, to the given file. This is best
+     * effort: an I/O failure is reported on standard error and not rethrown.
+     *
+     * @param fileName path of the file to write
+     */
+    public void writeToFile(String fileName) {
+        StringBuilder sb = new StringBuilder();
+        for (Token token : this.tokens) {
+            token.adjustType();
+            sb.append(token.toString());
+        }
+        try {
+            Files.write(Paths.get(fileName), sb.toString().getBytes());
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Returns the tokens produced so far, with their types finalized.
+     *
+     * @return the lexer's own token list (not a copy)
+     */
+    public ArrayList<Token> getTokens() {
+        for (Token token : this.tokens) {
+            token.adjustType();
+        }
+        return this.tokens;
+    }
+}
--- a/frontend/lexer/Token.java
+++ b/frontend/lexer/Token.java
@@ -0,0 +1,42 @@
+package frontend.lexer;
+
+public class Token {
+    private TokenType type;
+    private String value;
+    private int line;
+
+    public Token(String value, int line) {
+        this.value = value;
+        this.type = TokenType.isWhatType(value);
+        this.line = line;
+    }
+    
+    public void adjustType() {
+        if (this.type == TokenType.IDENFR) {
+            if (this.value.charAt(0) == '\"' && 
+                this.value.charAt(this.value.length() - 1) == '\"') {
+                this.type = TokenType.STRCON;
+            }
+            String regex = "^\\d+$";
+            if (this.value.matches(regex)) {
+                this.type = TokenType.INTCON;
+            }
+        }
+    }
+
+    public String getValue() {
+        return this.value;
+    }
+
+    public TokenType getType() {
+        return this.type;
+    }
+
+    public int getLine() {
+        return this.line;
+    }
+
+    public String toString() {
+        return this.type + " " + this.value + "\n";
+    }
+}
--- a/frontend/lexer/TokenStream.java
+++ b/frontend/lexer/TokenStream.java
@@ -0,0 +1,42 @@
+package frontend.lexer;
+
+import java.util.ArrayList;
+
+public class TokenStream {
+    private ArrayList<Token> tokens;
+    private int currentIndex;
+
+    public TokenStream(ArrayList<Token> tokens) {
+        this.tokens = tokens;
+    }
+
+    public Token read() {
+        if (currentIndex < tokens.size()) {
+            return tokens.get(currentIndex++);
+        } else {
+            return null;
+        }
+    }
+
+    public void recall() {
+        if (currentIndex > 0) {
+            currentIndex--;
+        }
+    }
+
+    public int getCurrentIndex() {
+        return currentIndex;
+    }
+
+    public void resetIndex(int index) {
+        currentIndex = index;
+    }
+
+    public Token peek(int step) {
+        if (currentIndex + step < tokens.size()) {
+            return tokens.get(currentIndex + step);
+        } else {
+            return null;
+        }
+    }
+}
--- a/frontend/lexer/TokenType.java
+++ b/frontend/lexer/TokenType.java
@@ -0,0 +1,123 @@
+package frontend.lexer;
+
+/**
+ * Token categories produced by the lexer. Constants that correspond to fixed
+ * spellings (keywords, operators, punctuation) carry those spellings; the
+ * remaining constants carry none.
+ */
+public enum TokenType {
+    IDENFR,
+    INTCON,
+    STRCON,
+    CONSTTK("const"),
+    INTTK("int"),
+    STATICTK("static"),
+    BREAKTK("break"),
+    CONTINUETK("continue"),
+    IFTK("if"),
+    MAINTK("main"),
+    ELSETK("else"),
+    NOT("!"),
+    // A single '&' / '|' maps to the same type as the doubled operator.
+    AND("&&", "&"),
+    OR("||", "|"),
+    FORTK("for"),
+    RETURNTK("return"),
+    VOIDTK("void"),
+    PLUS("+"),
+    MINU("-"),
+    PRINTFTK("printf"),
+    MULT("*"),
+    DIV("/"),
+    MOD("%"),
+    LSS("<"),
+    LEQ("<="),
+    GRE(">"),
+    GEQ(">="),
+    EQL("=="),
+    NEQ("!="),
+    SEMICN(";"),
+    COMMA(","),
+    LPARENT("("),
+    RPARENT(")"),
+    LBRACK("["),
+    RBRACK("]"),
+    LBRACE("{"),
+    RBRACE("}"),
+    ASSIGN("=");
+
+    /** Cached constant list so lookups do not clone the array on every call. */
+    private static final TokenType[] ALL_TYPES = values();
+
+    /** Exact spellings that map to this type; empty for types with no fixed spelling. */
+    private final String[] lexemes;
+
+    TokenType(String... lexemes) {
+        this.lexemes = lexemes;
+    }
+
+    /**
+     * Classifies a lexeme by exact, case-sensitive match against the fixed
+     * spellings of all token types.
+     *
+     * @param str the lexeme to classify; must not be {@code null}
+     * @return the matching type, or {@code IDENFR} when no spelling matches
+     * @throws NullPointerException if {@code str} is {@code null}
+     */
+    public static TokenType isWhatType(String str) {
+        for (TokenType candidate : ALL_TYPES) {
+            for (String lexeme : candidate.lexemes) {
+                if (str.equals(lexeme)) {
+                    return candidate;
+                }
+            }
+        }
+        return IDENFR;
+    }
+}