llvmir some opt

This commit is contained in:
邓智航
2025-12-10 17:58:17 +08:00
commit 84827838e2
103 changed files with 5838 additions and 0 deletions

245
frontend/lexer/Lexer.java Normal file
View File

@@ -0,0 +1,245 @@
package frontend.lexer;
import java.util.ArrayList;
import error.Error;
import error.ErrorType;
import error.Errors;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.io.IOException;
/**
 * Hand-written scanner that turns a source string into a list of {@link Token}s.
 *
 * <p>The scanner keeps a cursor ({@code position}), the number of the line the cursor is on
 * (starting at 1), and a cached copy of the character under the cursor ({@code currentChar}).
 * Spaces, tabs, line breaks, {@code //} comments and block comments produce no tokens.
 */
public class Lexer {
    /** The cursor is in ordinary code. */
    private static final int NO_COMMENT = 0;
    /** The cursor is inside a {@code //} comment, which ends at the next line break. */
    private static final int LINE_COMMENT = 1;
    /** The cursor is inside a block comment, which ends at the next star-slash pair. */
    private static final int BLOCK_COMMENT = 2;

    private final String input;
    private final ArrayList<Token> tokens;
    private int position;
    private int line;
    private char currentChar;

    /**
     * Creates a lexer positioned at the first character of {@code input}, on line 1.
     *
     * @param input the complete source text to scan
     */
    public Lexer(String input) {
        this.input = input;
        this.tokens = new ArrayList<>();
        this.position = 0;
        this.line = 1;
    }

    /**
     * Scans the whole input and appends every token found to the internal token list.
     *
     * @param errors collector that is handed to {@link #lexOp(Errors)} for operator errors
     */
    public void lex(Errors errors) {
        int commentState = NO_COMMENT;
        while (this.position < this.input.length()) {
            this.currentChar = this.input.charAt(this.position);

            // Blanks are skipped in every state.
            if (this.currentChar == ' ' || this.currentChar == '\t') {
                this.position++;
                continue;
            }

            // Line breaks are counted in every state and terminate a line comment.
            if (this.currentChar == '\n' || this.currentChar == '\r') {
                // "\r\n" counts as one break: step over the '\r' first.
                if (this.currentChar == '\r' && nextCharIs('\n')) {
                    this.position++;
                }
                this.line++;
                this.position++;
                if (commentState == LINE_COMMENT) {
                    commentState = NO_COMMENT;
                }
                continue;
            }

            if (commentState == LINE_COMMENT) {
                this.position++;
                continue;
            }

            if (commentState == BLOCK_COMMENT) {
                if (this.currentChar == '*' && nextCharIs('/')) {
                    commentState = NO_COMMENT;
                    this.position += 2;
                } else {
                    this.position++;
                }
                continue;
            }

            // Ordinary code: a '/' may open a comment instead of being a division sign.
            if (this.currentChar == '/') {
                if (nextCharIs('/')) {
                    commentState = LINE_COMMENT;
                    this.position += 2;
                    continue;
                }
                if (nextCharIs('*')) {
                    commentState = BLOCK_COMMENT;
                    this.position += 2;
                    continue;
                }
            }

            if (this.isIntCons()) {
                lexInt();
            } else if (this.isStrCons()) {
                lexStr();
            } else if (this.isIdenfr()) {
                lexIdenfr();
            } else {
                lexOp(errors);
            }
        }
    }

    /** @return {@code true} if the current character is a digit */
    public boolean isIntCons() {
        return Character.isDigit(this.currentChar);
    }

    /** @return {@code true} if the current character is a double quote */
    public boolean isStrCons() {
        return this.currentChar == '"';
    }

    /** @return {@code true} if the current character is an underscore or a cased letter */
    public boolean isIdenfr() {
        return this.currentChar == '_'
                || Character.isLowerCase(this.currentChar)
                || Character.isUpperCase(this.currentChar);
    }

    /** @return {@code true} if the current character is not whitespace */
    public boolean isNotWp() {
        return !Character.isWhitespace(this.currentChar);
    }

    /** @return {@code true} if the current character starts neither a number, a string nor an identifier */
    public boolean isOp() {
        return !(this.isIntCons() || this.isStrCons() || this.isIdenfr());
    }

    /** Consumes a run of digits starting at the cursor and records it as one token. */
    public void lexInt() {
        StringBuilder digits = new StringBuilder();
        while (this.position < this.input.length() && this.isIntCons()) {
            consumeCurrentChar(digits);
        }
        this.tokens.add(new Token(digits.toString(), this.line));
    }

    /**
     * Consumes a quoted string starting at the cursor and records it as one token.
     *
     * <p>The token text includes the opening quote and, when one is found before the end of the
     * input, the closing quote.
     */
    public void lexStr() {
        StringBuilder text = new StringBuilder();
        consumeCurrentChar(text);
        while (this.position < this.input.length() && this.currentChar != '"') {
            consumeCurrentChar(text);
        }
        if (this.position < this.input.length() && this.currentChar == '"') {
            // The cached character is deliberately left untouched after the closing quote.
            text.append(this.currentChar);
            this.position++;
        }
        this.tokens.add(new Token(text.toString(), this.line));
    }

    /** Consumes a run of identifier characters and digits and records it as one token. */
    public void lexIdenfr() {
        StringBuilder name = new StringBuilder();
        while (this.position < this.input.length() && (this.isIdenfr() || this.isIntCons())) {
            consumeCurrentChar(name);
        }
        this.tokens.add(new Token(name.toString(), this.line));
    }

    /**
     * Consumes a one- or two-character operator or delimiter and records it as one token.
     *
     * <p>Does nothing when the cursor is on the start of a comment. A single {@code &} or
     * {@code |} is still recorded as a token, but an error of type {@code ErrorType.a} is added
     * for the current line.
     *
     * @param errors collector that receives the error for a lone {@code &} or {@code |}
     */
    public void lexOp(Errors errors) {
        if (this.currentChar == '/' && (nextCharIs('/') || nextCharIs('*'))) {
            return;
        }
        StringBuilder op = new StringBuilder();
        op.append(this.currentChar);
        switch (this.currentChar) {
            case '&':
                readAnd(op);
                break;
            case '|':
                readOr(op);
                break;
            case '<':
            case '>':
            case '=':
            case '!':
                readEq(op);
                break;
            default:
                break;
        }
        this.position++;
        String text = op.toString();
        if ("&".equals(text) || "|".equals(text)) {
            errors.addError(new Error(this.line, ErrorType.a));
        }
        this.tokens.add(new Token(text, this.line));
    }

    /**
     * Extends the operator in {@code sb} with a second {@code &} if one follows the cursor.
     *
     * @param sb buffer holding the operator text read so far
     */
    public void readAnd(StringBuilder sb) {
        appendNextIf('&', sb);
    }

    /**
     * Extends the operator in {@code sb} with a second {@code |} if one follows the cursor.
     *
     * @param sb buffer holding the operator text read so far
     */
    public void readOr(StringBuilder sb) {
        appendNextIf('|', sb);
    }

    /**
     * Extends the operator in {@code sb} with {@code =} if one follows the cursor.
     *
     * @param sb buffer holding the operator text read so far
     */
    public void readEq(StringBuilder sb) {
        appendNextIf('=', sb);
    }

    /** Refreshes the cached character from the cursor; leaves it unchanged at end of input. */
    public void updateCurrentChar() {
        if (this.position >= this.input.length()) {
            return;
        }
        this.currentChar = this.input.charAt(this.position);
    }

    /** Prints every token as "type value line" on standard output, one token per line. */
    public void printTokens() {
        for (int i = 0; i < this.tokens.size(); i++) {
            Token token = this.tokens.get(i);
            token.adjustType();
            String row = token.getType() + " " + token.getValue() + " " + token.getLine();
            System.out.println(row);
        }
    }

    /**
     * Writes the string form of every token to the given file.
     *
     * <p>An {@link IOException} is not propagated; its stack trace is printed instead.
     *
     * @param fileName path of the file to write
     */
    public void writeToFile(String fileName) {
        StringBuilder content = new StringBuilder();
        for (int i = 0; i < this.tokens.size(); i++) {
            Token token = this.tokens.get(i);
            token.adjustType();
            content.append(token.toString());
        }
        try {
            Files.write(Paths.get(fileName), content.toString().getBytes());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the scanned tokens after adjusting the type of each one.
     *
     * @return the lexer's own token list (not a copy)
     */
    public ArrayList<Token> getTokens() {
        for (int i = 0; i < this.tokens.size(); i++) {
            this.tokens.get(i).adjustType();
        }
        return this.tokens;
    }

    /** Tells whether a character exists right after the cursor and equals {@code expected}. */
    private boolean nextCharIs(char expected) {
        int next = this.position + 1;
        return next < this.input.length() && this.input.charAt(next) == expected;
    }

    /** Appends the cached character to {@code out}, advances the cursor and refreshes the cache. */
    private void consumeCurrentChar(StringBuilder out) {
        out.append(this.currentChar);
        this.position++;
        updateCurrentChar();
    }

    /** Moves onto the following character and appends it to {@code out} if it equals {@code expected}. */
    private void appendNextIf(char expected, StringBuilder out) {
        if (nextCharIs(expected)) {
            this.position++;
            this.currentChar = this.input.charAt(this.position);
            out.append(this.currentChar);
        }
    }
}

42
frontend/lexer/Token.java Normal file
View File

@@ -0,0 +1,42 @@
package frontend.lexer;
/**
 * A lexical token: its text, the source line it was read on, and its category.
 */
public class Token {
    private TokenType type;
    private final String value;
    private final int line;

    /**
     * Creates a token whose initial type is whatever {@link TokenType#isWhatType(String)}
     * returns for {@code value}.
     *
     * @param value the token text
     * @param line  the source line number to associate with the token
     */
    public Token(String value, int line) {
        this.value = value;
        this.type = TokenType.isWhatType(value);
        this.line = line;
    }

    /**
     * Refines a token currently typed {@code IDENFR}: text wrapped in double quotes becomes
     * {@code STRCON} and text made only of the digits 0-9 becomes {@code INTCON}.
     *
     * <p>Tokens of any other type are left alone. Empty text and a single quote character are
     * not valid literals, so they keep the type {@code IDENFR} instead of causing an
     * index error or being labelled as a string.
     */
    public void adjustType() {
        if (this.type != TokenType.IDENFR) {
            return;
        }
        if (isQuoted(this.value)) {
            this.type = TokenType.STRCON;
        }
        if (isDecimal(this.value)) {
            this.type = TokenType.INTCON;
        }
    }

    /** @return the token text */
    public String getValue() {
        return this.value;
    }

    /** @return the token's current type */
    public TokenType getType() {
        return this.type;
    }

    /** @return the line number given at construction */
    public int getLine() {
        return this.line;
    }

    /** @return the type and the text separated by one space, followed by a newline */
    @Override
    public String toString() {
        return this.type + " " + this.value + "\n";
    }

    /** Tells whether {@code text} has an opening and a separate closing double quote. */
    private static boolean isQuoted(String text) {
        // Two characters at least: a lone quote must not count as both delimiters.
        return text.length() >= 2
                && text.charAt(0) == '"'
                && text.charAt(text.length() - 1) == '"';
    }

    /** Tells whether {@code text} is non-empty and consists only of the characters 0-9. */
    private static boolean isDecimal(String text) {
        if (text.isEmpty()) {
            return false;
        }
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }
}

View File

@@ -0,0 +1,42 @@
package frontend.lexer;
import java.util.ArrayList;
/**
 * A cursor over a list of tokens with read, step-back, look-around and reposition operations.
 */
public class TokenStream {
    private final ArrayList<Token> tokens;
    private int currentIndex;

    /**
     * Creates a stream positioned at the first token.
     *
     * @param tokens the list to read from; it is used directly, not copied
     */
    public TokenStream(ArrayList<Token> tokens) {
        this.tokens = tokens;
        this.currentIndex = 0;
    }

    /**
     * Returns the token at the cursor and advances the cursor by one.
     *
     * @return the token, or {@code null} when the cursor is at or past the end of the list
     */
    public Token read() {
        if (currentIndex < tokens.size()) {
            return tokens.get(currentIndex++);
        } else {
            return null;
        }
    }

    /** Moves the cursor back by one token; does nothing when it is already at index 0. */
    public void recall() {
        if (currentIndex > 0) {
            currentIndex--;
        }
    }

    /** @return the index of the token the next {@link #read()} would return */
    public int getCurrentIndex() {
        return currentIndex;
    }

    /**
     * Moves the cursor to {@code index}. The value is stored without validation.
     *
     * @param index the new cursor index
     */
    public void resetIndex(int index) {
        currentIndex = index;
    }

    /**
     * Returns the token {@code step} positions away from the cursor without moving the cursor.
     *
     * @param step offset relative to the cursor; 0 is the token the next {@link #read()} would
     *             return, negative values look at earlier tokens
     * @return the token at that offset, or {@code null} when the offset falls before the start
     *         or past the end of the list
     */
    public Token peek(int step) {
        int target = currentIndex + step;
        // The lower bound also covers a sum that overflowed to a negative number.
        if (target >= 0 && target < tokens.size()) {
            return tokens.get(target);
        }
        return null;
    }
}

View File

@@ -0,0 +1,123 @@
package frontend.lexer;
/**
 * Categories a token can belong to, plus the lookup that maps token text to a category.
 */
public enum TokenType {
    IDENFR, INTCON, STRCON,
    CONSTTK, INTTK, STATICTK, BREAKTK, CONTINUETK, IFTK, MAINTK, ELSETK,
    NOT, AND, OR,
    FORTK, RETURNTK, VOIDTK,
    PLUS, MINU,
    PRINTFTK,
    MULT, DIV, MOD,
    LSS, LEQ, GRE, GEQ, EQL, NEQ,
    SEMICN, COMMA,
    LPARENT, RPARENT, LBRACK, RBRACK, LBRACE, RBRACE,
    ASSIGN;

    /**
     * Maps token text to its type.
     *
     * @param str the exact token text (matching is case-sensitive)
     * @return the keyword, operator or delimiter type for {@code str}, or {@link #IDENFR}
     *         when the text is none of those
     */
    public static TokenType isWhatType(String str) {
        TokenType keyword = keywordType(str);
        if (keyword != null) {
            return keyword;
        }
        TokenType symbol = symbolType(str);
        if (symbol != null) {
            return symbol;
        }
        return IDENFR;
    }

    /** Returns the type of a reserved word, or {@code null} if {@code text} is not one. */
    private static TokenType keywordType(String text) {
        switch (text) {
            case "const":
                return CONSTTK;
            case "int":
                return INTTK;
            case "static":
                return STATICTK;
            case "void":
                return VOIDTK;
            case "main":
                return MAINTK;
            case "if":
                return IFTK;
            case "else":
                return ELSETK;
            case "for":
                return FORTK;
            case "break":
                return BREAKTK;
            case "continue":
                return CONTINUETK;
            case "return":
                return RETURNTK;
            case "printf":
                return PRINTFTK;
            default:
                return null;
        }
    }

    /** Returns the type of an operator or delimiter, or {@code null} if {@code text} is not one. */
    private static TokenType symbolType(String text) {
        switch (text) {
            // The single-character forms share the type of the doubled operator.
            case "&&":
            case "&":
                return AND;
            case "||":
            case "|":
                return OR;
            case "!":
                return NOT;
            case "+":
                return PLUS;
            case "-":
                return MINU;
            case "*":
                return MULT;
            case "/":
                return DIV;
            case "%":
                return MOD;
            case "<":
                return LSS;
            case "<=":
                return LEQ;
            case ">":
                return GRE;
            case ">=":
                return GEQ;
            case "==":
                return EQL;
            case "!=":
                return NEQ;
            case "=":
                return ASSIGN;
            case ";":
                return SEMICN;
            case ",":
                return COMMA;
            case "(":
                return LPARENT;
            case ")":
                return RPARENT;
            case "[":
                return LBRACK;
            case "]":
                return RBRACK;
            case "{":
                return LBRACE;
            case "}":
                return RBRACE;
            default:
                return null;
        }
    }
}