diff --git a/src/Lexer.cpp b/src/Lexer.cpp index 243ea78..974a248 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -3,189 +3,190 @@ Lexer::Lexer(string source): source(source) { } -vector Lexer::getTokens() { - vector tokens; +vector> Lexer::getTokens() { + vector> tokens; + shared_ptr token = nullptr; do { - Token token = nextToken(); + token = nextToken(); + // Got a nullptr, shouldn't have happened + if (!token) { + cerr << "Failed to scan tokens" << endl; + exit(1); + } // Abort scanning if we got an error - if (!token.isValid()) { - cerr << "Unexpected character '" << token.getLexme() << "' at " << token.getLine() << ":" << token.getColumn() << endl; - return vector(); + if (!token->isValid()) { + cerr << "Unexpected character '" << token->getLexme() << "' at " << token->getLine() << ":" << token->getColumn() << endl; + exit(1); } - currentIndex += token.getLexme().length(); - currentColumn += token.getLexme().length(); + currentIndex += token->getLexme().length(); + currentColumn += token->getLexme().length(); - if (token.getKind() == Token::Kind::NEW_LINE) { + if (token->getKind() == Token::Kind::NEW_LINE) { currentLine++; currentColumn = 0; } // filter out multiple new lines - if (tokens.empty() || token.getKind() != Token::Kind::NEW_LINE || tokens.back().getKind() != token.getKind()) + if (tokens.empty() || token->getKind() != Token::Kind::NEW_LINE || tokens.back()->getKind() != token->getKind()) tokens.push_back(token); - } while (tokens.back().getKind() != Token::Kind::END); + } while (token->getKind() != Token::Kind::END); return tokens; } -Token Lexer::nextToken() { +shared_ptr Lexer::nextToken() { while (currentIndex < source.length() && isWhiteSpace(currentIndex)) { currentIndex++; currentColumn++; } - { - Token token = matchEnd(); - if (token.isValid()) - return token; - } + shared_ptr token; - { - Token token = matchSymbol('+', Token::Kind::PLUS); - if (token.isValid()) - return token; - } + // arithmetic + token = match(Token::Kind::PLUS, "+", false); + if (token != nullptr) + return token; + + token = match(Token::Kind::MINUS, "-", false); + if (token != nullptr) + return token; - { - Token token = matchSymbol('-', Token::Kind::MINUS); - if (token.isValid()) - return token; - } + token = match(Token::Kind::STAR, "*", false); + if (token != nullptr) + return token; - { - Token token = matchSymbol('*', Token::Kind::STAR); - if (token.isValid()) - return token; - } + token = match(Token::Kind::SLASH, "/", false); + if (token != nullptr) + return token; - { - Token token = matchSymbol('/', Token::Kind::SLASH); - if (token.isValid()) - return token; - } + token = match(Token::Kind::PERCENT, "%", false); + if (token != nullptr) + return token; + + // logical + token = match(Token::Kind::NOT_EQUAL, "!=", false); + if (token != nullptr) + return token; + + token = match(Token::Kind::EQUAL, "=", false); + if (token != nullptr) + return token; + + token = match(Token::Kind::LESS_EQUAL, "<=", false); + if (token != nullptr) + return token; - { - Token token =matchSymbol('%', Token::Kind::PERCENT); - if (token.isValid()) - return token; - } + token = match(Token::Kind::LESS, "<", false); + if (token != nullptr) + return token; - { - Token token = matchSymbol('(', Token::Kind::LEFT_PAREN); - if (token.isValid()) - return token; - } + token = match(Token::Kind::GREATER_EQUAL, ">=", false); + if (token != nullptr) + return token; - { - Token token = matchSymbol(')', Token::Kind::RIGHT_PAREN); - if (token.isValid()) - return token; - } + token = match(Token::Kind::GREATER, ">", false); + if (token != nullptr) + return token; - { - Token token = matchSymbol(':', Token::Kind::COLON); - if (token.isValid()) - return token; - } + // structural + token = match(Token::Kind::LEFT_PAREN, "(", false); + if (token != nullptr) + return token; - { - Token token = matchSymbol(';', Token::Kind::SEMICOLON); - if (token.isValid()) - return token; - } + token = match(Token::Kind::RIGHT_PAREN, ")", false); + if (token != nullptr) + return token; - { - Token token = matchKeyword("fun", Token::Kind::FUNCTION); - if (token.isValid()) - return token; - } + token = match(Token::Kind::COLON, ":", false); + if (token != nullptr) + return token; - { - Token token = matchKeyword("ret", Token::Kind::RETURN); - if (token.isValid()) - return token; - } + token = match(Token::Kind::SEMICOLON, ";", false); + if (token != nullptr) + return token; - { - Token token = matchInteger(); - if (token.isValid()) - return token; - } + // keywords + token = match(Token::Kind::FUNCTION, "fun", true); + if (token != nullptr) + return token; + + token = match(Token::Kind::RETURN, "ret", true); + if (token != nullptr) + return token; + + // literal + token = matchInteger(); + if (token != nullptr) + return token; - { - Token token = matchIdentifier(); - if (token.isValid()) - return token; - } + // identifier + token = matchIdentifier(); + if (token != nullptr) + return token; - { - Token token = matchNewLine(); - if (token.isValid()) - return token; - } + // new line + token = match(Token::Kind::NEW_LINE, "\r\n", false); + if (token != nullptr) + return token; + + token = match(Token::Kind::NEW_LINE, "\n", false); + if (token != nullptr) + return token; + + // other + token = matchEnd(); + if (token != nullptr) + return token; return matchInvalid(); } -Token Lexer::matchEnd() { - if (currentIndex >= source.length()) - return Token(Token::Kind::END, "", currentLine, currentColumn); - - return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn); -} +shared_ptr Lexer::match(Token::Kind kind, string lexme, bool needsSeparator) { + bool isMatching = source.compare(currentIndex, lexme.length(), lexme) == 0; + bool isSeparatorSatisfied = !needsSeparator || isSeparator(currentIndex + lexme.length()); -Token Lexer::matchNewLine() { - if (isNewLine(currentIndex)) - return Token(Token::Kind::NEW_LINE, "\n", currentLine, currentColumn); - - return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn); -} - -Token Lexer::matchSymbol(char symbol, Token::Kind kind) { - if (source.at(currentIndex) == symbol) - return Token(kind, string(1, symbol), currentLine, currentColumn); - - return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn); -} - -Token Lexer::matchKeyword(string keyword, Token::Kind kind) { - bool isMatching = source.compare(currentIndex, keyword.length(), keyword) == 0; - - if (isMatching && isSeparator(currentIndex + keyword.length())) - return Token(kind, keyword, currentLine, currentColumn); + if (isMatching && isSeparatorSatisfied) + return make_shared(kind, lexme, currentLine, currentColumn); else - return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn); + return nullptr; } -Token Lexer::matchInteger() { +shared_ptr Lexer::matchInteger() { int nextIndex = currentIndex; while (nextIndex < source.length() && isDigit(nextIndex)) nextIndex++; if (nextIndex == currentIndex || !isSeparator(nextIndex)) - return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn); + return nullptr; string lexme = source.substr(currentIndex, nextIndex - currentIndex); - return Token(Token::Kind::INTEGER, lexme, currentLine, currentColumn); + return make_shared(Token::Kind::INTEGER, lexme, currentLine, currentColumn); } -Token Lexer::matchIdentifier() { +shared_ptr Lexer::matchIdentifier() { int nextIndex = currentIndex; while (nextIndex < source.length() && isIdentifier(nextIndex)) nextIndex++; if (nextIndex == currentIndex || !isSeparator(nextIndex)) - return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn); + return nullptr; string lexme = source.substr(currentIndex, nextIndex - currentIndex); - return Token(Token::Kind::IDENTIFIER, lexme, currentLine, currentColumn); + return make_shared(Token::Kind::IDENTIFIER, lexme, currentLine, currentColumn); } -Token Lexer::matchInvalid() { - return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn); +shared_ptr Lexer::matchEnd() { + if (currentIndex >= source.length()) + return make_shared(Token::Kind::END, "", currentLine, currentColumn); + + return nullptr; +} + +shared_ptr Lexer::matchInvalid() { + return make_shared(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn); } bool Lexer::isWhiteSpace(int index) { @@ -193,11 +194,6 @@ bool Lexer::isWhiteSpace(int index) { return character == ' ' || character == '\t'; } -bool Lexer::isNewLine(int index) { - char character = source.at(index); - return character == '\n'; -} - bool Lexer::isDigit(int index) { char character = source.at(index); return character >= '0' && character <= '9'; @@ -223,12 +219,17 @@ bool Lexer::isSeparator(int index) { case '*': case '/': case '%': + case '=': + case '<': + case '>': case '(': case ')': case ':': + case ';': case ' ': case '\t': case '\n': + case '\r'; return true; default: return false; diff --git a/src/Token.cpp b/src/Token.cpp index b56c462..b8b6948 100644 --- a/src/Token.cpp +++ b/src/Token.cpp @@ -1,6 +1,7 @@ #include "Token.h" -Token::Token(Kind kind, string lexme, int line, int column): kind(kind), lexme(lexme), line(line), column(column) { +Token::Token(Kind kind, string lexme, int line, int column): + kind(kind), lexme(lexme), line(line), column(column) { } Token::Kind Token::getKind() { @@ -44,6 +45,20 @@ string Token::toString() { return "SLASH"; case PERCENT: return "PERCENT"; + + case EQUAL: + return "EQUAL"; + case NOT_EQUAL: + return "NOT_EQUAL"; + case LESS: + return "LESS"; + case LESS_EQUAL: + return "LESS_EQUAL"; + case GREATER: + return "GREATER"; + case GREATER_EQUAL: + return "GREATER_EQUAL"; + case LEFT_PAREN: return "LEFT_PAREN"; case RIGHT_PAREN: @@ -52,14 +67,17 @@ string Token::toString() { return "COLON"; case SEMICOLON: return "SEMICOLON"; + case INTEGER: - return "INTEGER"; + return "INTEGER(" + lexme + ")"; case IDENTIFIER: - return "IDENTIFIER"; + return "IDENTIFIER(" + lexme + ")"; + case FUNCTION: return "FUNCTION"; case RETURN: return "RETURN"; + case NEW_LINE: return "NEW_LINE"; case END: diff --git a/src/Token.h b/src/Token.h index 3206f91..1f32570 100644 --- a/src/Token.h +++ b/src/Token.h @@ -13,18 +13,25 @@ public: STAR, SLASH, PERCENT, + + EQUAL, + NOT_EQUAL, + LESS, + LESS_EQUAL, + GREATER, + GREATER_EQUAL, LEFT_PAREN, RIGHT_PAREN, COLON, SEMICOLON, - INTEGER, - IDENTIFIER, - FUNCTION, RETURN, + INTEGER, + IDENTIFIER, + NEW_LINE, END, diff --git a/src/main.cpp b/src/main.cpp index f3475bb..2b5aeef 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -35,31 +35,30 @@ int main(int argc, char **argv) { std::string source = readFile(std::string(argv[1])); Lexer lexer(source); - std::vector tokens = lexer.getTokens(); - if (tokens.empty()) { - exit(1); + std::vector> tokens = lexer.getTokens(); + for (int i=0; itoString(); + if (i < tokens.size() - 1) + std::cout << " "; } - for (Token &token : tokens) - std::cout << token.toString() << " "; std::cout << std::endl; - Parser parser(tokens); - vector> statements = parser.getStatements(); - if (statements.empty()) { - exit(1); - } - for (shared_ptr &statement : statements) { - cout << statement->toString(); - cout << endl; - } + //Parser parser(tokens); + //vector> statements = parser.getStatements(); + //if (statements.empty()) { + // exit(1); + //} + //for (shared_ptr &statement : statements) { + // cout << statement->toString(); + // cout << endl; + //} - //ModuleBuilder moduleBuilder(expression); - ModuleBuilder moduleBuilder(statements); - shared_ptr module = moduleBuilder.getModule(); - module->print(llvm::outs(), nullptr); + //ModuleBuilder moduleBuilder(statements); + //shared_ptr module = moduleBuilder.getModule(); + //module->print(llvm::outs(), nullptr); - CodeGenerator codeGenerator(module); - codeGenerator.generateObjectFile("dummy.s"); + //CodeGenerator codeGenerator(module); + //codeGenerator.generateObjectFile("dummy.s"); return 0; } \ No newline at end of file