From 61e648e55b6b4e25658b7981ab38f83267589190 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Sat, 31 May 2025 23:24:21 +0900 Subject: [PATCH] Better error reporting --- src/Expression.cpp | 25 +++---- src/Expression.h | 7 +- src/Lexer.cpp | 159 ++++++++++++++++++++++++++++++--------------- src/Lexer.h | 13 ++-- src/Parser.cpp | 45 ++++++++----- src/Parser.h | 2 +- src/Token.cpp | 24 ++++--- src/Token.h | 25 ++++--- src/main.cpp | 6 ++ 9 files changed, 198 insertions(+), 108 deletions(-) diff --git a/src/Expression.cpp b/src/Expression.cpp index 5895b7b..4eadd62 100644 --- a/src/Expression.cpp +++ b/src/Expression.cpp @@ -1,14 +1,13 @@ #include "Expression.h" -std::shared_ptr Expression::Invalid = std::make_shared(Expression::Kind::INVALID, Token::Invalid, nullptr, nullptr); - -Expression::Expression(Kind kind, Token token, shared_ptr left, shared_ptr right) { +Expression::Expression(Kind kind, Token token, shared_ptr left, shared_ptr right): token(token) { switch (kind) { case LITERAL: setupLiteral(token); break; case GROUPING: setupGrouping(token, left); + break; case BINARY: setupBinary(token, left, right); break; @@ -18,7 +17,7 @@ Expression::Expression(Kind kind, Token token, shared_ptr left, shar } void Expression::setupLiteral(Token token) { - bool isKindValid = token.isOneOf({Token::Kind::INTEGER}); + bool isKindValid = token.isOfKind({Token::Kind::INTEGER}); if (!isKindValid) return; @@ -38,7 +37,7 @@ void Expression::setupGrouping(Token token, shared_ptr expression) { } void Expression::setupBinary(Token token, shared_ptr left, shared_ptr right) { - bool isKindValid = token.isOneOf({Token::Kind::PLUS, Token::Kind::MINUS, Token::Kind::STAR, Token::Kind::SLASH, Token::Kind::PERCENT}); + bool isKindValid = token.isOfKind({Token::Kind::PLUS, Token::Kind::MINUS, Token::Kind::STAR, Token::Kind::SLASH, Token::Kind::PERCENT}); bool isLeftValid = left != nullptr && left->getKind() != Kind::INVALID; bool isRightValid = right != nullptr && right->getKind() != Kind::INVALID; @@ -65,6 +64,8 @@ void Expression::setupBinary(Token token, shared_ptr left, shared_pt break; case Token::Kind::INVALID: break; + default: + exit(1); } this->left = left; @@ -75,6 +76,10 @@ Expression::Kind Expression::getKind() { return kind; } +Token Expression::getToken() { + return token; +} + int64_t Expression::getInteger() { return integer; } @@ -91,15 +96,11 @@ shared_ptr Expression::getRight() { return right; } -bool Expression::operator==(Expression const& other) { - return kind == other.kind; +bool Expression::isValid() { + return kind != Expression::Kind::INVALID; } -bool Expression::operator!=(Expression const& other) { - return kind != other.kind; -} - -std::string Expression::toString() { +string Expression::toString() { switch (kind) { case LITERAL: return to_string(integer); diff --git a/src/Expression.h b/src/Expression.h index d93ecc3..cde03aa 100644 --- a/src/Expression.h +++ b/src/Expression.h @@ -25,6 +25,7 @@ public: private: Kind kind = INVALID; + Token token; int64_t integer = 0; Operator operation = NONE; shared_ptr left = nullptr; @@ -37,15 +38,13 @@ private: public: Expression(Kind kind, Token token, shared_ptr left, shared_ptr right); Kind getKind(); + Token getToken(); int64_t getInteger(); Operator getOperator(); shared_ptr getLeft(); shared_ptr getRight(); - bool operator==(Expression const& other); - bool operator!=(Expression const& other); + bool isValid(); string toString(); - - static shared_ptr Invalid; }; #endif \ No newline at end of file diff --git a/src/Lexer.cpp b/src/Lexer.cpp index 16b5150..a6a68c8 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -1,92 +1,140 @@ #include "Lexer.h" -Lexer::Lexer(std::string source) : source(source) { +Lexer::Lexer(string source): source(source) { } -std::vector Lexer::getTokens() { - std::vector tokens; +vector Lexer::getTokens() { + vector tokens; do { Token token = nextToken(); - currentIndex += token.getLexme().length(); - if (token.getKind() == Token::Kind::NEW_LINE) + // Abort scanning if we got an error + if (token.getKind() == Token::Kind::INVALID) { + cerr << "Unexpected character '" << token.getLexme() << "' at " << token.getLine() << ":" << token.getColumn() << endl; + return vector(); + } + + currentIndex += token.getLexme().length(); + currentColumn += token.getLexme().length(); + + if (token.getKind() == Token::Kind::NEW_LINE) { currentLine++; + currentColumn = 0; + } // filter out multiple new lines - if (tokens.empty() || token.getKind() != Token::Kind::NEW_LINE || tokens.back() != token) + if (tokens.empty() || token.getKind() != Token::Kind::NEW_LINE || tokens.back().getKind() != token.getKind()) tokens.push_back(token); } while (tokens.back().getKind() != Token::Kind::END); return tokens; } Token Lexer::nextToken() { - Token token = Token::Invalid; - - while (currentIndex < source.length() && isWhiteSpace(currentIndex)) + while (currentIndex < source.length() && isWhiteSpace(currentIndex)) { currentIndex++; + currentColumn++; + } - do { - if ((token = matchEnd()) != Token::Invalid) - break; - - if ((token = matchSymbol('+', Token::Kind::PLUS)) != Token::Invalid) - break; - - if ((token = matchSymbol('-', Token::Kind::MINUS)) != Token::Invalid) - break; + { + Token token = matchEnd(); + if (token.isValid()) + return token; + } - if ((token = matchSymbol('*', Token::Kind::STAR)) != Token::Invalid) - break; + { + Token token = matchSymbol('+', Token::Kind::PLUS); + if (token.isValid()) + return token; + } - if ((token = matchSymbol('/', Token::Kind::SLASH)) != Token::Invalid) - break; + { + Token token = matchSymbol('-', Token::Kind::MINUS); + if (token.isValid()) + return token; + } - if ((token = matchSymbol('%', Token::Kind::PERCENT)) != Token::Invalid) - break; + { + Token token = matchSymbol('*', Token::Kind::STAR); + if (token.isValid()) + return token; + } - if ((token = matchSymbol('(', Token::Kind::LEFT_PAREN)) != Token::Invalid) - break; + { + Token token = matchSymbol('/', Token::Kind::SLASH); + if (token.isValid()) + return token; + } - if ((token = matchSymbol(')', Token::Kind::RIGHT_PAREN)) != Token::Invalid) - break; + { + Token token =matchSymbol('%', Token::Kind::PERCENT); + if (token.isValid()) + return token; + } - if ((token = matchSymbol('.', Token::Kind::DOT)) != Token::Invalid) - break; + { + Token token = matchSymbol('(', Token::Kind::LEFT_PAREN); + if (token.isValid()) + return token; + } - if ((token = matchSymbol(',', Token::Kind::COMMA)) != Token::Invalid) - break; + { + Token token = matchSymbol(')', Token::Kind::RIGHT_PAREN); + if (token.isValid()) + return token; + } - if ((token = matchInteger()) != Token::Invalid) - break; + { + Token token =matchSymbol('.', Token::Kind::DOT); + if (token.isValid()) + return token; + } - if ((token = matchNewLine()) != Token::Invalid) - break; - - token = matchInvalid(); - } while(false); + { + Token token = matchSymbol(',', Token::Kind::COMMA); + if (token.isValid()) + return token; + } - return token; + { + Token token = matchInteger(); + if (token.isValid()) + return token; + } + + { + Token token = matchKeyword("fun", Token::Kind::FUNCTION); + if (token.isValid()) + return token; + } + + { + Token token = matchNewLine(); + if (token.isValid()) + return token; + } + + return matchInvalid(); } Token Lexer::matchEnd() { if (currentIndex >= source.length()) - return Token(Token::Kind::END, ""); + return Token(Token::Kind::END, "", currentLine, currentColumn); - return Token::Invalid; + return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn); } Token Lexer::matchNewLine() { if (isNewLine(currentIndex)) - return Token(Token::Kind::NEW_LINE, "\n"); + return Token(Token::Kind::NEW_LINE, "\n", currentLine, currentColumn); - return Token::Invalid; + return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn); } Token Lexer::matchSymbol(char symbol, Token::Kind kind) { if (source.at(currentIndex) == symbol) - return Token(kind, std::string(1, symbol)); + return Token(kind, string(1, symbol), currentLine, currentColumn); - return Token::Invalid; + return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn); } Token Lexer::matchInteger() { @@ -96,15 +144,24 @@ Token Lexer::matchInteger() { nextIndex++; if (nextIndex == currentIndex) - return Token::Invalid; + return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn); - std::string lexme = source.substr(currentIndex, nextIndex - currentIndex); - return Token(Token::Kind::INTEGER, lexme); + string lexme = source.substr(currentIndex, nextIndex - currentIndex); + return Token(Token::Kind::INTEGER, lexme, currentLine, currentColumn); +} + +Token Lexer::matchKeyword(string keyword, Token::Kind kind) { + bool isMatching = source.compare(currentIndex, keyword.length(), keyword) == 0; + bool isSeparated = (currentIndex + keyword.length() >= source.length()) || isWhiteSpace(currentIndex + keyword.length()) || isNewLine(currentIndex + keyword.length()); + + if (isMatching && isSeparated) + return Token(Token::Kind::FUNCTION, keyword, currentLine, currentColumn); + else + return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn); } Token Lexer::matchInvalid() { - char symbol = source.at(currentIndex); - return Token(Token::Kind::INVALID, std::string(1, symbol)); + return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn); } bool Lexer::isWhiteSpace(int index) { diff --git a/src/Lexer.h b/src/Lexer.h index 4225f2c..9d663dc 100644 --- a/src/Lexer.h +++ b/src/Lexer.h @@ -1,14 +1,18 @@ #ifndef LEXER_H #define LEXER_H -#include #include "Token.h" +#include + +using namespace std; + class Lexer { private: - std::string source; + string source; int currentIndex = 0; int currentLine = 0; + int currentColumn = 0; Token nextToken(); Token matchEnd(); @@ -16,14 +20,15 @@ private: Token matchInvalid(); Token matchSymbol(char symbol, Token::Kind kind); Token matchInteger(); + Token matchKeyword(string keyword, Token::Kind kind); bool isWhiteSpace(int index); bool isNewLine(int index); bool isDigit(int index); public: - Lexer(std::string source); - std::vector getTokens(); + Lexer(string source); + vector getTokens(); }; #endif \ No newline at end of file diff --git a/src/Parser.cpp b/src/Parser.cpp index 928e0c5..07530ac 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -4,13 +4,18 @@ Parser::Parser(vector tokens): tokens(tokens) { } shared_ptr Parser::getExpression() { - return term(); + shared_ptr expression = term(); + if (!expression->isValid()) { + cerr << "Unexpected token '" << expression->getToken().getLexme() << "' at " << expression->getToken().getLine() << ":" << expression->getToken().getColumn() << endl; + return nullptr; + } + return expression; } shared_ptr Parser::term() { shared_ptr expression = factor(); - while (tokens.at(currentIndex).isOneOf({Token::Kind::PLUS, Token::Kind::MINUS})) { + while (tokens.at(currentIndex).isOfKind({Token::Kind::PLUS, Token::Kind::MINUS})) { expression = matchBinary(expression); } @@ -20,7 +25,7 @@ shared_ptr Parser::term() { shared_ptr Parser::factor() { shared_ptr expression = primary(); - while (tokens.at(currentIndex).isOneOf({Token::Kind::STAR, Token::Kind::SLASH, Token::Kind::PERCENT})) { + while (tokens.at(currentIndex).isOfKind({Token::Kind::STAR, Token::Kind::SLASH, Token::Kind::PERCENT})) { expression = matchBinary(expression); } @@ -28,17 +33,19 @@ shared_ptr Parser::factor() { } shared_ptr Parser::primary() { - shared_ptr expression = Expression::Invalid; + { + shared_ptr expression = matchInteger(); + if (expression->isValid()) + return expression; + } - do { - if((expression = matchInteger()) != Expression::Invalid) - break; - - if((expression = matchGrouping()) != Expression::Invalid) - break; - } while(false); + { + shared_ptr expression = matchGrouping(); + if (expression->isValid()) + return expression; + } - return expression; + return make_shared(Expression::Kind::INVALID, tokens.at(currentIndex), nullptr, nullptr); } shared_ptr Parser::matchInteger() { @@ -48,7 +55,7 @@ shared_ptr Parser::matchInteger() { return make_shared(Expression::Kind::LITERAL, token, nullptr, nullptr); } - return Expression::Invalid; + return make_shared(Expression::Kind::INVALID, token, nullptr, nullptr); } shared_ptr Parser::matchGrouping() { @@ -56,22 +63,28 @@ shared_ptr Parser::matchGrouping() { if (token.getKind() == Token::Kind::LEFT_PAREN) { currentIndex++; shared_ptr expression = term(); + // has grouped expression failed? + if (!expression->isValid()) + return expression; if (tokens.at(currentIndex).getKind() == Token::Kind::RIGHT_PAREN) { currentIndex++; return make_shared(Expression::Kind::GROUPING, token, expression, nullptr); } } - return Expression::Invalid; + return make_shared(Expression::Kind::INVALID, token, nullptr, nullptr); } shared_ptr Parser::matchBinary(shared_ptr left) { Token token = tokens.at(currentIndex); - if (token.isOneOf({Token::Kind::PLUS, Token::Kind::MINUS, Token::Kind::STAR, Token::Kind::SLASH, Token::Kind::PERCENT})) { + if (token.isOfKind({Token::Kind::PLUS, Token::Kind::MINUS, Token::Kind::STAR, Token::Kind::SLASH, Token::Kind::PERCENT})) { currentIndex++; shared_ptr right = factor(); + // Has right expression failed? + if (!right->isValid()) + return right; return make_shared(Expression::Kind::BINARY, token, left, right); } - return Expression::Invalid; + return make_shared(Expression::Kind::INVALID, token, nullptr, nullptr); } \ No newline at end of file diff --git a/src/Parser.h b/src/Parser.h index f6172fe..d8029eb 100644 --- a/src/Parser.h +++ b/src/Parser.h @@ -14,7 +14,7 @@ private: shared_ptr term(); // +, - shared_ptr factor(); // *, /, % - shared_ptr primary(); + shared_ptr primary(); // integer, () shared_ptr matchInteger(); shared_ptr matchGrouping(); diff --git a/src/Token.cpp b/src/Token.cpp index 55dd160..ec609fa 100644 --- a/src/Token.cpp +++ b/src/Token.cpp @@ -1,27 +1,29 @@ #include "Token.h" -Token Token::Invalid = Token(Token::Kind::INVALID, ""); - -Token::Token(Kind kind, std::string lexme): kind(kind), lexme(lexme) { +Token::Token(Kind kind, string lexme, int line, int column): kind(kind), lexme(lexme), line(line), column(column) { } Token::Kind Token::getKind() { return kind; } -std::string Token::getLexme() { +string Token::getLexme() { return lexme; } -bool Token::operator==(Token const& other) { - return kind == other.kind; +int Token::getLine() { + return line; } -bool Token::operator!=(Token const& other) { - return kind != other.kind; +int Token::getColumn() { + return column; } -bool Token::isOneOf(std::vector kinds) { +bool Token::isValid() { + return kind != Token::Kind::INVALID; +} + +bool Token::isOfKind(vector kinds) { for (Kind &kind : kinds) { if (kind == this->kind) return true; @@ -30,7 +32,7 @@ bool Token::isOneOf(std::vector kinds) { return false; } -std::string Token::toString() { +string Token::toString() { switch (kind) { case PLUS: return "PLUS"; @@ -52,6 +54,8 @@ std::string Token::toString() { return "COMMA"; case INTEGER: return "INTEGER"; + case FUNCTION: + return "FUNCTION"; case NEW_LINE: return "NEW_LINE"; case END: diff --git a/src/Token.h b/src/Token.h index 1a2b844..ab837cc 100644 --- a/src/Token.h +++ b/src/Token.h @@ -3,6 +3,8 @@ #include +using namespace std; + class Token { public: enum Kind { @@ -19,26 +21,29 @@ public: INTEGER, - NEW_LINE, + FUNCTION, + NEW_LINE, END, + INVALID }; private: Kind kind; - std::string lexme; + string lexme; + int line; + int column; public: - Token(Kind kind, std::string lexme); + Token(Kind kind, string lexme, int line, int column); Kind getKind(); - std::string getLexme(); - bool operator==(Token const& other); - bool operator!=(Token const& other); - bool isOneOf(std::vector kinds); - std::string toString(); - - static Token Invalid; + string getLexme(); + int getLine(); + int getColumn(); + bool isValid(); + bool isOfKind(vector kinds); + string toString(); }; #endif \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 9a6a84f..26febac 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -36,12 +36,18 @@ int main(int argc, char **argv) { std::string source = readFile(std::string(argv[1])); Lexer lexer(source); std::vector tokens = lexer.getTokens(); + if (tokens.empty()) { + exit(1); + } for (Token &token : tokens) std::cout << token.toString() << " "; std::cout << std::endl; Parser parser(tokens); shared_ptr expression = parser.getExpression(); + if (!expression) { + exit(1); + } cout << expression->toString() << endl; ModuleBuilder moduleBuilder(expression);