From f9ec29fee82fa5eb6d624e51c27828111aafe37c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Thu, 3 Jul 2025 18:16:09 +0900 Subject: [PATCH] Better statement errors parsing --- src/Error.cpp | 21 ++++- src/Error.h | 22 ++++++ src/Lexer/Lexer.h | 1 - src/Logger.cpp | 102 ++++++++++++++++++++++++- src/Logger.h | 2 + src/Parser/Parser.cpp | 173 ++++++++++++++++++++++++++---------------- src/Parser/Parser.h | 9 ++- 7 files changed, 261 insertions(+), 69 deletions(-) diff --git a/src/Error.cpp b/src/Error.cpp index c1a508e..a4f5b06 100644 --- a/src/Error.cpp +++ b/src/Error.cpp @@ -1,7 +1,14 @@ #include "Error.h" Error::Error(int line, int column, string lexme) : -line(line), column(column), lexme(lexme) { } +kind(ErrorKind::LEXER_ERROR), line(line), column(column), lexme(lexme) { } + +Error::Error(shared_ptr actualToken, optional expectedTokenKind, optional message) : +kind(ErrorKind::PARSER_ERROR), actualToken(actualToken), expectedTokenKind(expectedTokenKind), message(message) { } + +ErrorKind Error::getKind() { + return kind; +} int Error::getLine() { return line; @@ -13,4 +20,16 @@ int Error::getColumn() { string Error::getLexme() { return lexme; +} + +shared_ptr Error::getActualToken() { + return actualToken; +} + +optional Error::getExpectedTokenKind() { + return expectedTokenKind; +} + +optional Error::getMessage() { + return message; } \ No newline at end of file diff --git a/src/Error.h b/src/Error.h index 4770af7..97dc968 100644 --- a/src/Error.h +++ b/src/Error.h @@ -3,19 +3,41 @@ #include +class Token; +enum class TokenKind; + using namespace std; +enum class ErrorKind { + LEXER_ERROR, + PARSER_ERROR +}; + class Error { private: + ErrorKind kind; + int line; int column; string lexme; + shared_ptr actualToken; + optional expectedTokenKind; + optional message; + public: Error(int line, int column, string lexme); + Error(shared_ptr actualToken, optional expectedTokenKind, optional message); + + ErrorKind getKind(); + int getLine(); int getColumn(); string getLexme(); + + shared_ptr getActualToken(); + optional getExpectedTokenKind(); + optional getMessage(); }; #endif \ No newline at end of file diff --git a/src/Lexer/Lexer.h b/src/Lexer/Lexer.h index 297616d..1f7018a 100644 --- a/src/Lexer/Lexer.h +++ b/src/Lexer/Lexer.h @@ -15,7 +15,6 @@ private: int currentIndex; int currentLine; int currentColumn; - vector> errors; shared_ptr nextToken(); diff --git a/src/Logger.cpp b/src/Logger.cpp index f8b9d95..a744500 100644 --- a/src/Logger.cpp +++ b/src/Logger.cpp @@ -102,6 +102,81 @@ string Logger::toString(shared_ptr token) { } } +string Logger::toString(TokenKind tokenKind) { + switch (tokenKind) { + case TokenKind::PLUS: + return "+"; + case TokenKind::MINUS: + return "-"; + case TokenKind::STAR: + return "*"; + case TokenKind::SLASH: + return "/"; + case TokenKind::PERCENT: + return "%"; + + case TokenKind::EQUAL: + return "="; + case TokenKind::NOT_EQUAL: + return "≠"; + case TokenKind::LESS: + return "<"; + case TokenKind::LESS_EQUAL: + return "≤"; + case TokenKind::GREATER: + return ">"; + case TokenKind::GREATER_EQUAL: + return "≥"; + + case TokenKind::LEFT_PAREN: + return "("; + case TokenKind::RIGHT_PAREN: + return ")"; + case TokenKind::COMMA: + return ","; + case TokenKind::COLON: + return ":"; + case TokenKind::SEMICOLON: + return ";"; + case TokenKind::LEFT_ARROW: + return "←"; + case TokenKind::RIGHT_ARROW: + return "→"; + + case TokenKind::BOOL: + return "LITERAL(BOOLEAN)"; + case TokenKind::INTEGER_DEC: + case TokenKind::INTEGER_HEX: + case TokenKind::INTEGER_BIN: + return "LITERAL(INTEGER)"; + case TokenKind::REAL: + return "LITERAL(REAL)"; + case TokenKind::IDENTIFIER: + return "LITERAL(ID)"; + case TokenKind::TYPE: + return "TYPE"; + + case TokenKind::IF: + return "IF"; + case TokenKind::ELSE: + return "ELSE"; + case TokenKind::FUNCTION: + return "FUN"; + case TokenKind::RETURN: + return "RET"; + case TokenKind::REPEAT: + return "REP"; + + case TokenKind::M_EXTERN: + return "@EXTERN"; + + case TokenKind::NEW_LINE: + return "↲"; + case TokenKind::END: + return "END"; + } +} + string Logger::toString(shared_ptr statement) { switch (statement->getKind()) { case StatementKind::META_EXTERN_FUNCTION: @@ -333,5 +408,30 @@ void Logger::print(vector> statements) { } void Logger::print(shared_ptr error) { - cout << format("Unexpected token \"{}\" at line: {}, column: {}\n", error->getLexme(), error->getLine() + 1, error->getColumn() + 1); + string message; + switch (error->getKind()) { + case ErrorKind::LEXER_ERROR: + message = format("Unexpected token \"{}\" at line: {}, column: {}", error->getLexme(), error->getLine() + 1, error->getColumn() + 1); + break; + case ErrorKind::PARSER_ERROR: + shared_ptr token = error->getActualToken(); + optional expectedTokenKind = error->getExpectedTokenKind(); + optional errorMessage = error->getMessage(); + + if (expectedTokenKind) { + message = format( + "Expected token {} but instead found \"{}\" at line: {}, column: {}", + toString(*expectedTokenKind), token->getLexme(), token->getLine() + 1, token->getColumn() + 1 + ); + } else { + message = format( + "Unexpected token \"{}\" found at line: {}, column: {}", + token->getLexme(), token->getLine() + 1, token->getColumn() + 1 + ); + } + if (errorMessage) + message += format(". {}", *errorMessage); + break; + } + cout << message << endl; } \ No newline at end of file diff --git a/src/Logger.h b/src/Logger.h index a135bf2..9d1735d 100644 --- a/src/Logger.h +++ b/src/Logger.h @@ -4,6 +4,7 @@ #include class Token; +enum class TokenKind; class Statement; class StatementMetaExternFunction; class StatementVariable; @@ -32,6 +33,7 @@ using namespace std; class Logger { private: static string toString(shared_ptr token); + static string toString(TokenKind tokenKind); static string toString(shared_ptr statement); static string toString(shared_ptr statement); diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index f2a38f6..83ab7b3 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -1,5 +1,8 @@ #include "Parser.h" +#include "Error.h" +#include "Logger.h" + #include "Parser/Expression/ExpressionGrouping.h" #include "Parser/Expression/ExpressionLiteral.h" #include "Parser/Expression/ExpressionVariable.h" @@ -27,20 +30,21 @@ vector> Parser::getStatements() { while (!tryMatchingTokenKinds({TokenKind::END}, true, false)) { shared_ptr statement = nextStatement(); - // Abort parsing if we got an error - if (!statement->isValid()) { - //cerr << statement->toString(0); - exit(1); - } - statements.push_back(statement); + if (statement != nullptr) { + statements.push_back(statement); - // Expect new line after statement - if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) { - cerr << "Expected new line" << endl; - exit(1); + // Expect new line after statement + if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) + markError(TokenKind::NEW_LINE, {}); } } + if (!errors.empty()) { + for (shared_ptr &error : errors) + Logger::print(error); + exit(1); + } + return statements; } @@ -48,21 +52,23 @@ vector> Parser::getStatements() { // Statement // shared_ptr Parser::nextStatement() { - shared_ptr statement; + shared_ptr statement; + int errorsCount = errors.size(); statement = matchStatementFunction(); - if (statement != nullptr) + if (statement != nullptr || errors.size() > errorsCount) return statement; statement = matchStatementVariable(); - if (statement != nullptr) + if (statement != nullptr || errors.size() > errorsCount) return statement; statement = matchStatementMetaExternFunction(); - if (statement != nullptr) + if (statement != nullptr || errors.size() > errorsCount) return statement; - return matchStatementInvalid("Unexpected token"); + markError({}, {}); + return nullptr; } shared_ptr Parser::nextInBlockStatement() { @@ -88,7 +94,8 @@ shared_ptr Parser::nextInBlockStatement() { if (statement != nullptr) return statement; - return matchStatementInvalid("Unexpected token"); + markError({}, {}); + return nullptr; } shared_ptr Parser::matchStatementMetaExternFunction() { @@ -107,13 +114,17 @@ shared_ptr Parser::matchStatementMetaExternFunction() { if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { do { tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) - return matchStatementInvalid("Expected function argument"); + if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) { + markError({}, "Expected function argument"); + return nullptr; + } shared_ptr identifierToken = tokens.at(currentIndex++); shared_ptr typeToken = tokens.at(currentIndex++); optional argumentType = valueTypeForToken(typeToken); - if (!argumentType) - return matchStatementInvalid("Invalid argument type"); + if (!argumentType) { + markError(TokenKind::TYPE, {}); + return nullptr; + } arguments.push_back(pair(identifierToken->getLexme(), *argumentType)); } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); @@ -125,8 +136,10 @@ shared_ptr Parser::matchStatementMetaExternFunction() { shared_ptr typeToken = tokens.at(currentIndex); optional type = valueTypeForToken(typeToken); - if (!type) - return matchStatementInvalid("Expected return type"); + if (!type) { + markError(TokenKind::TYPE, {}); + return nullptr; + } returnType = *type; currentIndex++; // type @@ -149,18 +162,23 @@ shared_ptr Parser::matchStatementVariable() { valueType = ValueType::SINT32; else if (valueTypeToken->getLexme().compare("real32") == 0) valueType = ValueType::REAL32; - else - return matchStatementInvalid("Invalid type"); + else { + markError(TokenKind::TYPE, {}); + return nullptr; + } currentIndex++; // type // Expect left arrow - if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true)) - return matchStatementInvalid("Expected left arrow"); + if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true)) { + markError(TokenKind::LEFT_ARROW, {}); + return nullptr; + } shared_ptr expression = nextExpression(); - if (expression == nullptr || !expression->isValid()) - return matchStatementInvalid("Invalid expression"); + if (expression == nullptr || !expression->isValid()) { + return nullptr; + } return make_shared(identifierToken->getLexme(), valueType, expression); } @@ -182,13 +200,17 @@ shared_ptr Parser::matchStatementFunction() { if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { do { tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) - return matchStatementInvalid("Expected function argument"); + if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) { + markError({}, "Expected function argument"); + return nullptr; + } shared_ptr identifierToken = tokens.at(currentIndex++); shared_ptr typeToken = tokens.at(currentIndex++); optional argumentType = valueTypeForToken(typeToken); - if (!argumentType) - return matchStatementInvalid("Invalid argument type"); + if (!argumentType) { + markError(TokenKind::TYPE, {}); + return nullptr; + } arguments.push_back(pair(identifierToken->getLexme(), *argumentType)); } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); @@ -200,24 +222,31 @@ shared_ptr Parser::matchStatementFunction() { shared_ptr typeToken = tokens.at(currentIndex); optional type = valueTypeForToken(typeToken); - if (!type) - return matchStatementInvalid("Expected return type"); + if (!type) { + markError(TokenKind::TYPE, {}); + return nullptr; + } returnType = *type; currentIndex++; // type } // consume new line - if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) - return matchStatementInvalid("Expected new line after function declaration"); + if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) { + markError(TokenKind::NEW_LINE, {}); + return nullptr; + } // block statementBlock = matchStatementBlock({TokenKind::SEMICOLON}); - if (statementBlock == nullptr || !statementBlock->isValid()) - return statementBlock ?: matchStatementInvalid(); + if (statementBlock == nullptr) + return nullptr; - if(!tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true)) - return matchStatementInvalid("Expected a \";\" after a function declaration"); + if(!tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true)) { + markError(TokenKind::SEMICOLON, {}); + return nullptr; + } + //return matchStatementInvalid("Expected a \";\" after a function declaration"); return make_shared(name, arguments, returnType, dynamic_pointer_cast(statementBlock)); } @@ -227,16 +256,18 @@ shared_ptr Parser::matchStatementBlock(vector terminalToke while (!tryMatchingTokenKinds(terminalTokenKinds, false, false)) { shared_ptr statement = nextInBlockStatement(); - if (statement == nullptr || !statement->isValid()) - return statement ?: matchStatementInvalid("Expected statement"); + if (statement == nullptr) + return nullptr; statements.push_back(statement); if (tryMatchingTokenKinds(terminalTokenKinds, false, false)) break; // except new line - if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) - return matchStatementInvalid("Expected new line"); + if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) { + markError(TokenKind::NEW_LINE, {}); + return nullptr; + } } return make_shared(statements); @@ -250,8 +281,8 @@ shared_ptr Parser::matchStatementAssignment() { currentIndex++; // arrow shared_ptr expression = nextExpression(); - if (expression == nullptr || !expression->isValid()) - return matchStatementInvalid("Expected expression"); + if (expression == nullptr) + return nullptr; return make_shared(identifierToken->getLexme(), expression); } @@ -261,8 +292,8 @@ shared_ptr Parser::matchStatementReturn() { return nullptr; shared_ptr expression = nextExpression(); - if (expression != nullptr && !expression->isValid()) - return matchStatementInvalid("Expected expression"); + if (expression == nullptr) + return nullptr; return make_shared(expression); } @@ -285,33 +316,35 @@ shared_ptr Parser::matchStatementRepeat() { if (!tryMatchingTokenKinds({TokenKind::COLON}, false, true)) { // got initial, expect comma - if (initStatement != nullptr && !tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) - return matchStatementInvalid("Expected comma after initial statement"); + if (initStatement != nullptr && !tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) { + markError(TokenKind::COMMA, {}); + return nullptr; + } // optional new line tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // pre condition preConditionExpression = nextExpression(); - if (preConditionExpression != nullptr && !preConditionExpression->isValid()) - return matchStatementInvalid("Expected pre-condition expression"); if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { // got pre-condition, expect comma - if (!tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) - return matchStatementInvalid("Expected comma after pre-condition statement"); + if (!tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) { + markError(TokenKind::COMMA, {}); + return nullptr; + } // optional new line tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // post condition postConditionExpression = nextExpression(); - if (postConditionExpression == nullptr || !postConditionExpression->isValid()) - return matchStatementInvalid("Expected post-condition expression"); // expect colon - if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true)) - return matchStatementInvalid("Expected \":\""); + if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { + markError(TokenKind::COLON, {}); + return nullptr; + } } } @@ -323,8 +356,8 @@ shared_ptr Parser::matchStatementRepeat() { else bodyBlockStatement = matchStatementBlock({TokenKind::NEW_LINE}); - if (bodyBlockStatement == nullptr || !bodyBlockStatement->isValid()) - return bodyBlockStatement ?: matchStatementInvalid("Expected block statement"); + if (bodyBlockStatement == nullptr) + return nullptr; tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true); @@ -342,10 +375,6 @@ shared_ptr Parser::matchStatementExpression() { return make_shared(expression); } -shared_ptr Parser::matchStatementInvalid(string message) { - return make_shared(tokens.at(currentIndex), message); -} - // // Expression // @@ -632,3 +661,19 @@ optional Parser::valueTypeForToken(shared_ptr token) { return {}; } + +void Parser::markError(optional expectedTokenKind, optional message) { + shared_ptr actualToken = tokens.at(currentIndex); + + // Try reaching the next safe token + vector safeKinds = {TokenKind::END}; + if (!actualToken->isOfKind({TokenKind::NEW_LINE})) + safeKinds.push_back(TokenKind::NEW_LINE); + if (!actualToken->isOfKind({TokenKind::SEMICOLON})) + safeKinds.push_back(TokenKind::SEMICOLON); + + while (!tryMatchingTokenKinds(safeKinds, false, true)) + currentIndex++; + + errors.push_back(make_shared(actualToken, expectedTokenKind, message)); +} diff --git a/src/Parser/Parser.h b/src/Parser/Parser.h index 6ff976f..cd14e44 100644 --- a/src/Parser/Parser.h +++ b/src/Parser/Parser.h @@ -2,8 +2,11 @@ #define PARSER_H #include +#include "Types.h" -#include "Lexer/Token.h" +class Token; +enum class TokenKind; +class Error; class Expression; class ExpressionInvalid; @@ -17,6 +20,7 @@ class Parser { private: vector> tokens; int currentIndex = 0; + vector> errors; shared_ptr nextStatement(); shared_ptr nextInBlockStatement(); @@ -30,7 +34,6 @@ private: shared_ptr matchStatementReturn(); shared_ptr matchStatementRepeat(); shared_ptr matchStatementExpression(); - shared_ptr matchStatementInvalid(string message = ""); shared_ptr nextExpression(); shared_ptr matchEquality(); // =, != @@ -51,6 +54,8 @@ private: bool tryMatchingTokenKinds(vector kinds, bool shouldMatchAll, bool shouldAdvance); optional valueTypeForToken(shared_ptr token); + void markError(optional expectedTokenKind, optional message); + public: Parser(vector> tokens); vector> getStatements();