Better statement errors parsing

This commit is contained in:
Rafał Grodziński
2025-07-03 18:16:09 +09:00
parent e0081ee12b
commit f9ec29fee8
7 changed files with 261 additions and 69 deletions

View File

@@ -1,7 +1,14 @@
#include "Error.h"
// Creates a lexer error describing an unexpected lexeme.
// line / column: position of the lexeme (Logger::print shows them as value + 1).
// lexme: text of the offending lexeme.
// The token-related members (actualToken, expectedTokenKind, message) stay empty.
Error::Error(int line, int column, string lexme) :
    kind(ErrorKind::LEXER_ERROR), line(line), column(column), lexme(lexme) { }
// Creates a parser error.
// actualToken: the token found at the point of failure.
// expectedTokenKind: the kind the parser was looking for, if it expected a specific one.
// message: optional extra explanation appended to the printed error.
// line and column are zeroed so getLine()/getColumn() never return indeterminate
// values for parser errors; the position is carried by actualToken instead.
Error::Error(shared_ptr<Token> actualToken, optional<TokenKind> expectedTokenKind, optional<string> message) :
    kind(ErrorKind::PARSER_ERROR), line(0), column(0), actualToken(actualToken), expectedTokenKind(expectedTokenKind), message(message) { }
// Tells which constructor produced this error (lexer or parser).
ErrorKind Error::getKind() {
    return this->kind;
}
int Error::getLine() {
return line;
@@ -13,4 +20,16 @@ int Error::getColumn() {
// Returns a copy of the lexeme stored by the lexer-error constructor.
string Error::getLexme() {
    return this->lexme;
}
// Returns the token stored by the parser-error constructor.
shared_ptr<Token> Error::getActualToken() {
    return this->actualToken;
}
// Returns the token kind the parser expected; empty when none was given.
optional<TokenKind> Error::getExpectedTokenKind() {
    return this->expectedTokenKind;
}
// Returns the optional explanatory text attached to a parser error.
optional<string> Error::getMessage() {
    return this->message;
}

View File

@@ -3,19 +3,41 @@
#include <iostream>
class Token;
enum class TokenKind;
using namespace std;
// Discriminates which Error constructor created an error object.
enum class ErrorKind {
    LEXER_ERROR,  // set by Error(int line, int column, string lexme)
    PARSER_ERROR  // set by Error(shared_ptr<Token>, optional<TokenKind>, optional<string>)
};
// A single diagnostic, created either for a lexer failure (position + lexeme)
// or for a parser failure (offending token + optional expectation/message).
// getKind() tells which group of accessors carries the data.
class Error {
public:
    // Lexer error: unexpected lexeme at the given position.
    Error(int line, int column, string lexme);
    // Parser error: token found, plus optionally the expected kind and a note.
    Error(shared_ptr<Token> actualToken, optional<TokenKind> expectedTokenKind, optional<string> message);

    ErrorKind getKind();

    // Data stored by the lexer-error constructor
    int getLine();
    int getColumn();
    string getLexme();

    // Data stored by the parser-error constructor
    shared_ptr<Token> getActualToken();
    optional<TokenKind> getExpectedTokenKind();
    optional<string> getMessage();

private:
    ErrorKind kind;

    int line;
    int column;
    string lexme;

    shared_ptr<Token> actualToken;
    optional<TokenKind> expectedTokenKind;
    optional<string> message;
};
#endif

View File

@@ -15,7 +15,6 @@ private:
int currentIndex;
int currentLine;
int currentColumn;
vector<shared_ptr<Error>> errors;
shared_ptr<Token> nextToken();

View File

@@ -102,6 +102,81 @@ string Logger::toString(shared_ptr<Token> token) {
}
}
// Returns a short human-readable spelling of a token kind, used when reporting
// which token the parser expected.
// Every kind yields a non-empty string so the error text never contains a blank
// where the expected token should be.
// NOTE(review): the spellings for NOT_EQUAL, LESS_EQUAL, GREATER_EQUAL,
// LEFT_ARROW, RIGHT_ARROW and NEW_LINE were empty; confirm the replacements
// below match how the language actually spells these tokens.
string Logger::toString(TokenKind tokenKind) {
    switch (tokenKind) {
        case TokenKind::PLUS:
            return "+";
        case TokenKind::MINUS:
            return "-";
        case TokenKind::STAR:
            return "*";
        case TokenKind::SLASH:
            return "/";
        case TokenKind::PERCENT:
            return "%";

        case TokenKind::EQUAL:
            return "=";
        case TokenKind::NOT_EQUAL:
            return "!=";
        case TokenKind::LESS:
            return "<";
        case TokenKind::LESS_EQUAL:
            return "<=";
        case TokenKind::GREATER:
            return ">";
        case TokenKind::GREATER_EQUAL:
            return ">=";

        case TokenKind::LEFT_PAREN:
            return "(";
        case TokenKind::RIGHT_PAREN:
            return ")";
        case TokenKind::COMMA:
            return ",";
        case TokenKind::COLON:
            return ":";
        case TokenKind::SEMICOLON:
            return ";";
        case TokenKind::LEFT_ARROW:
            return "<-";
        case TokenKind::RIGHT_ARROW:
            return "->";

        case TokenKind::BOOL:
            return "LITERAL(BOOLEAN)";
        case TokenKind::INTEGER_DEC:
        case TokenKind::INTEGER_HEX:
        case TokenKind::INTEGER_BIN:
            return "LITERAL(INTEGER)";
        case TokenKind::REAL:
            return "LITERAL(REAL)";
        case TokenKind::IDENTIFIER:
            return "LITERAL(ID)";
        case TokenKind::TYPE:
            return "TYPE";

        case TokenKind::IF:
            return "IF";
        case TokenKind::ELSE:
            return "ELSE";
        case TokenKind::FUNCTION:
            return "FUN";
        case TokenKind::RETURN:
            return "RET";
        case TokenKind::REPEAT:
            return "REP";

        case TokenKind::M_EXTERN:
            return "@EXTERN";

        case TokenKind::NEW_LINE:
            return "NEW_LINE";
        case TokenKind::END:
            return "END";
    }

    // Reached only for a kind not listed above; returning here avoids flowing
    // off the end of a value-returning function. No `default:` label is used so
    // the compiler can still warn about enumerators missing from the switch.
    return "UNKNOWN";
}
string Logger::toString(shared_ptr<Statement> statement) {
switch (statement->getKind()) {
case StatementKind::META_EXTERN_FUNCTION:
@@ -333,5 +408,30 @@ void Logger::print(vector<shared_ptr<Statement>> statements) {
}
// Prints one diagnostic line to standard output.
// Lexer errors report the stored lexeme and position; parser errors report the
// offending token, the expected token kind when one was given, and the optional
// extra message. Stored positions are printed incremented by one.
void Logger::print(shared_ptr<Error> error) {
    string message;
    switch (error->getKind()) {
        case ErrorKind::LEXER_ERROR:
            message = format("Unexpected token \"{}\" at line: {}, column: {}", error->getLexme(), error->getLine() + 1, error->getColumn() + 1);
            break;
        // Braces give the locals their own scope so further cases can be added
        // without jumping over their initialization.
        case ErrorKind::PARSER_ERROR: {
            shared_ptr<Token> token = error->getActualToken();
            optional<TokenKind> expectedTokenKind = error->getExpectedTokenKind();
            optional<string> errorMessage = error->getMessage();

            if (expectedTokenKind) {
                message = format(
                    "Expected token {} but instead found \"{}\" at line: {}, column: {}",
                    toString(*expectedTokenKind), token->getLexme(), token->getLine() + 1, token->getColumn() + 1
                );
            } else {
                message = format(
                    "Unexpected token \"{}\" found at line: {}, column: {}",
                    token->getLexme(), token->getLine() + 1, token->getColumn() + 1
                );
            }
            if (errorMessage)
                message += format(". {}", *errorMessage);
            break;
        }
    }
    // Exactly one line per error; nothing is printed before the switch.
    cout << message << endl;
}

View File

@@ -4,6 +4,7 @@
#include <vector>
class Token;
enum class TokenKind;
class Statement;
class StatementMetaExternFunction;
class StatementVariable;
@@ -32,6 +33,7 @@ using namespace std;
class Logger {
private:
static string toString(shared_ptr<Token> token);
static string toString(TokenKind tokenKind);
static string toString(shared_ptr<Statement> statement);
static string toString(shared_ptr<StatementMetaExternFunction> statement);

View File

@@ -1,5 +1,8 @@
#include "Parser.h"
#include "Error.h"
#include "Logger.h"
#include "Parser/Expression/ExpressionGrouping.h"
#include "Parser/Expression/ExpressionLiteral.h"
#include "Parser/Expression/ExpressionVariable.h"
@@ -27,20 +30,21 @@ vector<shared_ptr<Statement>> Parser::getStatements() {
while (!tryMatchingTokenKinds({TokenKind::END}, true, false)) {
shared_ptr<Statement> statement = nextStatement();
// Abort parsing if we got an error
if (!statement->isValid()) {
//cerr << statement->toString(0);
exit(1);
}
statements.push_back(statement);
if (statement != nullptr) {
statements.push_back(statement);
// Expect new line after statement
if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) {
cerr << "Expected new line" << endl;
exit(1);
// Expect new line after statement
if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
markError(TokenKind::NEW_LINE, {});
}
}
if (!errors.empty()) {
for (shared_ptr<Error> &error : errors)
Logger::print(error);
exit(1);
}
return statements;
}
@@ -48,21 +52,23 @@ vector<shared_ptr<Statement>> Parser::getStatements() {
// Statement
//
// Parses the next top-level statement by trying each matcher in turn:
// function, variable, then meta extern function.
// Returns the matched statement, or nullptr when parsing failed. A failure is
// always accompanied by an entry in `errors`: either a matcher recorded one via
// markError, or this function records a generic "unexpected token" error.
shared_ptr<Statement> Parser::nextStatement() {
    // Snapshot the error count. If a matcher records an error, markError has
    // already moved currentIndex forward, so the remaining matchers are skipped.
    const size_t errorsCount = errors.size();

    shared_ptr<Statement> statement = matchStatementFunction();
    if (statement != nullptr || errors.size() > errorsCount)
        return statement;

    statement = matchStatementVariable();
    if (statement != nullptr || errors.size() > errorsCount)
        return statement;

    statement = matchStatementMetaExternFunction();
    if (statement != nullptr || errors.size() > errorsCount)
        return statement;

    // Nothing matched and nothing was reported yet
    markError({}, {});
    return nullptr;
}
shared_ptr<Statement> Parser::nextInBlockStatement() {
@@ -88,7 +94,8 @@ shared_ptr<Statement> Parser::nextInBlockStatement() {
if (statement != nullptr)
return statement;
return matchStatementInvalid("Unexpected token");
markError({}, {});
return nullptr;
}
shared_ptr<Statement> Parser::matchStatementMetaExternFunction() {
@@ -107,13 +114,17 @@ shared_ptr<Statement> Parser::matchStatementMetaExternFunction() {
if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) {
do {
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line
if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false))
return matchStatementInvalid("Expected function argument");
if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) {
markError({}, "Expected function argument");
return nullptr;
}
shared_ptr<Token> identifierToken = tokens.at(currentIndex++);
shared_ptr<Token> typeToken = tokens.at(currentIndex++);
optional<ValueType> argumentType = valueTypeForToken(typeToken);
if (!argumentType)
return matchStatementInvalid("Invalid argument type");
if (!argumentType) {
markError(TokenKind::TYPE, {});
return nullptr;
}
arguments.push_back(pair<string, ValueType>(identifierToken->getLexme(), *argumentType));
} while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true));
@@ -125,8 +136,10 @@ shared_ptr<Statement> Parser::matchStatementMetaExternFunction() {
shared_ptr<Token> typeToken = tokens.at(currentIndex);
optional<ValueType> type = valueTypeForToken(typeToken);
if (!type)
return matchStatementInvalid("Expected return type");
if (!type) {
markError(TokenKind::TYPE, {});
return nullptr;
}
returnType = *type;
currentIndex++; // type
@@ -149,18 +162,23 @@ shared_ptr<Statement> Parser::matchStatementVariable() {
valueType = ValueType::SINT32;
else if (valueTypeToken->getLexme().compare("real32") == 0)
valueType = ValueType::REAL32;
else
return matchStatementInvalid("Invalid type");
else {
markError(TokenKind::TYPE, {});
return nullptr;
}
currentIndex++; // type
// Expect left arrow
if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true))
return matchStatementInvalid("Expected left arrow");
if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true)) {
markError(TokenKind::LEFT_ARROW, {});
return nullptr;
}
shared_ptr<Expression> expression = nextExpression();
if (expression == nullptr || !expression->isValid())
return matchStatementInvalid("Invalid expression");
if (expression == nullptr || !expression->isValid()) {
return nullptr;
}
return make_shared<StatementVariable>(identifierToken->getLexme(), valueType, expression);
}
@@ -182,13 +200,17 @@ shared_ptr<Statement> Parser::matchStatementFunction() {
if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) {
do {
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line
if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false))
return matchStatementInvalid("Expected function argument");
if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) {
markError({}, "Expected function argument");
return nullptr;
}
shared_ptr<Token> identifierToken = tokens.at(currentIndex++);
shared_ptr<Token> typeToken = tokens.at(currentIndex++);
optional<ValueType> argumentType = valueTypeForToken(typeToken);
if (!argumentType)
return matchStatementInvalid("Invalid argument type");
if (!argumentType) {
markError(TokenKind::TYPE, {});
return nullptr;
}
arguments.push_back(pair<string, ValueType>(identifierToken->getLexme(), *argumentType));
} while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true));
@@ -200,24 +222,31 @@ shared_ptr<Statement> Parser::matchStatementFunction() {
shared_ptr<Token> typeToken = tokens.at(currentIndex);
optional<ValueType> type = valueTypeForToken(typeToken);
if (!type)
return matchStatementInvalid("Expected return type");
if (!type) {
markError(TokenKind::TYPE, {});
return nullptr;
}
returnType = *type;
currentIndex++; // type
}
// consume new line
if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
return matchStatementInvalid("Expected new line after function declaration");
if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) {
markError(TokenKind::NEW_LINE, {});
return nullptr;
}
// block
statementBlock = matchStatementBlock({TokenKind::SEMICOLON});
if (statementBlock == nullptr || !statementBlock->isValid())
return statementBlock ?: matchStatementInvalid();
if (statementBlock == nullptr)
return nullptr;
if(!tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true))
return matchStatementInvalid("Expected a \";\" after a function declaration");
if(!tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true)) {
markError(TokenKind::SEMICOLON, {});
return nullptr;
}
//return matchStatementInvalid("Expected a \";\" after a function declaration");
return make_shared<StatementFunction>(name, arguments, returnType, dynamic_pointer_cast<StatementBlock>(statementBlock));
}
@@ -227,16 +256,18 @@ shared_ptr<Statement> Parser::matchStatementBlock(vector<TokenKind> terminalToke
while (!tryMatchingTokenKinds(terminalTokenKinds, false, false)) {
shared_ptr<Statement> statement = nextInBlockStatement();
if (statement == nullptr || !statement->isValid())
return statement ?: matchStatementInvalid("Expected statement");
if (statement == nullptr)
return nullptr;
statements.push_back(statement);
if (tryMatchingTokenKinds(terminalTokenKinds, false, false))
break;
// expect new line
if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
return matchStatementInvalid("Expected new line");
if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) {
markError(TokenKind::NEW_LINE, {});
return nullptr;
}
}
return make_shared<StatementBlock>(statements);
@@ -250,8 +281,8 @@ shared_ptr<Statement> Parser::matchStatementAssignment() {
currentIndex++; // arrow
shared_ptr<Expression> expression = nextExpression();
if (expression == nullptr || !expression->isValid())
return matchStatementInvalid("Expected expression");
if (expression == nullptr)
return nullptr;
return make_shared<StatementAssignment>(identifierToken->getLexme(), expression);
}
@@ -261,8 +292,8 @@ shared_ptr<Statement> Parser::matchStatementReturn() {
return nullptr;
shared_ptr<Expression> expression = nextExpression();
if (expression != nullptr && !expression->isValid())
return matchStatementInvalid("Expected expression");
if (expression == nullptr)
return nullptr;
return make_shared<StatementReturn>(expression);
}
@@ -285,33 +316,35 @@ shared_ptr<Statement> Parser::matchStatementRepeat() {
if (!tryMatchingTokenKinds({TokenKind::COLON}, false, true)) {
// got initial, expect comma
if (initStatement != nullptr && !tryMatchingTokenKinds({TokenKind::COMMA}, true, true))
return matchStatementInvalid("Expected comma after initial statement");
if (initStatement != nullptr && !tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) {
markError(TokenKind::COMMA, {});
return nullptr;
}
// optional new line
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);
// pre condition
preConditionExpression = nextExpression();
if (preConditionExpression != nullptr && !preConditionExpression->isValid())
return matchStatementInvalid("Expected pre-condition expression");
if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true)) {
// got pre-condition, expect comma
if (!tryMatchingTokenKinds({TokenKind::COMMA}, true, true))
return matchStatementInvalid("Expected comma after pre-condition statement");
if (!tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) {
markError(TokenKind::COMMA, {});
return nullptr;
}
// optional new line
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);
// post condition
postConditionExpression = nextExpression();
if (postConditionExpression == nullptr || !postConditionExpression->isValid())
return matchStatementInvalid("Expected post-condition expression");
// expect colon
if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true))
return matchStatementInvalid("Expected \":\"");
if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true)) {
markError(TokenKind::COLON, {});
return nullptr;
}
}
}
@@ -323,8 +356,8 @@ shared_ptr<Statement> Parser::matchStatementRepeat() {
else
bodyBlockStatement = matchStatementBlock({TokenKind::NEW_LINE});
if (bodyBlockStatement == nullptr || !bodyBlockStatement->isValid())
return bodyBlockStatement ?: matchStatementInvalid("Expected block statement");
if (bodyBlockStatement == nullptr)
return nullptr;
tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true);
@@ -342,10 +375,6 @@ shared_ptr<Statement> Parser::matchStatementExpression() {
return make_shared<StatementExpression>(expression);
}
// Builds a StatementInvalid from the token at the current index and the given message.
shared_ptr<StatementInvalid> Parser::matchStatementInvalid(string message) {
    shared_ptr<Token> &offendingToken = tokens.at(currentIndex);
    return make_shared<StatementInvalid>(offendingToken, message);
}
//
// Expression
//
@@ -632,3 +661,19 @@ optional<ValueType> Parser::valueTypeForToken(shared_ptr<Token> token) {
return {};
}
// Records a parser error for the token at the current index, then moves
// currentIndex forward until tryMatchingTokenKinds reports one of the recovery
// kinds: END always, plus NEW_LINE and SEMICOLON unless the offending token is
// itself of that kind.
// expectedTokenKind: the kind the caller was looking for, if any.
// message: optional extra text stored with the error.
void Parser::markError(optional<TokenKind> expectedTokenKind, optional<string> message) {
    // Capture the offending token before the index moves
    shared_ptr<Token> offendingToken = tokens.at(currentIndex);

    vector<TokenKind> recoveryKinds = {TokenKind::END};
    for (TokenKind candidate : {TokenKind::NEW_LINE, TokenKind::SEMICOLON}) {
        if (!offendingToken->isOfKind({candidate}))
            recoveryKinds.push_back(candidate);
    }

    // The match is attempted with shouldAdvance = true; while it fails,
    // step over one token and try again.
    bool reachedRecoveryPoint = tryMatchingTokenKinds(recoveryKinds, false, true);
    while (!reachedRecoveryPoint) {
        currentIndex++;
        reachedRecoveryPoint = tryMatchingTokenKinds(recoveryKinds, false, true);
    }

    errors.push_back(make_shared<Error>(offendingToken, expectedTokenKind, message));
}

View File

@@ -2,8 +2,11 @@
#define PARSER_H
#include <vector>
#include "Types.h"
#include "Lexer/Token.h"
class Token;
enum class TokenKind;
class Error;
class Expression;
class ExpressionInvalid;
@@ -17,6 +20,7 @@ class Parser {
private:
vector<shared_ptr<Token>> tokens;
int currentIndex = 0;
vector<shared_ptr<Error>> errors;
shared_ptr<Statement> nextStatement();
shared_ptr<Statement> nextInBlockStatement();
@@ -30,7 +34,6 @@ private:
shared_ptr<Statement> matchStatementReturn();
shared_ptr<Statement> matchStatementRepeat();
shared_ptr<Statement> matchStatementExpression();
shared_ptr<StatementInvalid> matchStatementInvalid(string message = "");
shared_ptr<Expression> nextExpression();
shared_ptr<Expression> matchEquality(); // =, !=
@@ -51,6 +54,8 @@ private:
bool tryMatchingTokenKinds(vector<TokenKind> kinds, bool shouldMatchAll, bool shouldAdvance);
optional<ValueType> valueTypeForToken(shared_ptr<Token> token);
void markError(optional<TokenKind> expectedTokenKind, optional<string> message);
public:
Parser(vector<shared_ptr<Token>> tokens);
vector<shared_ptr<Statement>> getStatements();