Better error reporting

This commit is contained in:
Rafał Grodziński
2025-05-31 23:24:21 +09:00
parent 2b78b5bf23
commit 61e648e55b
9 changed files with 198 additions and 108 deletions

View File

@@ -1,14 +1,13 @@
#include "Expression.h" #include "Expression.h"
std::shared_ptr<Expression> Expression::Invalid = std::make_shared<Expression>(Expression::Kind::INVALID, Token::Invalid, nullptr, nullptr); Expression::Expression(Kind kind, Token token, shared_ptr<Expression> left, shared_ptr<Expression> right): token(token) {
Expression::Expression(Kind kind, Token token, shared_ptr<Expression> left, shared_ptr<Expression> right) {
switch (kind) { switch (kind) {
case LITERAL: case LITERAL:
setupLiteral(token); setupLiteral(token);
break; break;
case GROUPING: case GROUPING:
setupGrouping(token, left); setupGrouping(token, left);
break;
case BINARY: case BINARY:
setupBinary(token, left, right); setupBinary(token, left, right);
break; break;
@@ -18,7 +17,7 @@ Expression::Expression(Kind kind, Token token, shared_ptr<Expression> left, shar
} }
void Expression::setupLiteral(Token token) { void Expression::setupLiteral(Token token) {
bool isKindValid = token.isOneOf({Token::Kind::INTEGER}); bool isKindValid = token.isOfKind({Token::Kind::INTEGER});
if (!isKindValid) if (!isKindValid)
return; return;
@@ -38,7 +37,7 @@ void Expression::setupGrouping(Token token, shared_ptr<Expression> expression) {
} }
void Expression::setupBinary(Token token, shared_ptr<Expression> left, shared_ptr<Expression> right) { void Expression::setupBinary(Token token, shared_ptr<Expression> left, shared_ptr<Expression> right) {
bool isKindValid = token.isOneOf({Token::Kind::PLUS, Token::Kind::MINUS, Token::Kind::STAR, Token::Kind::SLASH, Token::Kind::PERCENT}); bool isKindValid = token.isOfKind({Token::Kind::PLUS, Token::Kind::MINUS, Token::Kind::STAR, Token::Kind::SLASH, Token::Kind::PERCENT});
bool isLeftValid = left != nullptr && left->getKind() != Kind::INVALID; bool isLeftValid = left != nullptr && left->getKind() != Kind::INVALID;
bool isRightValid = right != nullptr && right->getKind() != Kind::INVALID; bool isRightValid = right != nullptr && right->getKind() != Kind::INVALID;
@@ -65,6 +64,8 @@ void Expression::setupBinary(Token token, shared_ptr<Expression> left, shared_pt
break; break;
case Token::Kind::INVALID: case Token::Kind::INVALID:
break; break;
default:
exit(1);
} }
this->left = left; this->left = left;
@@ -75,6 +76,10 @@ Expression::Kind Expression::getKind() {
return kind; return kind;
} }
Token Expression::getToken() {
return token;
}
int64_t Expression::getInteger() { int64_t Expression::getInteger() {
return integer; return integer;
} }
@@ -91,15 +96,11 @@ shared_ptr<Expression> Expression::getRight() {
return right; return right;
} }
bool Expression::operator==(Expression const& other) { bool Expression::isValid() {
return kind == other.kind; return kind != Expression::Kind::INVALID;
} }
bool Expression::operator!=(Expression const& other) { string Expression::toString() {
return kind != other.kind;
}
std::string Expression::toString() {
switch (kind) { switch (kind) {
case LITERAL: case LITERAL:
return to_string(integer); return to_string(integer);

View File

@@ -25,6 +25,7 @@ public:
private: private:
Kind kind = INVALID; Kind kind = INVALID;
Token token;
int64_t integer = 0; int64_t integer = 0;
Operator operation = NONE; Operator operation = NONE;
shared_ptr<Expression> left = nullptr; shared_ptr<Expression> left = nullptr;
@@ -37,15 +38,13 @@ private:
public: public:
Expression(Kind kind, Token token, shared_ptr<Expression> left, shared_ptr<Expression> right); Expression(Kind kind, Token token, shared_ptr<Expression> left, shared_ptr<Expression> right);
Kind getKind(); Kind getKind();
Token getToken();
int64_t getInteger(); int64_t getInteger();
Operator getOperator(); Operator getOperator();
shared_ptr<Expression> getLeft(); shared_ptr<Expression> getLeft();
shared_ptr<Expression> getRight(); shared_ptr<Expression> getRight();
bool operator==(Expression const& other); bool isValid();
bool operator!=(Expression const& other);
string toString(); string toString();
static shared_ptr<Expression> Invalid;
}; };
#endif #endif

View File

@@ -1,92 +1,140 @@
#include "Lexer.h" #include "Lexer.h"
Lexer::Lexer(std::string source) : source(source) { Lexer::Lexer(string source): source(source) {
} }
std::vector<Token> Lexer::getTokens() { vector<Token> Lexer::getTokens() {
std::vector<Token> tokens; vector<Token> tokens;
do { do {
Token token = nextToken(); Token token = nextToken();
currentIndex += token.getLexme().length();
if (token.getKind() == Token::Kind::NEW_LINE) // Abort scanning if we got an error
if (token.getKind() == Token::Kind::INVALID) {
cerr << "Unexpected character '" << token.getLexme() << "' at " << token.getLine() << ":" << token.getColumn() << endl;
return vector<Token>();
}
currentIndex += token.getLexme().length();
currentColumn += token.getLexme().length();
if (token.getKind() == Token::Kind::NEW_LINE) {
currentLine++; currentLine++;
currentColumn = 0;
}
// filter out multiple new lines // filter out multiple new lines
if (tokens.empty() || token.getKind() != Token::Kind::NEW_LINE || tokens.back() != token) if (tokens.empty() || token.getKind() != Token::Kind::NEW_LINE || tokens.back().getKind() != token.getKind())
tokens.push_back(token); tokens.push_back(token);
} while (tokens.back().getKind() != Token::Kind::END); } while (tokens.back().getKind() != Token::Kind::END);
return tokens; return tokens;
} }
Token Lexer::nextToken() { Token Lexer::nextToken() {
Token token = Token::Invalid; while (currentIndex < source.length() && isWhiteSpace(currentIndex)) {
while (currentIndex < source.length() && isWhiteSpace(currentIndex))
currentIndex++; currentIndex++;
currentColumn++;
}
do { {
if ((token = matchEnd()) != Token::Invalid) Token token = matchEnd();
break; if (token.isValid())
if ((token = matchSymbol('+', Token::Kind::PLUS)) != Token::Invalid)
break;
if ((token = matchSymbol('-', Token::Kind::MINUS)) != Token::Invalid)
break;
if ((token = matchSymbol('*', Token::Kind::STAR)) != Token::Invalid)
break;
if ((token = matchSymbol('/', Token::Kind::SLASH)) != Token::Invalid)
break;
if ((token = matchSymbol('%', Token::Kind::PERCENT)) != Token::Invalid)
break;
if ((token = matchSymbol('(', Token::Kind::LEFT_PAREN)) != Token::Invalid)
break;
if ((token = matchSymbol(')', Token::Kind::RIGHT_PAREN)) != Token::Invalid)
break;
if ((token = matchSymbol('.', Token::Kind::DOT)) != Token::Invalid)
break;
if ((token = matchSymbol(',', Token::Kind::COMMA)) != Token::Invalid)
break;
if ((token = matchInteger()) != Token::Invalid)
break;
if ((token = matchNewLine()) != Token::Invalid)
break;
token = matchInvalid();
} while(false);
return token; return token;
} }
{
Token token = matchSymbol('+', Token::Kind::PLUS);
if (token.isValid())
return token;
}
{
Token token = matchSymbol('-', Token::Kind::MINUS);
if (token.isValid())
return token;
}
{
Token token = matchSymbol('*', Token::Kind::STAR);
if (token.isValid())
return token;
}
{
Token token = matchSymbol('/', Token::Kind::SLASH);
if (token.isValid())
return token;
}
{
Token token =matchSymbol('%', Token::Kind::PERCENT);
if (token.isValid())
return token;
}
{
Token token = matchSymbol('(', Token::Kind::LEFT_PAREN);
if (token.isValid())
return token;
}
{
Token token = matchSymbol(')', Token::Kind::RIGHT_PAREN);
if (token.isValid())
return token;
}
{
Token token =matchSymbol('.', Token::Kind::DOT);
if (token.isValid())
return token;
}
{
Token token = matchSymbol(',', Token::Kind::COMMA);
if (token.isValid())
return token;
}
{
Token token = matchInteger();
if (token.isValid())
return token;
}
{
Token token = matchKeyword("fun", Token::Kind::FUNCTION);
if (token.isValid())
return token;
}
{
Token token = matchNewLine();
if (token.isValid())
return token;
}
return matchInvalid();
}
Token Lexer::matchEnd() { Token Lexer::matchEnd() {
if (currentIndex >= source.length()) if (currentIndex >= source.length())
return Token(Token::Kind::END, ""); return Token(Token::Kind::END, "", currentLine, currentColumn);
return Token::Invalid; return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
} }
Token Lexer::matchNewLine() { Token Lexer::matchNewLine() {
if (isNewLine(currentIndex)) if (isNewLine(currentIndex))
return Token(Token::Kind::NEW_LINE, "\n"); return Token(Token::Kind::NEW_LINE, "\n", currentLine, currentColumn);
return Token::Invalid; return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
} }
Token Lexer::matchSymbol(char symbol, Token::Kind kind) { Token Lexer::matchSymbol(char symbol, Token::Kind kind) {
if (source.at(currentIndex) == symbol) if (source.at(currentIndex) == symbol)
return Token(kind, std::string(1, symbol)); return Token(kind, string(1, symbol), currentLine, currentColumn);
return Token::Invalid; return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
} }
Token Lexer::matchInteger() { Token Lexer::matchInteger() {
@@ -96,15 +144,24 @@ Token Lexer::matchInteger() {
nextIndex++; nextIndex++;
if (nextIndex == currentIndex) if (nextIndex == currentIndex)
return Token::Invalid; return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
std::string lexme = source.substr(currentIndex, nextIndex - currentIndex); string lexme = source.substr(currentIndex, nextIndex - currentIndex);
return Token(Token::Kind::INTEGER, lexme); return Token(Token::Kind::INTEGER, lexme, currentLine, currentColumn);
}
Token Lexer::matchKeyword(string keyword, Token::Kind kind) {
bool isMatching = source.compare(currentIndex, keyword.length(), keyword) == 0;
bool isSeparated = (currentIndex + keyword.length() >= source.length()) || isWhiteSpace(currentIndex + keyword.length()) || isNewLine(currentIndex + keyword.length());
if (isMatching && isSeparated)
return Token(Token::Kind::FUNCTION, keyword, currentLine, currentColumn);
else
return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
} }
Token Lexer::matchInvalid() { Token Lexer::matchInvalid() {
char symbol = source.at(currentIndex); return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
return Token(Token::Kind::INVALID, std::string(1, symbol));
} }
bool Lexer::isWhiteSpace(int index) { bool Lexer::isWhiteSpace(int index) {

View File

@@ -1,14 +1,18 @@
#ifndef LEXER_H #ifndef LEXER_H
#define LEXER_H #define LEXER_H
#include <vector>
#include "Token.h" #include "Token.h"
#include <vector>
using namespace std;
class Lexer { class Lexer {
private: private:
std::string source; string source;
int currentIndex = 0; int currentIndex = 0;
int currentLine = 0; int currentLine = 0;
int currentColumn = 0;
Token nextToken(); Token nextToken();
Token matchEnd(); Token matchEnd();
@@ -16,14 +20,15 @@ private:
Token matchInvalid(); Token matchInvalid();
Token matchSymbol(char symbol, Token::Kind kind); Token matchSymbol(char symbol, Token::Kind kind);
Token matchInteger(); Token matchInteger();
Token matchKeyword(string keyword, Token::Kind kind);
bool isWhiteSpace(int index); bool isWhiteSpace(int index);
bool isNewLine(int index); bool isNewLine(int index);
bool isDigit(int index); bool isDigit(int index);
public: public:
Lexer(std::string source); Lexer(string source);
std::vector<Token> getTokens(); vector<Token> getTokens();
}; };
#endif #endif

View File

@@ -4,13 +4,18 @@ Parser::Parser(vector<Token> tokens): tokens(tokens) {
} }
shared_ptr<Expression> Parser::getExpression() { shared_ptr<Expression> Parser::getExpression() {
return term(); shared_ptr<Expression> expression = term();
if (!expression->isValid()) {
cerr << "Unexpected token '" << expression->getToken().getLexme() << "' at " << expression->getToken().getLine() << ":" << expression->getToken().getColumn() << endl;
return nullptr;
}
return expression;
} }
shared_ptr<Expression> Parser::term() { shared_ptr<Expression> Parser::term() {
shared_ptr<Expression> expression = factor(); shared_ptr<Expression> expression = factor();
while (tokens.at(currentIndex).isOneOf({Token::Kind::PLUS, Token::Kind::MINUS})) { while (tokens.at(currentIndex).isOfKind({Token::Kind::PLUS, Token::Kind::MINUS})) {
expression = matchBinary(expression); expression = matchBinary(expression);
} }
@@ -20,7 +25,7 @@ shared_ptr<Expression> Parser::term() {
shared_ptr<Expression> Parser::factor() { shared_ptr<Expression> Parser::factor() {
shared_ptr<Expression> expression = primary(); shared_ptr<Expression> expression = primary();
while (tokens.at(currentIndex).isOneOf({Token::Kind::STAR, Token::Kind::SLASH, Token::Kind::PERCENT})) { while (tokens.at(currentIndex).isOfKind({Token::Kind::STAR, Token::Kind::SLASH, Token::Kind::PERCENT})) {
expression = matchBinary(expression); expression = matchBinary(expression);
} }
@@ -28,19 +33,21 @@ shared_ptr<Expression> Parser::factor() {
} }
shared_ptr<Expression> Parser::primary() { shared_ptr<Expression> Parser::primary() {
shared_ptr<Expression> expression = Expression::Invalid; {
shared_ptr<Expression> expression = matchInteger();
do { if (expression->isValid())
if((expression = matchInteger()) != Expression::Invalid)
break;
if((expression = matchGrouping()) != Expression::Invalid)
break;
} while(false);
return expression; return expression;
} }
{
shared_ptr<Expression> expression = matchGrouping();
if (expression->isValid())
return expression;
}
return make_shared<Expression>(Expression::Kind::INVALID, tokens.at(currentIndex), nullptr, nullptr);
}
shared_ptr<Expression> Parser::matchInteger() { shared_ptr<Expression> Parser::matchInteger() {
Token token = tokens.at(currentIndex); Token token = tokens.at(currentIndex);
if (token.getKind() == Token::Kind::INTEGER) { if (token.getKind() == Token::Kind::INTEGER) {
@@ -48,7 +55,7 @@ shared_ptr<Expression> Parser::matchInteger() {
return make_shared<Expression>(Expression::Kind::LITERAL, token, nullptr, nullptr); return make_shared<Expression>(Expression::Kind::LITERAL, token, nullptr, nullptr);
} }
return Expression::Invalid; return make_shared<Expression>(Expression::Kind::INVALID, token, nullptr, nullptr);
} }
shared_ptr<Expression> Parser::matchGrouping() { shared_ptr<Expression> Parser::matchGrouping() {
@@ -56,22 +63,28 @@ shared_ptr<Expression> Parser::matchGrouping() {
if (token.getKind() == Token::Kind::LEFT_PAREN) { if (token.getKind() == Token::Kind::LEFT_PAREN) {
currentIndex++; currentIndex++;
shared_ptr<Expression> expression = term(); shared_ptr<Expression> expression = term();
// has grouped expression failed?
if (!expression->isValid())
return expression;
if (tokens.at(currentIndex).getKind() == Token::Kind::RIGHT_PAREN) { if (tokens.at(currentIndex).getKind() == Token::Kind::RIGHT_PAREN) {
currentIndex++; currentIndex++;
return make_shared<Expression>(Expression::Kind::GROUPING, token, expression, nullptr); return make_shared<Expression>(Expression::Kind::GROUPING, token, expression, nullptr);
} }
} }
return Expression::Invalid; return make_shared<Expression>(Expression::Kind::INVALID, token, nullptr, nullptr);
} }
shared_ptr<Expression> Parser::matchBinary(shared_ptr<Expression> left) { shared_ptr<Expression> Parser::matchBinary(shared_ptr<Expression> left) {
Token token = tokens.at(currentIndex); Token token = tokens.at(currentIndex);
if (token.isOneOf({Token::Kind::PLUS, Token::Kind::MINUS, Token::Kind::STAR, Token::Kind::SLASH, Token::Kind::PERCENT})) { if (token.isOfKind({Token::Kind::PLUS, Token::Kind::MINUS, Token::Kind::STAR, Token::Kind::SLASH, Token::Kind::PERCENT})) {
currentIndex++; currentIndex++;
shared_ptr<Expression> right = factor(); shared_ptr<Expression> right = factor();
// Has right expression failed?
if (!right->isValid())
return right;
return make_shared<Expression>(Expression::Kind::BINARY, token, left, right); return make_shared<Expression>(Expression::Kind::BINARY, token, left, right);
} }
return Expression::Invalid; return make_shared<Expression>(Expression::Kind::INVALID, token, nullptr, nullptr);
} }

View File

@@ -14,7 +14,7 @@ private:
shared_ptr<Expression> term(); // +, - shared_ptr<Expression> term(); // +, -
shared_ptr<Expression> factor(); // *, /, % shared_ptr<Expression> factor(); // *, /, %
shared_ptr<Expression> primary(); shared_ptr<Expression> primary(); // integer, ()
shared_ptr<Expression> matchInteger(); shared_ptr<Expression> matchInteger();
shared_ptr<Expression> matchGrouping(); shared_ptr<Expression> matchGrouping();

View File

@@ -1,27 +1,29 @@
#include "Token.h" #include "Token.h"
Token Token::Invalid = Token(Token::Kind::INVALID, ""); Token::Token(Kind kind, string lexme, int line, int column): kind(kind), lexme(lexme), line(line), column(column) {
Token::Token(Kind kind, std::string lexme): kind(kind), lexme(lexme) {
} }
Token::Kind Token::getKind() { Token::Kind Token::getKind() {
return kind; return kind;
} }
std::string Token::getLexme() { string Token::getLexme() {
return lexme; return lexme;
} }
bool Token::operator==(Token const& other) { int Token::getLine() {
return kind == other.kind; return line;
} }
bool Token::operator!=(Token const& other) { int Token::getColumn() {
return kind != other.kind; return column;
} }
bool Token::isOneOf(std::vector<Kind> kinds) { bool Token::isValid() {
return kind != Token::Kind::INVALID;
}
bool Token::isOfKind(vector<Kind> kinds) {
for (Kind &kind : kinds) { for (Kind &kind : kinds) {
if (kind == this->kind) if (kind == this->kind)
return true; return true;
@@ -30,7 +32,7 @@ bool Token::isOneOf(std::vector<Kind> kinds) {
return false; return false;
} }
std::string Token::toString() { string Token::toString() {
switch (kind) { switch (kind) {
case PLUS: case PLUS:
return "PLUS"; return "PLUS";
@@ -52,6 +54,8 @@ std::string Token::toString() {
return "COMMA"; return "COMMA";
case INTEGER: case INTEGER:
return "INTEGER"; return "INTEGER";
case FUNCTION:
return "FUNCTION";
case NEW_LINE: case NEW_LINE:
return "NEW_LINE"; return "NEW_LINE";
case END: case END:

View File

@@ -3,6 +3,8 @@
#include <iostream> #include <iostream>
using namespace std;
class Token { class Token {
public: public:
enum Kind { enum Kind {
@@ -19,26 +21,29 @@ public:
INTEGER, INTEGER,
NEW_LINE, FUNCTION,
NEW_LINE,
END, END,
INVALID INVALID
}; };
private: private:
Kind kind; Kind kind;
std::string lexme; string lexme;
int line;
int column;
public: public:
Token(Kind kind, std::string lexme); Token(Kind kind, string lexme, int line, int column);
Kind getKind(); Kind getKind();
std::string getLexme(); string getLexme();
bool operator==(Token const& other); int getLine();
bool operator!=(Token const& other); int getColumn();
bool isOneOf(std::vector<Kind> kinds); bool isValid();
std::string toString(); bool isOfKind(vector<Kind> kinds);
string toString();
static Token Invalid;
}; };
#endif #endif

View File

@@ -36,12 +36,18 @@ int main(int argc, char **argv) {
std::string source = readFile(std::string(argv[1])); std::string source = readFile(std::string(argv[1]));
Lexer lexer(source); Lexer lexer(source);
std::vector<Token> tokens = lexer.getTokens(); std::vector<Token> tokens = lexer.getTokens();
if (tokens.empty()) {
exit(1);
}
for (Token &token : tokens) for (Token &token : tokens)
std::cout << token.toString() << " "; std::cout << token.toString() << " ";
std::cout << std::endl; std::cout << std::endl;
Parser parser(tokens); Parser parser(tokens);
shared_ptr<Expression> expression = parser.getExpression(); shared_ptr<Expression> expression = parser.getExpression();
if (!expression) {
exit(1);
}
cout << expression->toString() << endl; cout << expression->toString() << endl;
ModuleBuilder moduleBuilder(expression); ModuleBuilder moduleBuilder(expression);