Better error handling in lexer

This commit is contained in:
Rafał Grodziński
2025-07-03 12:56:48 +09:00
parent 3dc513871f
commit 36a89a811a
6 changed files with 86 additions and 23 deletions

16
src/Error.cpp Normal file
View File

@@ -0,0 +1,16 @@
#include "Error.h"
Error::Error(int line, int column, string lexme) :
line(line), column(column), lexme(lexme) { }
int Error::getLine() {
return line;
}
int Error::getColumn() {
return column;
}
/// @return a copy of the text this error was constructed with.
string Error::getLexme() {
    return this->lexme;
}

21
src/Error.h Normal file
View File

@@ -0,0 +1,21 @@
#ifndef ERROR_H
#define ERROR_H
#include <iostream>
using namespace std;
/// Value object that pairs a source position with the text found there.
class Error {
    // Members of a `class` are private by default, so no specifier is needed here.
    int line;      ///< line number exactly as passed to the constructor
    int column;    ///< column number exactly as passed to the constructor
    string lexme;  ///< text exactly as passed to the constructor

public:
    /// Stores the given position and text.
    Error(int line, int column, string lexme);

    /// @return the stored line number.
    int getLine();

    /// @return the stored column number.
    int getColumn();

    /// @return a copy of the stored text.
    string getLexme();
};
#endif

View File

@@ -1,5 +1,9 @@
#include "Lexer.h"
#include "Token.h"
#include "Error.h"
#include "Logger.h"
/// Creates a lexer over the given source text.
/// @param source text to scan; it is copied into the lexer's own `source` member
Lexer::Lexer(string source)
    : source(source)
{
}
@@ -8,21 +12,14 @@ vector<shared_ptr<Token>> Lexer::getTokens() {
currentLine = 0;
currentColumn = 0;
errors.clear();
vector<shared_ptr<Token>> tokens;
shared_ptr<Token> token;
do {
token = nextToken();
// Got a nullptr, shouldn't have happened
if (!token) {
cerr << "Failed to scan tokens" << endl;
exit(1);
}
// Abort scanning if we got an error
if (!token->isValid()) {
cerr << "Unexpected character '" << token->getLexme() << "' at " << token->getLine() << ":" << token->getColumn() << endl;
exit(1);
}
if (token == nullptr)
continue;
// Don't add new line as the first token
if (tokens.empty() && token->isOfKind({TokenKind::NEW_LINE}))
@@ -35,7 +32,14 @@ vector<shared_ptr<Token>> Lexer::getTokens() {
// filter out multiple new lines
if (tokens.empty() || token->getKind() != TokenKind::NEW_LINE || tokens.back()->getKind() != token->getKind())
tokens.push_back(token);
} while (token->getKind() != TokenKind::END);
} while (token == nullptr || token->getKind() != TokenKind::END);
if (!errors.empty()) {
for (shared_ptr<Error> &error : errors)
Logger::print(error);
exit(1);
}
return tokens;
}
@@ -265,7 +269,8 @@ shared_ptr<Token> Lexer::nextToken() {
if (token != nullptr)
return token;
return matchInvalid();
markError();
return nullptr;
}
shared_ptr<Token> Lexer::match(TokenKind kind, string lexme, bool needsSeparator) {
@@ -359,8 +364,10 @@ shared_ptr<Token> Lexer::matchReal() {
while (nextIndex < source.length() && isDecDigit(nextIndex))
nextIndex++;
if (!isSeparator(nextIndex))
return matchInvalid();
if (!isSeparator(nextIndex)) {
markError();
return nullptr;
}
string lexme = source.substr(currentIndex, nextIndex - currentIndex);
shared_ptr<Token> token = make_shared<Token>(TokenKind::REAL, lexme, currentLine, currentColumn);
@@ -390,10 +397,6 @@ shared_ptr<Token> Lexer::matchEnd() {
return nullptr;
}
shared_ptr<Token> Lexer::matchInvalid() {
return make_shared<Token>(TokenKind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
}
bool Lexer::isWhiteSpace(int index) {
char character = source.at(index);
return character == ' ' || character == '\t';
@@ -451,7 +454,7 @@ bool Lexer::isSeparator(int index) {
}
}
void Lexer::advanceWithToken(shared_ptr<Token> token) {
void Lexer::advanceWithToken(shared_ptr<Token> token) {
if (token->getKind() == TokenKind::NEW_LINE) {
currentLine++;
currentColumn = 0;
@@ -459,4 +462,13 @@ bool Lexer::isSeparator(int index) {
currentColumn += token->getLexme().length();
}
currentIndex += token->getLexme().length();
}
}
void Lexer::markError() {
int startIndex = currentIndex;
do {
currentIndex++;
} while (!isSeparator(currentIndex));
errors.push_back(make_shared<Error>(currentLine, currentColumn, source.substr(startIndex, currentIndex - startIndex)));
currentIndex++;
}

View File

@@ -3,7 +3,9 @@
#include <vector>
#include "Token.h"
class Token;
enum class TokenKind;
class Error;
using namespace std;
@@ -14,6 +16,8 @@ private:
int currentLine;
int currentColumn;
vector<shared_ptr<Error>> errors;
shared_ptr<Token> nextToken();
shared_ptr<Token> match(TokenKind kind, string lexme, bool needsSeparator);
shared_ptr<Token> matchIntegerDec();
@@ -22,7 +26,6 @@ private:
shared_ptr<Token> matchReal();
shared_ptr<Token> matchIdentifier();
shared_ptr<Token> matchEnd();
shared_ptr<Token> matchInvalid();
bool isWhiteSpace(int index);
bool isDecDigit(int index);
@@ -32,6 +35,8 @@ private:
bool isSeparator(int index);
void advanceWithToken(shared_ptr<Token> token);
void markError();
public:
Lexer(string source);
vector<shared_ptr<Token>> getTokens();

View File

@@ -23,6 +23,8 @@
#include "Parser/Expression/ExpressionCall.h"
#include "Parser/Expression/ExpressionBlock.h"
#include "Error.h"
string Logger::toString(shared_ptr<Token> token) {
switch (token->getKind()) {
case TokenKind::PLUS:
@@ -330,4 +332,8 @@ void Logger::print(vector<shared_ptr<Statement>> statements) {
for (shared_ptr<Statement> &statement : statements) {
cout << toString(statement) << endl << endl;
}
}
/// Writes a one-line description of the given error to standard output.
/// The stored line and column are each incremented by one before printing.
void Logger::print(shared_ptr<Error> error) {
    const int displayLine = error->getLine() + 1;
    const int displayColumn = error->getColumn() + 1;
    const string message = format("Unexpected token \"{}\" at line: {}, column: {}\n", error->getLexme(), displayLine, displayColumn);
    cout << message;
}

View File

@@ -25,6 +25,8 @@ class ExpressionBlock;
enum class ValueType;
class Error;
using namespace std;
class Logger {
@@ -55,6 +57,7 @@ private:
public:
static void print(vector<shared_ptr<Token>> tokens);
static void print(vector<shared_ptr<Statement>> statements);
static void print(shared_ptr<Error> error);
};
#endif