Better error handling in lexer
This commit is contained in:
16
src/Error.cpp
Normal file
16
src/Error.cpp
Normal file
@@ -0,0 +1,16 @@
|
||||
#include "Error.h"
|
||||
|
||||
Error::Error(int line, int column, string lexme) :
|
||||
line(line), column(column), lexme(lexme) { }
|
||||
|
||||
int Error::getLine() {
|
||||
return line;
|
||||
}
|
||||
|
||||
int Error::getColumn() {
|
||||
return column;
|
||||
}
|
||||
|
||||
/** Returns a copy of the offending text stored in this error. */
string Error::getLexme() {
    return this->lexme;
}
|
||||
21
src/Error.h
Normal file
21
src/Error.h
Normal file
@@ -0,0 +1,21 @@
|
||||
#ifndef ERROR_H
|
||||
#define ERROR_H
|
||||
|
||||
#include <iostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
class Error {
|
||||
private:
|
||||
int line;
|
||||
int column;
|
||||
string lexme;
|
||||
|
||||
public:
|
||||
Error(int line, int column, string lexme);
|
||||
int getLine();
|
||||
int getColumn();
|
||||
string getLexme();
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,5 +1,9 @@
|
||||
#include "Lexer.h"
|
||||
|
||||
#include "Token.h"
|
||||
#include "Error.h"
|
||||
#include "Logger.h"
|
||||
|
||||
Lexer::Lexer(string source): source(source) {
|
||||
}
|
||||
|
||||
@@ -8,21 +12,14 @@ vector<shared_ptr<Token>> Lexer::getTokens() {
|
||||
currentLine = 0;
|
||||
currentColumn = 0;
|
||||
|
||||
errors.clear();
|
||||
|
||||
vector<shared_ptr<Token>> tokens;
|
||||
shared_ptr<Token> token;
|
||||
do {
|
||||
token = nextToken();
|
||||
// Got a nullptr, shouldn't have happened
|
||||
if (!token) {
|
||||
cerr << "Failed to scan tokens" << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Abort scanning if we got an error
|
||||
if (!token->isValid()) {
|
||||
cerr << "Unexpected character '" << token->getLexme() << "' at " << token->getLine() << ":" << token->getColumn() << endl;
|
||||
exit(1);
|
||||
}
|
||||
if (token == nullptr)
|
||||
continue;
|
||||
|
||||
// Don't add new line as the first token
|
||||
if (tokens.empty() && token->isOfKind({TokenKind::NEW_LINE}))
|
||||
@@ -35,7 +32,14 @@ vector<shared_ptr<Token>> Lexer::getTokens() {
|
||||
// filter out multiple new lines
|
||||
if (tokens.empty() || token->getKind() != TokenKind::NEW_LINE || tokens.back()->getKind() != token->getKind())
|
||||
tokens.push_back(token);
|
||||
} while (token->getKind() != TokenKind::END);
|
||||
} while (token == nullptr || token->getKind() != TokenKind::END);
|
||||
|
||||
if (!errors.empty()) {
|
||||
for (shared_ptr<Error> &error : errors)
|
||||
Logger::print(error);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return tokens;
|
||||
}
|
||||
|
||||
@@ -265,7 +269,8 @@ shared_ptr<Token> Lexer::nextToken() {
|
||||
if (token != nullptr)
|
||||
return token;
|
||||
|
||||
return matchInvalid();
|
||||
markError();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
shared_ptr<Token> Lexer::match(TokenKind kind, string lexme, bool needsSeparator) {
|
||||
@@ -359,8 +364,10 @@ shared_ptr<Token> Lexer::matchReal() {
|
||||
while (nextIndex < source.length() && isDecDigit(nextIndex))
|
||||
nextIndex++;
|
||||
|
||||
if (!isSeparator(nextIndex))
|
||||
return matchInvalid();
|
||||
if (!isSeparator(nextIndex)) {
|
||||
markError();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
string lexme = source.substr(currentIndex, nextIndex - currentIndex);
|
||||
shared_ptr<Token> token = make_shared<Token>(TokenKind::REAL, lexme, currentLine, currentColumn);
|
||||
@@ -390,10 +397,6 @@ shared_ptr<Token> Lexer::matchEnd() {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
shared_ptr<Token> Lexer::matchInvalid() {
|
||||
return make_shared<Token>(TokenKind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
|
||||
}
|
||||
|
||||
bool Lexer::isWhiteSpace(int index) {
|
||||
char character = source.at(index);
|
||||
return character == ' ' || character == '\t';
|
||||
@@ -451,7 +454,7 @@ bool Lexer::isSeparator(int index) {
|
||||
}
|
||||
}
|
||||
|
||||
void Lexer::advanceWithToken(shared_ptr<Token> token) {
|
||||
void Lexer::advanceWithToken(shared_ptr<Token> token) {
|
||||
if (token->getKind() == TokenKind::NEW_LINE) {
|
||||
currentLine++;
|
||||
currentColumn = 0;
|
||||
@@ -459,4 +462,13 @@ bool Lexer::isSeparator(int index) {
|
||||
currentColumn += token->getLexme().length();
|
||||
}
|
||||
currentIndex += token->getLexme().length();
|
||||
}
|
||||
}
|
||||
|
||||
void Lexer::markError() {
|
||||
int startIndex = currentIndex;
|
||||
do {
|
||||
currentIndex++;
|
||||
} while (!isSeparator(currentIndex));
|
||||
errors.push_back(make_shared<Error>(currentLine, currentColumn, source.substr(startIndex, currentIndex - startIndex)));
|
||||
currentIndex++;
|
||||
}
|
||||
|
||||
@@ -3,7 +3,9 @@
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "Token.h"
|
||||
class Token;
|
||||
enum class TokenKind;
|
||||
class Error;
|
||||
|
||||
using namespace std;
|
||||
|
||||
@@ -14,6 +16,8 @@ private:
|
||||
int currentLine;
|
||||
int currentColumn;
|
||||
|
||||
vector<shared_ptr<Error>> errors;
|
||||
|
||||
shared_ptr<Token> nextToken();
|
||||
shared_ptr<Token> match(TokenKind kind, string lexme, bool needsSeparator);
|
||||
shared_ptr<Token> matchIntegerDec();
|
||||
@@ -22,7 +26,6 @@ private:
|
||||
shared_ptr<Token> matchReal();
|
||||
shared_ptr<Token> matchIdentifier();
|
||||
shared_ptr<Token> matchEnd();
|
||||
shared_ptr<Token> matchInvalid();
|
||||
|
||||
bool isWhiteSpace(int index);
|
||||
bool isDecDigit(int index);
|
||||
@@ -32,6 +35,8 @@ private:
|
||||
bool isSeparator(int index);
|
||||
void advanceWithToken(shared_ptr<Token> token);
|
||||
|
||||
void markError();
|
||||
|
||||
public:
|
||||
Lexer(string source);
|
||||
vector<shared_ptr<Token>> getTokens();
|
||||
|
||||
@@ -23,6 +23,8 @@
|
||||
#include "Parser/Expression/ExpressionCall.h"
|
||||
#include "Parser/Expression/ExpressionBlock.h"
|
||||
|
||||
#include "Error.h"
|
||||
|
||||
string Logger::toString(shared_ptr<Token> token) {
|
||||
switch (token->getKind()) {
|
||||
case TokenKind::PLUS:
|
||||
@@ -330,4 +332,8 @@ void Logger::print(vector<shared_ptr<Statement>> statements) {
|
||||
for (shared_ptr<Statement> &statement : statements) {
|
||||
cout << toString(statement) << endl << endl;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Writes a one-line description of a lexing error to standard output.
 *
 * The stored line and column are incremented by one before being printed.
 *
 * @param error  error to describe; must not be null
 */
void Logger::print(shared_ptr<Error> error) {
    const string lexme = error->getLexme();
    const int displayLine = error->getLine() + 1;
    const int displayColumn = error->getColumn() + 1;

    const string message = format("Unexpected token \"{}\" at line: {}, column: {}\n", lexme, displayLine, displayColumn);
    cout << message;
}
|
||||
@@ -25,6 +25,8 @@ class ExpressionBlock;
|
||||
|
||||
enum class ValueType;
|
||||
|
||||
class Error;
|
||||
|
||||
using namespace std;
|
||||
|
||||
class Logger {
|
||||
@@ -55,6 +57,7 @@ private:
|
||||
public:
|
||||
static void print(vector<shared_ptr<Token>> tokens);
|
||||
static void print(vector<shared_ptr<Statement>> statements);
|
||||
static void print(shared_ptr<Error> error);
|
||||
};
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user