// Reviewer notes for the patch below (a git diff touching C++ sources). Only
// these leading comment lines are added; the patch text itself is unchanged.
//
// What the patch does, per file:
//   src/Error.cpp, src/Error.h (new)
//     Adds class Error holding `line`, `column` and `lexme`, set by the
//     constructor and returned by getLine(), getColumn() and getLexme().
//   src/Lexer/Lexer.cpp
//     - getTokens() clears `errors` before scanning, skips a nullptr result of
//       nextToken() with `continue`, and keeps looping while the token is
//       nullptr or its kind is not TokenKind::END. After the loop, if `errors`
//       is non-empty, each entry is passed to Logger::print and exit(1) is
//       called. The removed code instead called exit(1) at the first nullptr
//       or invalid token.
//     - nextToken() and matchReal() now call markError() and return nullptr
//       where they used to return matchInvalid(); matchInvalid() is deleted.
//     - markError() (new) remembers currentIndex, advances it at least once
//       and then until isSeparator(currentIndex) is true, appends an Error
//       built from currentLine, currentColumn and the skipped substring to
//       `errors`, and finally advances currentIndex by one more character.
//   src/Lexer/Lexer.h
//     Replaces `#include "Token.h"` with forward declarations of Token,
//     TokenKind and Error, adds the `errors` member and the markError()
//     declaration, and drops the matchInvalid() declaration.
//   src/Logger.cpp, src/Logger.h
//     Adds a Logger::print overload for an error that writes
//     `Unexpected token "<lexme>" at line: <line + 1>, column: <column + 1>`
//     to cout.
//
// NOTE(review): this copy of the patch appears extraction-damaged -- text
//   between angle brackets is missing (e.g. `vector> Lexer::getTokens()`,
//   `shared_ptr token`, bare `#include`) and hunk lines are run together, so
//   it presumably will not apply as-is. Verify against the original commit.
// NOTE(review): markError() changes only currentIndex. currentColumn and
//   currentLine are updated in advanceWithToken(), which markError() does not
//   call, so positions reported for anything after an error on the same line
//   look stale -- confirm and advance the column by the skipped length.
// NOTE(review): the final `currentIndex++` in markError() skips the character
//   that ended the bad lexme without inspecting it. If isSeparator() accepts a
//   newline (its body is not visible here), that newline is dropped and
//   currentLine is not incremented -- confirm.
// NOTE(review): the do/while in markError() has no visible bound against
//   source.length(). isWhiteSpace() reads source.at(index); confirm that
//   isSeparator() treats end-of-input as a separator before any such read, and
//   that the trailing `currentIndex++` cannot step past the end of `source`.
// NOTE(review): Logger::print(error) writes to cout, while the removed
//   diagnostics went to cerr -- confirm the stream change is intended.
// NOTE(review): Error.h puts `using namespace std;` at header scope and the
//   getters are not const-qualified; Lexer.h's `enum class TokenKind;` must
//   match the underlying type of the real definition in Token.h.
diff --git a/src/Error.cpp b/src/Error.cpp new file mode 100644 index 0000000..c1a508e --- /dev/null +++ b/src/Error.cpp @@ -0,0 +1,16 @@ +#include "Error.h" + +Error::Error(int line, int column, string lexme) : +line(line), column(column), lexme(lexme) { } + +int Error::getLine() { + return line; +} + +int Error::getColumn() { + return column; +} + +string Error::getLexme() { + return lexme; +} \ No newline at end of file diff --git a/src/Error.h b/src/Error.h new file mode 100644 index 0000000..4770af7 --- /dev/null +++ b/src/Error.h @@ -0,0 +1,21 @@ +#ifndef ERROR_H +#define ERROR_H + +#include + +using namespace std; + +class Error { +private: + int line; + int column; + string lexme; + +public: + Error(int line, int column, string lexme); + int getLine(); + int getColumn(); + string getLexme(); +}; + +#endif \ No newline at end of file diff --git a/src/Lexer/Lexer.cpp b/src/Lexer/Lexer.cpp index d017d6f..c808cb2 100644 --- a/src/Lexer/Lexer.cpp +++ b/src/Lexer/Lexer.cpp @@ -1,5 +1,9 @@ #include "Lexer.h" +#include "Token.h" +#include "Error.h" +#include "Logger.h" + Lexer::Lexer(string source): source(source) { } @@ -8,21 +12,14 @@ vector> Lexer::getTokens() { currentLine = 0; currentColumn = 0; + errors.clear(); + vector> tokens; shared_ptr token; do { token = nextToken(); - // Got a nullptr, shouldn't have happened - if (!token) { - cerr << "Failed to scan tokens" << endl; - exit(1); - } - - // Abort scanning if we got an error - if (!token->isValid()) { - cerr << "Unexpected character '" << token->getLexme() << "' at " << token->getLine() << ":" << token->getColumn() << endl; - exit(1); - } + if (token == nullptr) + continue; // Don't add new line as the first token if (tokens.empty() && token->isOfKind({TokenKind::NEW_LINE})) @@ -35,7 +32,14 @@ vector> Lexer::getTokens() { // filter out multiple new lines if (tokens.empty() || token->getKind() != TokenKind::NEW_LINE || tokens.back()->getKind() != token->getKind()) tokens.push_back(token); - } while 
(token->getKind() != TokenKind::END); + } while (token == nullptr || token->getKind() != TokenKind::END); + + if (!errors.empty()) { + for (shared_ptr &error : errors) + Logger::print(error); + exit(1); + } + return tokens; } @@ -265,7 +269,8 @@ shared_ptr Lexer::nextToken() { if (token != nullptr) return token; - return matchInvalid(); + markError(); + return nullptr; } shared_ptr Lexer::match(TokenKind kind, string lexme, bool needsSeparator) { @@ -359,8 +364,10 @@ shared_ptr Lexer::matchReal() { while (nextIndex < source.length() && isDecDigit(nextIndex)) nextIndex++; - if (!isSeparator(nextIndex)) - return matchInvalid(); + if (!isSeparator(nextIndex)) { + markError(); + return nullptr; + } string lexme = source.substr(currentIndex, nextIndex - currentIndex); shared_ptr token = make_shared(TokenKind::REAL, lexme, currentLine, currentColumn); @@ -390,10 +397,6 @@ shared_ptr Lexer::matchEnd() { return nullptr; } -shared_ptr Lexer::matchInvalid() { - return make_shared(TokenKind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn); -} - bool Lexer::isWhiteSpace(int index) { char character = source.at(index); return character == ' ' || character == '\t'; @@ -451,7 +454,7 @@ bool Lexer::isSeparator(int index) { } } - void Lexer::advanceWithToken(shared_ptr token) { +void Lexer::advanceWithToken(shared_ptr token) { if (token->getKind() == TokenKind::NEW_LINE) { currentLine++; currentColumn = 0; @@ -459,4 +462,13 @@ bool Lexer::isSeparator(int index) { currentColumn += token->getLexme().length(); } currentIndex += token->getLexme().length(); - } +} + +void Lexer::markError() { + int startIndex = currentIndex; + do { + currentIndex++; + } while (!isSeparator(currentIndex)); + errors.push_back(make_shared(currentLine, currentColumn, source.substr(startIndex, currentIndex - startIndex))); + currentIndex++; +} diff --git a/src/Lexer/Lexer.h b/src/Lexer/Lexer.h index c117b08..297616d 100644 --- a/src/Lexer/Lexer.h +++ b/src/Lexer/Lexer.h @@ -3,7 +3,9 @@ 
#include -#include "Token.h" +class Token; +enum class TokenKind; +class Error; using namespace std; @@ -14,6 +16,8 @@ private: int currentLine; int currentColumn; + vector> errors; + shared_ptr nextToken(); shared_ptr match(TokenKind kind, string lexme, bool needsSeparator); shared_ptr matchIntegerDec(); @@ -22,7 +26,6 @@ private: shared_ptr matchReal(); shared_ptr matchIdentifier(); shared_ptr matchEnd(); - shared_ptr matchInvalid(); bool isWhiteSpace(int index); bool isDecDigit(int index); @@ -32,6 +35,8 @@ private: bool isSeparator(int index); void advanceWithToken(shared_ptr token); + void markError(); + public: Lexer(string source); vector> getTokens(); diff --git a/src/Logger.cpp b/src/Logger.cpp index fd438d8..edd28f7 100644 --- a/src/Logger.cpp +++ b/src/Logger.cpp @@ -23,6 +23,8 @@ #include "Parser/Expression/ExpressionCall.h" #include "Parser/Expression/ExpressionBlock.h" +#include "Error.h" + string Logger::toString(shared_ptr token) { switch (token->getKind()) { case TokenKind::PLUS: @@ -330,4 +332,8 @@ void Logger::print(vector> statements) { for (shared_ptr &statement : statements) { cout << toString(statement) << endl << endl; } +} + +void Logger::print(shared_ptr error) { + cout << format("Unexpected token \"{}\" at line: {}, column: {}\n", error->getLexme(), error->getLine() + 1, error->getColumn() + 1); } \ No newline at end of file diff --git a/src/Logger.h b/src/Logger.h index cc71037..a135bf2 100644 --- a/src/Logger.h +++ b/src/Logger.h @@ -25,6 +25,8 @@ class ExpressionBlock; enum class ValueType; +class Error; + using namespace std; class Logger { @@ -55,6 +57,7 @@ private: public: static void print(vector> tokens); static void print(vector> statements); + static void print(shared_ptr error); }; #endif \ No newline at end of file