Better token error handling

This commit is contained in:
Rafał Grodziński
2025-07-03 15:08:23 +09:00
parent 36a89a811a
commit d9ce92c9e0

View File

@@ -18,20 +18,19 @@ vector<shared_ptr<Token>> Lexer::getTokens() {
shared_ptr<Token> token; shared_ptr<Token> token;
do { do {
token = nextToken(); token = nextToken();
if (token == nullptr) if (token != nullptr) {
continue; // Don't add new line as the first token
if (tokens.empty() && token->isOfKind({TokenKind::NEW_LINE}))
continue;
// Insert an additional new line just before end
if (token->getKind() == TokenKind::END && tokens.back()->getKind() != TokenKind::NEW_LINE)
tokens.push_back(make_shared<Token>(TokenKind::NEW_LINE, "\n", token->getLine(), token->getColumn()));
// Don't add new line as the first token // filter out multiple new lines
if (tokens.empty() && token->isOfKind({TokenKind::NEW_LINE})) if (tokens.empty() || token->getKind() != TokenKind::NEW_LINE || tokens.back()->getKind() != token->getKind())
continue; tokens.push_back(token);
}
// Insert an additional new line just before end
if (token->getKind() == TokenKind::END && tokens.back()->getKind() != TokenKind::NEW_LINE)
tokens.push_back(make_shared<Token>(TokenKind::NEW_LINE, "\n", token->getLine(), token->getColumn()));
// filter out multiple new lines
if (tokens.empty() || token->getKind() != TokenKind::NEW_LINE || tokens.back()->getKind() != token->getKind())
tokens.push_back(token);
} while (token == nullptr || token->getKind() != TokenKind::END); } while (token == nullptr || token->getKind() != TokenKind::END);
if (!errors.empty()) { if (!errors.empty()) {
@@ -53,7 +52,7 @@ shared_ptr<Token> Lexer::nextToken() {
shared_ptr<Token> token; shared_ptr<Token> token;
// ignore // comment // ignore // comment
token = match(TokenKind::INVALID, "//", false); token = match(TokenKind::END, "//", false); // dummy token kind
if (token) { if (token) {
currentIndex += 2; currentIndex += 2;
do { do {
@@ -73,7 +72,7 @@ shared_ptr<Token> Lexer::nextToken() {
} }
// ignore /* */ comment // ignore /* */ comment
token = match(TokenKind::INVALID, "/*", false); token = match(TokenKind::END, "/*", false); // dummy token kind
if (token) { if (token) {
shared_ptr<Token> newLineToken = nullptr; // we want to return the first new line we come accross shared_ptr<Token> newLineToken = nullptr; // we want to return the first new line we come accross
int depth = 1; // so we can embed comments inside each other int depth = 1; // so we can embed comments inside each other
@@ -82,23 +81,25 @@ shared_ptr<Token> Lexer::nextToken() {
token = match(TokenKind::NEW_LINE, "\n", false); token = match(TokenKind::NEW_LINE, "\n", false);
newLineToken = newLineToken ? newLineToken : token; newLineToken = newLineToken ? newLineToken : token;
if (token) { if (token) {
continue;; continue;
} }
// eof // eof
token = matchEnd(); token = matchEnd();
if (token) if (token) {
return make_shared<Token>(TokenKind::INVALID, "", currentLine, currentColumn); markError();
return token;
}
// go deeper // go deeper
token = match(TokenKind::INVALID, "/*", false); token = match(TokenKind::END, "/*", false); // dummy token kind
if (token) { if (token) {
depth++; depth++;
continue; continue;
} }
// go back // go back
token = match(TokenKind::INVALID, "*/", false); token = match(TokenKind::END, "*/", false); // dummy token kind
if (token) { if (token) {
depth--; depth--;
} }
@@ -274,6 +275,9 @@ shared_ptr<Token> Lexer::nextToken() {
} }
shared_ptr<Token> Lexer::match(TokenKind kind, string lexme, bool needsSeparator) { shared_ptr<Token> Lexer::match(TokenKind kind, string lexme, bool needsSeparator) {
if (currentIndex + lexme.length() > source.length())
return nullptr;
bool isMatching = source.compare(currentIndex, lexme.length(), lexme) == 0; bool isMatching = source.compare(currentIndex, lexme.length(), lexme) == 0;
bool isSeparatorSatisfied = !needsSeparator || isSeparator(currentIndex + lexme.length()); bool isSeparatorSatisfied = !needsSeparator || isSeparator(currentIndex + lexme.length());
@@ -466,9 +470,16 @@ void Lexer::advanceWithToken(shared_ptr<Token> token) {
void Lexer::markError() { void Lexer::markError() {
int startIndex = currentIndex; int startIndex = currentIndex;
do { int startColumn = currentColumn;
currentIndex++; string lexme;
} while (!isSeparator(currentIndex)); if (currentIndex < source.length()) {
errors.push_back(make_shared<Error>(currentLine, currentColumn, source.substr(startIndex, currentIndex - startIndex))); do {
currentIndex++; currentIndex++;
currentColumn++;
} while (!isSeparator(currentIndex));
lexme = source.substr(startIndex, currentIndex - startIndex);
} else {
lexme = "EOF";
}
errors.push_back(make_shared<Error>(currentLine, startColumn, lexme));
} }