Handle comments

This commit is contained in:
Rafał Grodziński
2025-06-02 20:16:55 +09:00
parent 5e3ef33f15
commit f5952ad3ee
2 changed files with 95 additions and 30 deletions

View File

@@ -20,14 +20,6 @@ vector<shared_ptr<Token>> Lexer::getTokens() {
exit(1); exit(1);
} }
currentIndex += token->getLexme().length();
currentColumn += token->getLexme().length();
if (token->getKind() == Token::Kind::NEW_LINE) {
currentLine++;
currentColumn = 0;
}
// filter out multiple new lines // filter out multiple new lines
if (tokens.empty() || token->getKind() != Token::Kind::NEW_LINE || tokens.back()->getKind() != token->getKind()) if (tokens.empty() || token->getKind() != Token::Kind::NEW_LINE || tokens.back()->getKind() != token->getKind())
tokens.push_back(token); tokens.push_back(token);
@@ -35,7 +27,8 @@ vector<shared_ptr<Token>> Lexer::getTokens() {
return tokens; return tokens;
} }
shared_ptr<Token> Lexer::nextToken() { shared_ptr<Token> Lexer::nextToken() {
// Ignore white spaces
while (currentIndex < source.length() && isWhiteSpace(currentIndex)) { while (currentIndex < source.length() && isWhiteSpace(currentIndex)) {
currentIndex++; currentIndex++;
currentColumn++; currentColumn++;
@@ -43,6 +36,69 @@ shared_ptr<Token> Lexer::nextToken() {
shared_ptr<Token> token; shared_ptr<Token> token;
// ignore // comment
token = match(Token::Kind::INVALID, "//", false);
if (token) {
currentIndex += 2;
do {
// new line
token = match(Token::Kind::NEW_LINE, "\n", false);
if (token != nullptr)
return token;
// eof
token = matchEnd();
if (token != nullptr)
return token;
// if neither is found, go to the next character
currentIndex++;
} while(true);
}
// ignore /* */ comment
token = match(Token::Kind::INVALID, "/*", false);
if (token) {
shared_ptr<Token> newLineToken = nullptr; // we want to return the first new line we come accross
int depth = 1; // so we can embed comments inside each other
do {
// new line
token = match(Token::Kind::NEW_LINE, "\n", false);
newLineToken = newLineToken ? newLineToken : token;
if (token) {
continue;;
}
// eof
token = matchEnd();
if (token)
return make_shared<Token>(Token::Kind::INVALID, "", currentLine, currentColumn);
// go deeper
token = match(Token::Kind::INVALID, "/*", false);
if (token) {
depth++;
continue;
}
// go back
token = match(Token::Kind::INVALID, "*/", false);
if (token) {
depth--;
}
if (depth > 0) {
currentIndex++;
currentColumn++;
}
} while(depth > 0);
if (newLineToken)
return newLineToken;
else
return nextToken(); // gets rid of remaining white spaces without repeating the code
}
// arithmetic // arithmetic
token = match(Token::Kind::PLUS, "+", false); token = match(Token::Kind::PLUS, "+", false);
if (token != nullptr) if (token != nullptr)
@@ -126,15 +182,11 @@ shared_ptr<Token> Lexer::nextToken() {
return token; return token;
// new line // new line
token = match(Token::Kind::NEW_LINE, "\r\n", false);
if (token != nullptr)
return token;
token = match(Token::Kind::NEW_LINE, "\n", false); token = match(Token::Kind::NEW_LINE, "\n", false);
if (token != nullptr) if (token != nullptr)
return token; return token;
// other // eof
token = matchEnd(); token = matchEnd();
if (token != nullptr) if (token != nullptr)
return token; return token;
@@ -146,10 +198,12 @@ shared_ptr<Token> Lexer::match(Token::Kind kind, string lexme, bool needsSeparat
bool isMatching = source.compare(currentIndex, lexme.length(), lexme) == 0; bool isMatching = source.compare(currentIndex, lexme.length(), lexme) == 0;
bool isSeparatorSatisfied = !needsSeparator || isSeparator(currentIndex + lexme.length()); bool isSeparatorSatisfied = !needsSeparator || isSeparator(currentIndex + lexme.length());
if (isMatching && isSeparatorSatisfied) if (!isMatching || !isSeparatorSatisfied)
return make_shared<Token>(kind, lexme, currentLine, currentColumn);
else
return nullptr; return nullptr;
shared_ptr<Token> token = make_shared<Token>(kind, lexme, currentLine, currentColumn);
advanceWithToken(token);
return token;
} }
shared_ptr<Token> Lexer::matchInteger() { shared_ptr<Token> Lexer::matchInteger() {
@@ -162,7 +216,9 @@ shared_ptr<Token> Lexer::matchInteger() {
return nullptr; return nullptr;
string lexme = source.substr(currentIndex, nextIndex - currentIndex); string lexme = source.substr(currentIndex, nextIndex - currentIndex);
return make_shared<Token>(Token::Kind::INTEGER, lexme, currentLine, currentColumn); shared_ptr<Token> token = make_shared<Token>(Token::Kind::INTEGER, lexme, currentLine, currentColumn);
advanceWithToken(token);
return token;
} }
shared_ptr<Token> Lexer::matchIdentifier() { shared_ptr<Token> Lexer::matchIdentifier() {
@@ -175,7 +231,9 @@ shared_ptr<Token> Lexer::matchIdentifier() {
return nullptr; return nullptr;
string lexme = source.substr(currentIndex, nextIndex - currentIndex); string lexme = source.substr(currentIndex, nextIndex - currentIndex);
return make_shared<Token>(Token::Kind::IDENTIFIER, lexme, currentLine, currentColumn); shared_ptr<Token> token = make_shared<Token>(Token::Kind::IDENTIFIER, lexme, currentLine, currentColumn);
advanceWithToken(token);
return token;
} }
shared_ptr<Token> Lexer::matchEnd() { shared_ptr<Token> Lexer::matchEnd() {
@@ -229,9 +287,18 @@ bool Lexer::isSeparator(int index) {
case ' ': case ' ':
case '\t': case '\t':
case '\n': case '\n':
case '\r';
return true; return true;
default: default:
return false; return false;
} }
} }
// Moves the lexer cursor past the given token.
//
// The source index always advances by the length of the token's lexme.
// A NEW_LINE token increments the line counter and resets the column to 0;
// every other kind of token advances the column by the lexme length.
//
// token: the token that has just been matched; it is dereferenced without a
//        null check, so it must not be null.
void Lexer::advanceWithToken(shared_ptr<Token> token) {
    const auto lexmeLength = token->getLexme().length();
    const bool startsNewLine = token->getKind() == Token::Kind::NEW_LINE;

    // The index moves forward regardless of the token kind.
    currentIndex += lexmeLength;

    if (startsNewLine) {
        currentLine++;
        currentColumn = 0;
        return;
    }

    currentColumn += lexmeLength;
}

View File

@@ -14,24 +14,22 @@ private:
int currentLine = 0; int currentLine = 0;
int currentColumn = 0; int currentColumn = 0;
Token nextToken(); shared_ptr<Token> nextToken();
Token matchEnd(); shared_ptr<Token> match(Token::Kind kind, string lexme, bool needsSeparator);
Token matchNewLine(); shared_ptr<Token> matchInteger();
Token matchInvalid(); shared_ptr<Token> matchIdentifier();
Token matchSymbol(char symbol, Token::Kind kind); shared_ptr<Token> matchEnd();
Token matchKeyword(string keyword, Token::Kind kind); shared_ptr<Token> matchInvalid();
Token matchInteger();
Token matchIdentifier();
bool isWhiteSpace(int index); bool isWhiteSpace(int index);
bool isNewLine(int index);
bool isDigit(int index); bool isDigit(int index);
bool isIdentifier(int index); bool isIdentifier(int index);
bool isSeparator(int index); bool isSeparator(int index);
void advanceWithToken(shared_ptr<Token> token);
public: public:
Lexer(string source); Lexer(string source);
vector<Token> getTokens(); vector<shared_ptr<Token>> getTokens();
}; };
#endif #endif