Handle comments

2025-06-02 20:16:55 +09:00
parent 5e3ef33f15
commit f5952ad3ee
2 changed files with 95 additions and 30 deletions
--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@@ -20,14 +20,6 @@ vector<shared_ptr<Token>> Lexer::getTokens() {
            exit(1);
         }
        
-        currentIndex += token->getLexme().length();
-        currentColumn += token->getLexme().length();
-
-        if (token->getKind() == Token::Kind::NEW_LINE) {
-            currentLine++;
-            currentColumn = 0;
-        }
-        
        // filter out multiple new lines
        if (tokens.empty() || token->getKind() != Token::Kind::NEW_LINE || tokens.back()->getKind() != token->getKind())
            tokens.push_back(token);
@@ -35,7 +27,8 @@ vector<shared_ptr<Token>> Lexer::getTokens() {
    return tokens;
 }

-shared_ptr<Token> Lexer::nextToken() {    
+shared_ptr<Token> Lexer::nextToken() {
+    // Ignore white spaces
    while (currentIndex < source.length() && isWhiteSpace(currentIndex)) {
        currentIndex++;
        currentColumn++;
@@ -43,6 +36,69 @@ shared_ptr<Token> Lexer::nextToken() {

    shared_ptr<Token> token;

+    // ignore // comment
+    token = match(Token::Kind::INVALID, "//", false);
+    if (token) {
+        currentIndex += 2;
+        do {
+            // new line
+            token = match(Token::Kind::NEW_LINE, "\n", false);
+            if (token != nullptr)
+                return token;
+    
+            // eof
+            token = matchEnd();
+            if (token != nullptr)
+                return token;
+
+            // if neither is found, go to the next character
+            currentIndex++;
+        } while(true);
+    }
+
+    // ignore /* */ comment
+    token = match(Token::Kind::INVALID, "/*", false);
+    if (token) {
+        shared_ptr<Token> newLineToken = nullptr; // we want to return the first new line we come across
+        int depth = 1; // so we can embed comments inside each other
+        do {
+            // new line
+            token = match(Token::Kind::NEW_LINE, "\n", false);
+            newLineToken = newLineToken ? newLineToken : token;
+            if (token) {
+                continue;;
+            }
+
+            // eof
+            token = matchEnd();
+            if (token)
+                return make_shared<Token>(Token::Kind::INVALID, "", currentLine, currentColumn);
+
+            // go deeper
+            token = match(Token::Kind::INVALID, "/*", false);
+            if (token) {
+                depth++;
+                continue;
+            }
+
+            // go back
+            token = match(Token::Kind::INVALID, "*/", false);
+            if (token) {
+                depth--;
+            }
+
+            if (depth > 0) {
+                currentIndex++;
+                currentColumn++;
+            }
+        } while(depth > 0);
+
+        if (newLineToken)
+            return newLineToken;
+        else
+            return nextToken(); // gets rid of remaining white spaces without repeating the code
+    }
+
    // arithmetic
    token = match(Token::Kind::PLUS, "+", false);
    if (token != nullptr)
@@ -126,15 +182,11 @@ shared_ptr<Token> Lexer::nextToken() {
        return token;

    // new line
-    token = match(Token::Kind::NEW_LINE, "\r\n", false);
-    if (token != nullptr)
-        return token;
-
    token = match(Token::Kind::NEW_LINE, "\n", false);
    if (token != nullptr)
        return token;
    
-    // other
+    // eof
    token = matchEnd();
    if (token != nullptr)
        return token;
@@ -146,10 +198,12 @@ shared_ptr<Token> Lexer::match(Token::Kind kind, string lexme, bool needsSeparat
    bool isMatching = source.compare(currentIndex, lexme.length(), lexme) == 0;
    bool isSeparatorSatisfied = !needsSeparator || isSeparator(currentIndex + lexme.length());

-    if (isMatching && isSeparatorSatisfied)
-        return make_shared<Token>(kind, lexme, currentLine, currentColumn);
-    else
+    if (!isMatching || !isSeparatorSatisfied)
        return nullptr;
+
+    shared_ptr<Token> token = make_shared<Token>(kind, lexme, currentLine, currentColumn);
+    advanceWithToken(token);
+    return token;
 }

 shared_ptr<Token> Lexer::matchInteger() {
@@ -162,7 +216,9 @@ shared_ptr<Token> Lexer::matchInteger() {
        return nullptr;
    
    string lexme = source.substr(currentIndex, nextIndex - currentIndex);
-    return make_shared<Token>(Token::Kind::INTEGER, lexme, currentLine, currentColumn);
+    shared_ptr<Token> token = make_shared<Token>(Token::Kind::INTEGER, lexme, currentLine, currentColumn);
+    advanceWithToken(token);
+    return token;
 }

 shared_ptr<Token> Lexer::matchIdentifier() {
@@ -175,7 +231,9 @@ shared_ptr<Token> Lexer::matchIdentifier() {
        return nullptr;

    string lexme = source.substr(currentIndex, nextIndex - currentIndex);
-    return make_shared<Token>(Token::Kind::IDENTIFIER, lexme, currentLine, currentColumn);
+    shared_ptr<Token> token = make_shared<Token>(Token::Kind::IDENTIFIER, lexme, currentLine, currentColumn);
+    advanceWithToken(token);
+    return token;
 }

 shared_ptr<Token> Lexer::matchEnd() {
@@ -229,9 +287,18 @@ bool Lexer::isSeparator(int index) {
        case ' ':
        case '\t':
        case '\n':
-        case '\r';
            return true;
        default:
            return false;
    }
 }
+
+ void Lexer::advanceWithToken(shared_ptr<Token> token) {
+    if (token->getKind() == Token::Kind::NEW_LINE) {
+        currentLine++;
+        currentColumn = 0;
+    } else {
+        currentColumn += token->getLexme().length();
+    }
+    currentIndex += token->getLexme().length();
+ }
--- a/src/Lexer.h
+++ b/src/Lexer.h
@@ -14,24 +14,22 @@ private:
    int currentLine = 0;
    int currentColumn = 0;

-    Token nextToken();
-    Token matchEnd();
-    Token matchNewLine();
-    Token matchInvalid();
-    Token matchSymbol(char symbol, Token::Kind kind);
-    Token matchKeyword(string keyword, Token::Kind kind);
-    Token matchInteger();
-    Token matchIdentifier();
+    shared_ptr<Token> nextToken();
+    shared_ptr<Token> match(Token::Kind kind, string lexme, bool needsSeparator);
+    shared_ptr<Token> matchInteger();
+    shared_ptr<Token> matchIdentifier();
+    shared_ptr<Token> matchEnd();
+    shared_ptr<Token> matchInvalid();

    bool isWhiteSpace(int index);
-    bool isNewLine(int index);
    bool isDigit(int index);
    bool isIdentifier(int index);
    bool isSeparator(int index);
+    void advanceWithToken(shared_ptr<Token> token);

 public:
    Lexer(string source);
-    vector<Token> getTokens();
+    vector<shared_ptr<Token>> getTokens();
 };

 #endif