Scan function

2025-06-01 10:22:42 +09:00
parent 61e648e55b
commit 05e3def411
4 changed files with 82 additions and 17 deletions
--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@@ -84,13 +84,25 @@ Token Lexer::nextToken() {
    }

    {
-        Token token =matchSymbol('.', Token::Kind::DOT);
+        Token token = matchSymbol(':', Token::Kind::COLON);
        if (token.isValid())
            return token;
    }

    {
-        Token token = matchSymbol(',', Token::Kind::COMMA);
+        Token token = matchSymbol(';', Token::Kind::SEMICOLON);
+        if (token.isValid())
+            return token;
+    }
+
+    {
+        Token token = matchKeyword("fun", Token::Kind::FUNCTION);
+        if (token.isValid())
+            return token;
+    }
+
+    {
+        Token token = matchKeyword("ret", Token::Kind::RETURN);
        if (token.isValid())
            return token;
    }
@@ -102,7 +114,7 @@ Token Lexer::nextToken() {
    }

    {
-        Token token = matchKeyword("fun", Token::Kind::FUNCTION);
+        Token token = matchIdentifier();
        if (token.isValid())
            return token;
    }
@@ -137,27 +149,39 @@ Token Lexer::matchSymbol(char symbol, Token::Kind kind) {
    return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
 }

+/// Tries to read `keyword` at the current position.
+/// Yields a token of `kind` when the source text at currentIndex equals
+/// `keyword` and isSeparator() accepts the position right after it;
+/// otherwise yields an INVALID token carrying source.substr(currentIndex, 1).
+Token Lexer::matchKeyword(string keyword, Token::Kind kind) {
+    const bool textDiffers = source.compare(currentIndex, keyword.length(), keyword) != 0;
+
+    // Guard: wrong text, or the keyword runs straight into a non-separator character.
+    if (textDiffers || !isSeparator(currentIndex + keyword.length()))
+        return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
+
+    return Token(kind, keyword, currentLine, currentColumn);
+}
+
 Token Lexer::matchInteger() {
    int nextIndex = currentIndex;

    while (nextIndex < source.length() && isDigit(nextIndex))
        nextIndex++;
    
-    if (nextIndex == currentIndex)
+    if (nextIndex == currentIndex || !isSeparator(nextIndex)) // no digits at all, or digits glued to a non-separator (e.g. "12ab") -> INVALID
        return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
    
    string lexme = source.substr(currentIndex, nextIndex - currentIndex);
    return Token(Token::Kind::INTEGER, lexme, currentLine, currentColumn);
 }

-Token Lexer::matchKeyword(string keyword, Token::Kind kind) {
-    bool isMatching = source.compare(currentIndex, keyword.length(), keyword) == 0;
-    bool isSeparated = (currentIndex + keyword.length() >= source.length()) || isWhiteSpace(currentIndex + keyword.length()) || isNewLine(currentIndex + keyword.length());
+Token Lexer::matchIdentifier() { // scans the run of isIdentifier() characters that starts at currentIndex
+    int nextIndex = currentIndex;

-    if (isMatching && isSeparated)
-        return Token(Token::Kind::FUNCTION, keyword, currentLine, currentColumn);
-    else
+    while (nextIndex < source.length() && isIdentifier(nextIndex)) // NOTE(review): isIdentifier() accepts digits, so the run may begin with a digit (e.g. "1abc") - confirm this is intended
+        nextIndex++;
+
+    if (nextIndex == currentIndex || !isSeparator(nextIndex)) // empty run, or run not followed by a separator / end of input -> INVALID
        return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
+
+    string lexme = source.substr(currentIndex, nextIndex - currentIndex); // text of the whole run
+    return Token(Token::Kind::IDENTIFIER, lexme, currentLine, currentColumn);
 }

 Token Lexer::matchInvalid() {
@@ -178,3 +202,35 @@ bool Lexer::isDigit(int index) {
    char character = source.at(index);
    return character >= '0' && character <= '9';
 }
+
+/// Reports whether the character read via source.at(index) may appear in an
+/// identifier: an ASCII letter, an ASCII digit, or an underscore.
+bool Lexer::isIdentifier(int index) {
+    const char c = source.at(index);
+
+    if (c == '_')
+        return true;
+    if (c >= '0' && c <= '9')
+        return true;
+    if (c >= 'a' && c <= 'z')
+        return true;
+    return c >= 'A' && c <= 'Z';
+}
+
+/// Reports whether a keyword, integer or identifier lexeme may end at `index`.
+/// Returns true when `index` is at or past the end of the source, or when the
+/// character there is an operator, parenthesis, colon, semicolon, space, tab
+/// or newline; returns false for every other character.
+bool Lexer::isSeparator(int index) {
+    // End of input terminates any lexeme.
+    if (index >= source.length())
+        return true;
+
+    char character = source.at(index);
+    switch (character) {
+        case '+':
+        case '-':
+        case '*':
+        case '/':
+        case '%':
+        case '(':
+        case ')':
+        case ':':
+        case ';': // ';' is lexed as its own symbol token, so "ret;" or "42;" must end the preceding lexeme
+        case ' ':
+        case '\t':
+        case '\n':
+            return true;
+        default:
+            return false;
+    }
+}
--- a/src/Lexer.h
+++ b/src/Lexer.h
@@ -19,12 +19,15 @@ private:
    Token matchNewLine();
    Token matchInvalid();
    Token matchSymbol(char symbol, Token::Kind kind);
-    Token matchInteger();
    Token matchKeyword(string keyword, Token::Kind kind);
+    Token matchInteger();
+    Token matchIdentifier();

    bool isWhiteSpace(int index);
    bool isNewLine(int index);
    bool isDigit(int index);
+    bool isIdentifier(int index);
+    bool isSeparator(int index);

 public:
    Lexer(string source);
--- a/src/Token.cpp
+++ b/src/Token.cpp
@@ -48,14 +48,18 @@ string Token::toString() {
            return "LEFT_PAREN";
        case RIGHT_PAREN:
            return "RIGHT_PAREN";
-        case DOT:
-            return "DOT";
-        case COMMA:
-            return "COMMA";
+        case COLON:
+            return "COLON";
+        case SEMICOLON:
+            return "SEMICOLON";
        case INTEGER:
            return "INTEGER";
+        case IDENTIFIER:
+            return "IDENTIFIER";
        case FUNCTION:
            return "FUNCTION";
+        case RETURN:
+            return "RETURN";
        case NEW_LINE:
            return "NEW_LINE";
        case END:
--- a/src/Token.h
+++ b/src/Token.h
@@ -16,12 +16,14 @@ public:
        
        LEFT_PAREN,
        RIGHT_PAREN,
-        DOT,
-        COMMA,
+        COLON,
+        SEMICOLON,

        INTEGER,
+        IDENTIFIER,

        FUNCTION,
+        RETURN,

        NEW_LINE,
        END,