Scan function

This commit is contained in:
Rafał Grodziński
2025-06-01 10:22:42 +09:00
parent 61e648e55b
commit 05e3def411
4 changed files with 82 additions and 17 deletions

View File

@@ -84,13 +84,25 @@ Token Lexer::nextToken() {
} }
{ {
Token token =matchSymbol('.', Token::Kind::DOT); Token token = matchSymbol(':', Token::Kind::COLON);
if (token.isValid()) if (token.isValid())
return token; return token;
} }
{ {
Token token = matchSymbol(',', Token::Kind::COMMA); Token token = matchSymbol(';', Token::Kind::SEMICOLON);
if (token.isValid())
return token;
}
{
Token token = matchKeyword("fun", Token::Kind::FUNCTION);
if (token.isValid())
return token;
}
{
Token token = matchKeyword("ret", Token::Kind::RETURN);
if (token.isValid()) if (token.isValid())
return token; return token;
} }
@@ -102,7 +114,7 @@ Token Lexer::nextToken() {
} }
{ {
Token token = matchKeyword("fun", Token::Kind::FUNCTION); Token token = matchIdentifier();
if (token.isValid()) if (token.isValid())
return token; return token;
} }
@@ -137,27 +149,39 @@ Token Lexer::matchSymbol(char symbol, Token::Kind kind) {
return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn); return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
} }
// Attempts to match `keyword` at the current position of the source.
//
// The match only succeeds when the source contains `keyword` starting at
// `currentIndex` AND the position right after it satisfies isSeparator().
//
// Returns a token of the requested `kind` carrying the keyword text on
// success; otherwise an INVALID token carrying the single character found at
// the current position.
Token Lexer::matchKeyword(string keyword, Token::Kind kind) {
    const bool startsWithKeyword = source.compare(currentIndex, keyword.length(), keyword) == 0;

    // Guard clause: bail out unless the keyword is present and properly terminated.
    // isSeparator() is only consulted when the text itself matched.
    if (!startsWithKeyword || !isSeparator(currentIndex + keyword.length()))
        return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);

    return Token(kind, keyword, currentLine, currentColumn);
}
// Attempts to match an integer literal at the current position of the source.
//
// Consumes the longest run of digits starting at `currentIndex`. The match
// fails when there is no digit at all, or when the run is not followed by a
// separator (see isSeparator()).
//
// Returns an INTEGER token carrying the digit run on success; otherwise an
// INVALID token carrying the single character found at the current position.
Token Lexer::matchInteger() {
    // Taken once as int so the loop below does not compare signed with unsigned.
    const int sourceLength = static_cast<int>(source.length());

    int nextIndex = currentIndex;
    while (nextIndex < sourceLength && isDigit(nextIndex))
        nextIndex++;

    // Either nothing was consumed, or the digits run straight into a
    // non-separator character.
    if (nextIndex == currentIndex || !isSeparator(nextIndex))
        return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);

    string lexeme = source.substr(currentIndex, nextIndex - currentIndex);
    return Token(Token::Kind::INTEGER, lexeme, currentLine, currentColumn);
}
// Attempts to match an identifier at the current position of the source.
//
// Consumes the longest run of characters accepted by isIdentifier() starting
// at `currentIndex`. The match fails when no character was consumed, or when
// the run is not followed by a separator (see isSeparator()).
//
// Returns an IDENTIFIER token carrying the consumed text on success; otherwise
// an INVALID token carrying the single character found at the current position.
//
// NOTE(review): isIdentifier() also accepts digits, so a run that starts with
// a digit is matched here as well - confirm this is intended.
Token Lexer::matchIdentifier() {
    // Taken once as int so the loop below does not compare signed with unsigned.
    const int sourceLength = static_cast<int>(source.length());

    int nextIndex = currentIndex;
    while (nextIndex < sourceLength && isIdentifier(nextIndex))
        nextIndex++;

    // Either nothing was consumed, or the run is glued to a non-separator character.
    if (nextIndex == currentIndex || !isSeparator(nextIndex))
        return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);

    string lexeme = source.substr(currentIndex, nextIndex - currentIndex);
    return Token(Token::Kind::IDENTIFIER, lexeme, currentLine, currentColumn);
}
Token Lexer::matchInvalid() { Token Lexer::matchInvalid() {
@@ -178,3 +202,35 @@ bool Lexer::isDigit(int index) {
char character = source.at(index); char character = source.at(index);
return character >= '0' && character <= '9'; return character >= '0' && character <= '9';
} }
// Reports whether the character at `index` may appear in an identifier:
// a digit 0-9, a letter a-z or A-Z, or an underscore.
//
// `index` is passed to `source.at`, which throws for a position outside the
// source.
bool Lexer::isIdentifier(int index) {
    const char c = source.at(index);

    if (c == '_')
        return true;
    if (c >= '0' && c <= '9')
        return true;
    if (c >= 'a' && c <= 'z')
        return true;
    return c >= 'A' && c <= 'Z';
}
// Reports whether the position `index` terminates a keyword, integer or
// identifier.
//
// A position at or past the end of the source counts as a separator, so the
// last lexeme of the input is terminated by the end of the text. Otherwise
// the character at `index` must be one of the operator/punctuation symbols or
// whitespace characters listed below.
bool Lexer::isSeparator(int index) {
    if (index >= source.length())
        return true;

    switch (source.at(index)) {
        case '+':
        case '-':
        case '*':
        case '/':
        case '%':
        case '(':
        case ')':
        case ':':
        // ';' is lexed as its own SEMICOLON symbol, so it has to end the
        // preceding lexeme the same way ':' does (e.g. "ret 5;").
        case ';':
        case ' ':
        case '\t':
        case '\n':
            return true;
        default:
            return false;
    }
}

View File

@@ -19,12 +19,15 @@ private:
Token matchNewLine(); Token matchNewLine();
Token matchInvalid(); Token matchInvalid();
Token matchSymbol(char symbol, Token::Kind kind); Token matchSymbol(char symbol, Token::Kind kind);
Token matchInteger();
Token matchKeyword(string keyword, Token::Kind kind); Token matchKeyword(string keyword, Token::Kind kind);
Token matchInteger();
Token matchIdentifier();
bool isWhiteSpace(int index); bool isWhiteSpace(int index);
bool isNewLine(int index); bool isNewLine(int index);
bool isDigit(int index); bool isDigit(int index);
bool isIdentifier(int index);
bool isSeparator(int index);
public: public:
Lexer(string source); Lexer(string source);

View File

@@ -48,14 +48,18 @@ string Token::toString() {
return "LEFT_PAREN"; return "LEFT_PAREN";
case RIGHT_PAREN: case RIGHT_PAREN:
return "RIGHT_PAREN"; return "RIGHT_PAREN";
case DOT: case COLON:
return "DOT"; return "COLON";
case COMMA: case SEMICOLON:
return "COMMA"; return "SEMICOLON";
case INTEGER: case INTEGER:
return "INTEGER"; return "INTEGER";
case IDENTIFIER:
return "IDENTIFIER";
case FUNCTION: case FUNCTION:
return "FUNCTION"; return "FUNCTION";
case RETURN:
return "RETURN";
case NEW_LINE: case NEW_LINE:
return "NEW_LINE"; return "NEW_LINE";
case END: case END:

View File

@@ -16,12 +16,14 @@ public:
LEFT_PAREN, LEFT_PAREN,
RIGHT_PAREN, RIGHT_PAREN,
DOT, COLON,
COMMA, SEMICOLON,
INTEGER, INTEGER,
IDENTIFIER,
FUNCTION, FUNCTION,
RETURN,
NEW_LINE, NEW_LINE,
END, END,