From 05e3def411e23f8ac8a598ecaf939fa1fc5d09d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?=
Date: Sun, 1 Jun 2025 10:22:42 +0900
Subject: [PATCH] Scan function

---
Review notes (placed after the "---" separator, so they are not part of
the commit message):
 * isSeparator() does not list ';' although this patch adds the SEMICOLON
   token. matchInteger(), matchIdentifier() and matchKeyword() therefore
   return INVALID for a lexeme that is directly followed by ';'
   (for example the "5" in "ret 5;").
 * The line structure of this patch was restored from the hunk line
   counts. Indentation width and the position of blank context lines are
   a reconstruction; if the context does not match the target tree, apply
   with `git apply --ignore-whitespace`.

 src/Lexer.cpp | 76 ++++++++++++++++++++++++++++++++++++++++++++-------
 src/Lexer.h   |  5 +++-
 src/Token.cpp | 12 +++++---
 src/Token.h   |  6 ++--
 4 files changed, 82 insertions(+), 17 deletions(-)

diff --git a/src/Lexer.cpp b/src/Lexer.cpp
index a6a68c8..5050068 100644
--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@@ -84,13 +84,25 @@ Token Lexer::nextToken() {
     }
 
     {
-        Token token =matchSymbol('.', Token::Kind::DOT);
+        Token token = matchSymbol(':', Token::Kind::COLON);
         if (token.isValid())
             return token;
     }
 
     {
-        Token token = matchSymbol(',', Token::Kind::COMMA);
+        Token token = matchSymbol(';', Token::Kind::SEMICOLON);
+        if (token.isValid())
+            return token;
+    }
+
+    {
+        Token token = matchKeyword("fun", Token::Kind::FUNCTION);
+        if (token.isValid())
+            return token;
+    }
+
+    {
+        Token token = matchKeyword("ret", Token::Kind::RETURN);
         if (token.isValid())
             return token;
     }
@@ -102,7 +114,7 @@ Token Lexer::nextToken() {
     }
 
     {
-        Token token = matchKeyword("fun", Token::Kind::FUNCTION);
+        Token token = matchIdentifier();
         if (token.isValid())
             return token;
     }
@@ -137,27 +149,39 @@ Token Lexer::matchSymbol(char symbol, Token::Kind kind) {
         return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
 }
 
+Token Lexer::matchKeyword(string keyword, Token::Kind kind) {
+    bool isMatching = source.compare(currentIndex, keyword.length(), keyword) == 0;
+
+    if (isMatching && isSeparator(currentIndex + keyword.length()))
+        return Token(kind, keyword, currentLine, currentColumn);
+    else
+        return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
+}
+
 Token Lexer::matchInteger() {
     int nextIndex = currentIndex;
 
     while (nextIndex < source.length() && isDigit(nextIndex))
         nextIndex++;
 
-    if (nextIndex == currentIndex)
+    if (nextIndex == currentIndex || !isSeparator(nextIndex))
         return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
 
     string lexme = source.substr(currentIndex, nextIndex - currentIndex);
     return Token(Token::Kind::INTEGER, lexme, currentLine, currentColumn);
 }
 
-Token Lexer::matchKeyword(string keyword, Token::Kind kind) {
-    bool isMatching = source.compare(currentIndex, keyword.length(), keyword) == 0;
-    bool isSeparated = (currentIndex + keyword.length() >= source.length()) || isWhiteSpace(currentIndex + keyword.length()) || isNewLine(currentIndex + keyword.length());
+Token Lexer::matchIdentifier() {
+    int nextIndex = currentIndex;
 
-    if (isMatching && isSeparated)
-        return Token(Token::Kind::FUNCTION, keyword, currentLine, currentColumn);
-    else
+    while (nextIndex < source.length() && isIdentifier(nextIndex))
+        nextIndex++;
+
+    if (nextIndex == currentIndex || !isSeparator(nextIndex))
         return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
+
+    string lexme = source.substr(currentIndex, nextIndex - currentIndex);
+    return Token(Token::Kind::IDENTIFIER, lexme, currentLine, currentColumn);
 }
 
 Token Lexer::matchInvalid() {
@@ -178,3 +202,35 @@ bool Lexer::isDigit(int index) {
     char character = source.at(index);
     return character >= '0' && character <= '9';
 }
+
+bool Lexer::isIdentifier(int index) {
+    char character = source.at(index);
+    bool isDigit = character >= '0' && character <= '9';
+    bool isAlpha = character >= 'a' && character <= 'z' || character >= 'A' && character <= 'Z';
+    bool isAlowedSymbol = character == '_';
+
+    return isDigit || isAlpha || isAlowedSymbol;
+}
+
+bool Lexer::isSeparator(int index) {
+    if (index >= source.length())
+        return true;
+
+    char character = source.at(index);
+    switch (character) {
+        case '+':
+        case '-':
+        case '*':
+        case '/':
+        case '%':
+        case '(':
+        case ')':
+        case ':':
+        case ' ':
+        case '\t':
+        case '\n':
+            return true;
+        default:
+            return false;
+    }
+}
diff --git a/src/Lexer.h b/src/Lexer.h
index 9d663dc..e35dbad 100644
--- a/src/Lexer.h
+++ b/src/Lexer.h
@@ -19,12 +19,15 @@ private:
     Token matchNewLine();
     Token matchInvalid();
     Token matchSymbol(char symbol, Token::Kind kind);
-    Token matchInteger();
     Token matchKeyword(string keyword, Token::Kind kind);
+    Token matchInteger();
+    Token matchIdentifier();
 
     bool isWhiteSpace(int index);
     bool isNewLine(int index);
     bool isDigit(int index);
+    bool isIdentifier(int index);
+    bool isSeparator(int index);
 
 public:
     Lexer(string source);
diff --git a/src/Token.cpp b/src/Token.cpp
index ec609fa..b56c462 100644
--- a/src/Token.cpp
+++ b/src/Token.cpp
@@ -48,14 +48,18 @@ string Token::toString() {
             return "LEFT_PAREN";
         case RIGHT_PAREN:
             return "RIGHT_PAREN";
-        case DOT:
-            return "DOT";
-        case COMMA:
-            return "COMMA";
+        case COLON:
+            return "COLON";
+        case SEMICOLON:
+            return "SEMICOLON";
         case INTEGER:
             return "INTEGER";
+        case IDENTIFIER:
+            return "IDENTIFIER";
         case FUNCTION:
             return "FUNCTION";
+        case RETURN:
+            return "RETURN";
         case NEW_LINE:
             return "NEW_LINE";
         case END:
diff --git a/src/Token.h b/src/Token.h
index ab837cc..3206f91 100644
--- a/src/Token.h
+++ b/src/Token.h
@@ -16,12 +16,14 @@ public:
         LEFT_PAREN,
         RIGHT_PAREN,
 
-        DOT,
-        COMMA,
+        COLON,
+        SEMICOLON,
 
         INTEGER,
+        IDENTIFIER,
 
         FUNCTION,
+        RETURN,
 
         NEW_LINE,
         END,