From e9f13e0a908087ab39a3b40754a5e63eea6c997b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?=
Date: Fri, 4 Jul 2025 09:20:23 +0900
Subject: [PATCH] Parse char types

---
 src/Lexer/Lexer.cpp                         | 40 +++++++++++++++++++++
 src/Lexer/Lexer.h                           |  1 +
 src/Lexer/Token.cpp                         |  1 +
 src/Lexer/Token.h                           |  1 +
 src/Logger.cpp                              |  2 ++
 src/Parser/Expression/ExpressionLiteral.cpp | 32 +++++++++++++++++
 6 files changed, 77 insertions(+)

diff --git a/src/Lexer/Lexer.cpp b/src/Lexer/Lexer.cpp
index 8d16cef..3eae847 100644
--- a/src/Lexer/Lexer.cpp
+++ b/src/Lexer/Lexer.cpp
@@ -237,6 +237,10 @@ shared_ptr<Token> Lexer::nextToken() {
     if (token != nullptr)
         return token;
 
+    token = matchIntegerChar();
+    if (token != nullptr)
+        return token;
+
     // type
     token = match(TokenKind::TYPE, "bool", true);
     if (token != nullptr)
@@ -354,6 +358,42 @@ shared_ptr<Token> Lexer::matchIntegerBin() {
     return token;
 }
 
+shared_ptr<Token> Lexer::matchIntegerChar() {
+    // A character literal has to start with a single quote
+    if (currentIndex >= source.size() || source.at(currentIndex) != '\'')
+        return nullptr;
+
+    // Look for the closing quote. A backslash escapes the character that follows
+    // it, so the second quote in '\'' doesn't end the literal while the one in '\\'
+    // does. The bound is checked before each read, so an unterminated literal at
+    // the end of the source is rejected instead of making at() throw.
+    int nextIndex = currentIndex + 1;
+    bool isEscaped = false;
+    bool isClosing = false;
+    while (nextIndex < source.length()) {
+        char symbol = source.at(nextIndex);
+        if (isEscaped) {
+            isEscaped = false;
+        } else if (symbol == '\\') {
+            isEscaped = true;
+        } else if (symbol == '\'') {
+            isClosing = true;
+            break;
+        }
+        nextIndex++;
+    }
+
+    // No closing quote, so this is not a character literal
+    if (!isClosing)
+        return nullptr;
+
+    // The lexme includes both the opening and the closing quote
+    string lexme = source.substr(currentIndex, nextIndex - currentIndex + 1);
+    shared_ptr<Token> token = make_shared<Token>(TokenKind::INTEGER_CHAR, lexme, currentLine, currentColumn);
+    advanceWithToken(token);
+    return token;
+}
+
 shared_ptr<Token> Lexer::matchReal() {
     int nextIndex = currentIndex;
 
diff --git a/src/Lexer/Lexer.h b/src/Lexer/Lexer.h
index 1f7018a..f385030 100644
--- a/src/Lexer/Lexer.h
+++ b/src/Lexer/Lexer.h
@@ -22,6 +22,7 @@ private:
     shared_ptr<Token> matchIntegerDec();
     shared_ptr<Token> matchIntegerHex();
     shared_ptr<Token> matchIntegerBin();
+    shared_ptr<Token> matchIntegerChar();
     shared_ptr<Token> matchReal();
     shared_ptr<Token> matchIdentifier();
     shared_ptr<Token> matchEnd();
diff --git a/src/Lexer/Token.cpp b/src/Lexer/Token.cpp
index 2c1c229..78874d5 100644
--- a/src/Lexer/Token.cpp
+++ b/src/Lexer/Token.cpp
@@ -40,6 +40,7 @@ vector<TokenKind> Token::tokensLiteral = {
     TokenKind::INTEGER_DEC,
     TokenKind::INTEGER_HEX,
     TokenKind::INTEGER_BIN,
+    TokenKind::INTEGER_CHAR,
     TokenKind::REAL
 };
 
diff --git a/src/Lexer/Token.h b/src/Lexer/Token.h
index 353832a..3c28693 100644
--- a/src/Lexer/Token.h
+++ b/src/Lexer/Token.h
@@ -39,6 +39,7 @@ enum class TokenKind {
     INTEGER_DEC,
     INTEGER_HEX,
     INTEGER_BIN,
+    INTEGER_CHAR,
     REAL,
     IDENTIFIER,
     TYPE,
diff --git a/src/Logger.cpp b/src/Logger.cpp
index a744500..c333c4f 100644
--- a/src/Logger.cpp
+++ b/src/Logger.cpp
@@ -74,6 +74,8 @@ string Logger::toString(shared_ptr<Token> token) {
             return "INT_HEX(" + token->getLexme() + ")";
         case TokenKind::INTEGER_BIN:
             return "INT_BIN(" + token->getLexme() + ")";
+        case TokenKind::INTEGER_CHAR:
+            return "INT_CHAR(" + token->getLexme() + ")";
         case TokenKind::REAL:
             return "REAL(" + token->getLexme() + ")";
         case TokenKind::IDENTIFIER:
diff --git a/src/Parser/Expression/ExpressionLiteral.cpp b/src/Parser/Expression/ExpressionLiteral.cpp
index df85403..ee1c084 100644
--- a/src/Parser/Expression/ExpressionLiteral.cpp
+++ b/src/Parser/Expression/ExpressionLiteral.cpp
@@ -32,6 +32,38 @@ Expression(ExpressionKind::LITERAL, ValueType::NONE) {
             valueType = ValueType::SINT32;
             break;
         }
+        case TokenKind::INTEGER_CHAR: {
+            string charString = token->getLexme();
+
+            valueType = ValueType::SINT32;
+            // NOTE(review): a lexme of any other shape (e.g. '' or an unknown escape)
+            // leaves sint32Value unassigned in this case, confirm that is intended
+            if (charString.length() == 3) {
+                sint32Value = charString[1];
+            } else if (charString.length() == 4 && charString[1] == '\\') {
+                switch (charString[2]) {
+                    case 'b':
+                        sint32Value = '\b';
+                        break;
+                    case 'n':
+                        sint32Value = '\n';
+                        break;
+                    case 't':
+                        sint32Value = '\t';
+                        break;
+                    case '\\':
+                        sint32Value = '\\';
+                        break;
+                    case '\'':
+                        sint32Value = '\'';
+                        break;
+                    case '\"':
+                        sint32Value = '\"';
+                        break;
+                }
+            }
+            break;
+        }
         case TokenKind::REAL:
             real32Value = stof(token->getLexme());
             valueType = ValueType::REAL32;