From 9e7747dcbc3a7e74bf1c0ed525214c52c7225a57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Tue, 8 Jul 2025 17:31:56 +0900 Subject: [PATCH] More complex type parsing --- src/Compiler/ModuleBuilder.cpp | 14 ++++- src/Compiler/ModuleBuilder.h | 3 +- src/Lexer/Lexer.cpp | 32 ++++++---- src/Lexer/Lexer.h | 2 + src/Logger.cpp | 8 +++ src/Parser/Expression/ExpressionLiteral.cpp | 12 ++-- src/Parser/Parser.cpp | 70 ++++++++++++++------- src/Parser/Parser.h | 6 +- src/Parser/ValueType.cpp | 32 +++++----- src/Parser/ValueType.h | 11 +++- 10 files changed, 128 insertions(+), 62 deletions(-) diff --git a/src/Compiler/ModuleBuilder.cpp b/src/Compiler/ModuleBuilder.cpp index 244049f..b438225 100644 --- a/src/Compiler/ModuleBuilder.cpp +++ b/src/Compiler/ModuleBuilder.cpp @@ -150,6 +150,10 @@ void ModuleBuilder::buildVarDeclaration(shared_ptr statement) builder->CreateStore(ar, arAlloca);*/ } +void ModuleBuilder::buildArrayDeclaration(shared_ptr statement) { + +} + void ModuleBuilder::buildAssignment(shared_ptr statement) { llvm::AllocaInst *alloca = getAlloca(statement->getName()); if (alloca == nullptr) @@ -251,7 +255,7 @@ llvm::Value *ModuleBuilder::valueForExpression(shared_ptr expression case ExpressionKind::LITERAL: return valueForLiteral(dynamic_pointer_cast(expression)); case ExpressionKind::ARRAY_LITERAL: - return valueForArrayLiteral(dynamic_pointer_cast(expression)); + return nullptr;// valuesForArrayLiteral(dynamic_pointer_cast(expression)); case ExpressionKind::GROUPING: return valueForExpression(dynamic_pointer_cast(expression)->getExpression()); case ExpressionKind::BINARY: @@ -284,8 +288,12 @@ llvm::Value *ModuleBuilder::valueForLiteral(shared_ptr expres } } -llvm::Value *ModuleBuilder::valueForArrayLiteral(shared_ptr expression) { - return nullptr; +vector ModuleBuilder::valuesForArrayLiteral(shared_ptr expression) { + vector values; + for (shared_ptr &expression : expression->getExpressions()) { + values.push_back(valueForExpression(expression)); + } + return values; } llvm::Value *ModuleBuilder::valueForGrouping(shared_ptr expression) { diff --git a/src/Compiler/ModuleBuilder.h b/src/Compiler/ModuleBuilder.h index ceccd92..76e724c 100644 --- a/src/Compiler/ModuleBuilder.h +++ b/src/Compiler/ModuleBuilder.h @@ -62,6 +62,7 @@ private: void buildStatement(shared_ptr statement); void buildFunctionDeclaration(shared_ptr statement); void buildVarDeclaration(shared_ptr statement); + void buildArrayDeclaration(shared_ptr statement); void buildAssignment(shared_ptr statement); void buildBlock(shared_ptr statement); void buildReturn(shared_ptr statement); @@ -71,7 +72,7 @@ private: llvm::Value *valueForExpression(shared_ptr expression); llvm::Value *valueForLiteral(shared_ptr expression); - llvm::Value *valueForArrayLiteral(shared_ptr expression); + vector valuesForArrayLiteral(shared_ptr expression); llvm::Value *valueForGrouping(shared_ptr expression); llvm::Value *valueForBinary(shared_ptr expression); llvm::Value *valueForBinaryBool(ExpressionBinaryOperation operation, llvm::Value *leftValue, llvm::Value *rightValue); diff --git a/src/Lexer/Lexer.cpp b/src/Lexer/Lexer.cpp index 8e6d246..b2a1b71 100644 --- a/src/Lexer/Lexer.cpp +++ b/src/Lexer/Lexer.cpp @@ -13,9 +13,9 @@ vector> Lexer::getTokens() { currentLine = 0; currentColumn = 0; + tokens.clear(); errors.clear(); - - vector> tokens; + shared_ptr token; do { token = nextToken(); @@ -251,15 +251,7 @@ shared_ptr Lexer::nextToken() { return token; // type - token = match(TokenKind::TYPE, "bool", true); - if (token != nullptr) - return token; - - token = match(TokenKind::TYPE, "sint32", true); - if (token != nullptr) - return token; - - token = match(TokenKind::TYPE, "real32", true); + token = matchType(); if (token != nullptr) return token; @@ -428,6 +420,24 @@ shared_ptr Lexer::matchIdentifier() { return token; } +shared_ptr Lexer::matchType() { + int nextIndex = currentIndex; + + if (tokens.empty() || !tokens.back()->isOfKind({TokenKind::IDENTIFIER, TokenKind::LESS, TokenKind::RIGHT_ARROW})) + return nullptr; + + while (nextIndex < source.length() && isIdentifier(nextIndex)) + nextIndex++; + + if (nextIndex == currentIndex || !isSeparator(nextIndex)) + return nullptr; + + string lexme = source.substr(currentIndex, nextIndex - currentIndex); + shared_ptr token = make_shared(TokenKind::TYPE, lexme, currentLine, currentColumn); + advanceWithToken(token); + return token; +} + shared_ptr Lexer::matchEnd() { if (currentIndex >= source.length()) return make_shared(TokenKind::END, "", currentLine, currentColumn); diff --git a/src/Lexer/Lexer.h b/src/Lexer/Lexer.h index f385030..41393c8 100644 --- a/src/Lexer/Lexer.h +++ b/src/Lexer/Lexer.h @@ -15,6 +15,7 @@ private: int currentIndex; int currentLine; int currentColumn; + vector> tokens; vector> errors; shared_ptr nextToken(); @@ -24,6 +25,7 @@ private: shared_ptr matchIntegerBin(); shared_ptr matchIntegerChar(); shared_ptr matchReal(); + shared_ptr matchType(); shared_ptr matchIdentifier(); shared_ptr matchEnd(); diff --git a/src/Logger.cpp b/src/Logger.cpp index 84c1e7a..cc48ef9 100644 --- a/src/Logger.cpp +++ b/src/Logger.cpp @@ -140,6 +140,10 @@ string Logger::toString(TokenKind tokenKind) { return "("; case TokenKind::RIGHT_PAREN: return ")"; + case TokenKind::LEFT_SQUARE_BRACKET: + return "["; + case TokenKind::RIGHT_SQUARE_BRACKET: + return "]"; case TokenKind::COMMA: return ","; case TokenKind::COLON: @@ -196,6 +200,8 @@ string Logger::toString(shared_ptr valueType) { return "SINT32"; case ValueTypeKind::REAL32: return "REAL32"; + case ValueTypeKind::DATA: + return "[]"; } } @@ -386,6 +392,8 @@ string Logger::toString(shared_ptr expression) { return to_string(expression->getSint32Value()); case ValueTypeKind::REAL32: return to_string(expression->getReal32Value()); + default: + return "?"; } } diff --git a/src/Parser/Expression/ExpressionLiteral.cpp b/src/Parser/Expression/ExpressionLiteral.cpp index 3bd506f..de518d5 100644 --- a/src/Parser/Expression/ExpressionLiteral.cpp +++ b/src/Parser/Expression/ExpressionLiteral.cpp @@ -11,20 +11,20 @@ Expression(ExpressionKind::LITERAL, nullptr) { switch (token->getKind()) { case TokenKind::BOOL: boolValue = token->getLexme().compare("true") == 0; - valueType = ValueType::valueTypeForToken(token); + valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; case TokenKind::INTEGER_DEC: { string numString = token->getLexme(); erase(numString, '_'); sint32Value = stoi(numString, nullptr, 10); - valueType = ValueType::valueTypeForToken(token); + valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; } case TokenKind::INTEGER_HEX: { string numString = token->getLexme(); erase(numString, '_'); sint32Value = stoi(numString, nullptr, 16); - valueType = ValueType::valueTypeForToken(token); + valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; } case TokenKind::INTEGER_BIN: { @@ -32,13 +32,13 @@ Expression(ExpressionKind::LITERAL, nullptr) { erase(numString, '_'); numString = numString.substr(2, numString.size()-1); sint32Value = stoi(numString, nullptr, 2); - valueType = ValueType::valueTypeForToken(token); + valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; } case TokenKind::INTEGER_CHAR: { string charString = token->getLexme(); - valueType = ValueType::valueTypeForToken(token); + valueType = ValueType::valueTypeForToken(token, nullptr, 0); if (charString.length() == 3) { sint32Value = charString[1]; } else if (charString.length() == 4 && charString[1] == '\\') { @@ -67,7 +67,7 @@ Expression(ExpressionKind::LITERAL, nullptr) { } case TokenKind::REAL: real32Value = stof(token->getLexme()); - valueType = ValueType::valueTypeForToken(token); + valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; default: exit(1); diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 3ac67ca..441464a 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -123,7 +123,7 @@ shared_ptr Parser::matchStatementMetaExternFunction() { } shared_ptr identifierToken = tokens.at(currentIndex++); shared_ptr argumentTypeToken = tokens.at(currentIndex++); - shared_ptr argumentType = ValueType::valueTypeForToken(argumentTypeToken); + shared_ptr argumentType = matchValueType(); if (argumentType == nullptr) { markError(TokenKind::TYPE, {}); return nullptr; @@ -137,8 +137,8 @@ shared_ptr Parser::matchStatementMetaExternFunction() { if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - shared_ptr returnTypeToken = tokens.at(currentIndex); - returnType = ValueType::valueTypeForToken(returnTypeToken); + //shared_ptr returnTypeToken = tokens.at(currentIndex); + returnType = matchValueType(); if (returnType == nullptr) { markError(TokenKind::TYPE, {}); return nullptr; @@ -155,21 +155,7 @@ shared_ptr Parser::matchStatementVariable() { return nullptr; shared_ptr identifierToken = tokens.at(currentIndex++); - shared_ptr valueTypeToken = tokens.at(currentIndex); - - shared_ptr valueType; - if (valueTypeToken->getLexme().compare("bool") == 0) - valueType = ValueType::BOOL; - else if (valueTypeToken->getLexme().compare("sint32") == 0) - valueType = ValueType::SINT32; - else if (valueTypeToken->getLexme().compare("real32") == 0) - valueType = ValueType::REAL32; - else { - markError(TokenKind::TYPE, {}); - return nullptr; - } - - currentIndex++; // type + shared_ptr valueType = matchValueType(); // Expect left arrow if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true)) { @@ -207,7 +193,7 @@ shared_ptr Parser::matchStatementFunction() { } shared_ptr identifierToken = tokens.at(currentIndex++); shared_ptr argumentTypeToken = tokens.at(currentIndex++); - shared_ptr argumentType = ValueType::valueTypeForToken(argumentTypeToken); + shared_ptr argumentType = matchValueType(); if (argumentType == nullptr) { markError(TokenKind::TYPE, {}); return nullptr; @@ -221,14 +207,12 @@ shared_ptr Parser::matchStatementFunction() { if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - shared_ptr returnTypeToken = tokens.at(currentIndex); - returnType = ValueType::valueTypeForToken(returnTypeToken); + //shared_ptr returnTypeToken = tokens.at(currentIndex); + returnType = matchValueType(); if (returnType == nullptr) { markError(TokenKind::TYPE, {}); return nullptr; } - - currentIndex++; // type } // consume new line @@ -638,6 +622,46 @@ shared_ptr Parser::matchExpressionBlock(vector terminalTo return make_shared(statements); } +shared_ptr Parser::matchValueType() { + if (!tryMatchingTokenKinds({TokenKind::TYPE}, true, false)) + return nullptr; + shared_ptr typeToken = tokens.at(currentIndex++); + shared_ptr subType; + int valueArg = 0; + + if (tryMatchingTokenKinds({TokenKind::LESS}, true, true)) { + if (!tryMatchingTokenKinds({TokenKind::TYPE}, true, false)) { + markError(TokenKind::TYPE, {}); + return nullptr; + } + subType = matchValueType(); + if (subType == nullptr) + return subType; + + if (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) { + if (!tryMatchingTokenKinds({TokenKind::INTEGER_DEC, TokenKind::INTEGER_HEX, TokenKind::INTEGER_BIN, TokenKind::INTEGER_CHAR}, false, false)) { + markError({}, "Expected integer literal"); + return nullptr; + } + shared_ptr expressionValue = matchExpressionLiteral(); + if (expressionValue == nullptr) { + markError({}, "Expected integer literal"); + return nullptr; + } + + valueArg = dynamic_pointer_cast(expressionValue)->getSint32Value(); + } + + + if (!tryMatchingTokenKinds({TokenKind::GREATER}, true, true)) { + markError(TokenKind::GREATER, {}); + return nullptr; + } + } + + return ValueType::valueTypeForToken(typeToken, subType, valueArg); +} + bool Parser::tryMatchingTokenKinds(vector kinds, bool shouldMatchAll, bool shouldAdvance) { int requiredCount = shouldMatchAll ? kinds.size() : 1; if (currentIndex + requiredCount > tokens.size()) diff --git a/src/Parser/Parser.h b/src/Parser/Parser.h index d2db35f..952a087 100644 --- a/src/Parser/Parser.h +++ b/src/Parser/Parser.h @@ -3,9 +3,11 @@ #include +class Error; + class Token; enum class TokenKind; -class Error; +class ValueType; class Expression; class Statement; @@ -47,6 +49,8 @@ private: shared_ptr matchExpressionBinary(shared_ptr left); shared_ptr matchExpressionBlock(vector terminalTokenKinds); + shared_ptr matchValueType(); + bool tryMatchingTokenKinds(vector kinds, bool shouldMatchAll, bool shouldAdvance); void markError(optional expectedTokenKind, optional message); diff --git a/src/Parser/ValueType.cpp b/src/Parser/ValueType.cpp index abd399f..a5418b8 100644 --- a/src/Parser/ValueType.cpp +++ b/src/Parser/ValueType.cpp @@ -2,36 +2,38 @@ #include "Lexer/Token.h" -shared_ptr ValueType::NONE = make_shared(ValueTypeKind::NONE); -shared_ptr ValueType::BOOL = make_shared(ValueTypeKind::BOOL); -shared_ptr ValueType::SINT32 = make_shared(ValueTypeKind::SINT32); -shared_ptr ValueType::REAL32 = make_shared(ValueTypeKind::REAL32); +shared_ptr ValueType::NONE = make_shared(ValueTypeKind::NONE, nullptr, 0); +shared_ptr ValueType::BOOL = make_shared(ValueTypeKind::BOOL, nullptr, 0); +shared_ptr ValueType::SINT32 = make_shared(ValueTypeKind::SINT32, nullptr, 0); +shared_ptr ValueType::REAL32 = make_shared(ValueTypeKind::REAL32, nullptr, 0); -ValueType::ValueType(ValueTypeKind kind): -kind(kind) { } +ValueType::ValueType(ValueTypeKind kind, shared_ptr subType, int valueArg): +kind(kind), subType(subType), valueArg(valueArg) { } -shared_ptr ValueType::valueTypeForToken(shared_ptr token) { +shared_ptr ValueType::valueTypeForToken(shared_ptr token, shared_ptr subType, int valueArg) { switch (token->getKind()) { case TokenKind::TYPE: { string lexme = token->getLexme(); if (lexme.compare("bool") == 0) - return make_shared(ValueTypeKind::BOOL); + return make_shared(ValueTypeKind::BOOL, subType, valueArg); else if (lexme.compare("sint32") == 0) - return make_shared(ValueTypeKind::SINT32); + return make_shared(ValueTypeKind::SINT32, subType, valueArg); else if (lexme.compare("real32") == 0) - return make_shared(ValueTypeKind::REAL32); + return make_shared(ValueTypeKind::REAL32, subType, valueArg); + else if (lexme.compare("data") == 0) + return make_shared(ValueTypeKind::DATA, subType, valueArg); else return nullptr; } case TokenKind::BOOL: - return make_shared(ValueTypeKind::BOOL); + return make_shared(ValueTypeKind::BOOL, nullptr, 0); case TokenKind::INTEGER_DEC: case TokenKind::INTEGER_HEX: case TokenKind::INTEGER_BIN: case TokenKind::INTEGER_CHAR: - return make_shared(ValueTypeKind::SINT32); + return make_shared(ValueTypeKind::SINT32, nullptr, 0); case TokenKind::REAL: - return make_shared(ValueTypeKind::REAL32); + return make_shared(ValueTypeKind::REAL32, nullptr, 0); default: return nullptr; } @@ -39,4 +41,6 @@ shared_ptr ValueType::valueTypeForToken(shared_ptr token) { ValueTypeKind ValueType::getKind() { return kind; -} \ No newline at end of file +} + +shared_ptr getSubType() {} \ No newline at end of file diff --git a/src/Parser/ValueType.h b/src/Parser/ValueType.h index 2540cd8..ce118eb 100644 --- a/src/Parser/ValueType.h +++ b/src/Parser/ValueType.h @@ -11,22 +11,27 @@ enum class ValueTypeKind { NONE, BOOL, SINT32, - REAL32 + REAL32, + DATA }; class ValueType { private: ValueTypeKind kind; + shared_ptr subType; + int valueArg; public: static shared_ptr NONE; static shared_ptr BOOL; static shared_ptr SINT32; static shared_ptr REAL32; - static shared_ptr valueTypeForToken(shared_ptr token); + static shared_ptr valueTypeForToken(shared_ptr token, shared_ptr subType, int valueArg); - ValueType(ValueTypeKind kind); + ValueType(ValueTypeKind kind, shared_ptr subType, int valueArg); ValueTypeKind getKind(); + shared_ptr getSubType(); + int getValueArg(); }; #endif \ No newline at end of file