From 18dd7d05d41b07b8c73276ca4410666e657f136e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Mon, 7 Jul 2025 14:53:56 +0900 Subject: [PATCH 01/10] Lex and parse array expression --- src/Compiler/ModuleBuilder.cpp | 22 +++++++++++++ src/Compiler/ModuleBuilder.h | 2 ++ src/Lexer/Lexer.cpp | 10 ++++++ src/Lexer/Token.h | 2 ++ src/Logger.cpp | 19 +++++++++++ src/Logger.h | 2 ++ src/Parser/Expression/Expression.h | 1 + .../Expression/ExpressionArrayLiteral.cpp | 8 +++++ .../Expression/ExpressionArrayLiteral.h | 15 +++++++++ src/Parser/Expression/ExpressionLiteral.h | 9 ++++-- src/Parser/Parser.cpp | 32 ++++++++++++++++--- src/Parser/Parser.h | 1 + 12 files changed, 117 insertions(+), 6 deletions(-) create mode 100644 src/Parser/Expression/ExpressionArrayLiteral.cpp create mode 100644 src/Parser/Expression/ExpressionArrayLiteral.h diff --git a/src/Compiler/ModuleBuilder.cpp b/src/Compiler/ModuleBuilder.cpp index 5777fe5..244049f 100644 --- a/src/Compiler/ModuleBuilder.cpp +++ b/src/Compiler/ModuleBuilder.cpp @@ -6,6 +6,7 @@ #include "Parser/Expression/ExpressionGrouping.h" #include "Parser/Expression/ExpressionLiteral.h" +#include "Parser/Expression/ExpressionArrayLiteral.h" #include "Parser/Expression/ExpressionVariable.h" #include "Parser/Expression/ExpressionCall.h" #include "Parser/Expression/ExpressionIfElse.h" @@ -132,6 +133,21 @@ void ModuleBuilder::buildVarDeclaration(shared_ptr statement) if (!setAlloca(statement->getName(), alloca)) return; builder->CreateStore(value, alloca); + + /*auto *aType = llvm::ArrayType::get(typeSint32, 7); + llvm::AllocaInst *allocaArr = builder->CreateAlloca(aType, nullptr, statement->getName() + "_Arr"); + + //llvm::AllocaInst *allocaBrr = builder->CreateAlloca(typeSint32, nullptr, statement->getName() + "_Arr"); + vector values; + auto *bType = llvm::ArrayType::get(typeSint32, 9); + for (int i=0; i<9; i++) { + llvm::Constant *cnst = llvm::ConstantInt::get(typeSint32, i, true); + values.push_back(cnst); + } + llvm::Constant *ar = (llvm::ConstantArray *)llvm::ConstantArray::get(bType, values); + //auto vAr = ar->getAggregateElement(0); + llvm::AllocaInst *arAlloca = builder->CreateAlloca(bType, nullptr, "arBtype"); + builder->CreateStore(ar, arAlloca);*/ } void ModuleBuilder::buildAssignment(shared_ptr statement) { @@ -234,6 +250,8 @@ llvm::Value *ModuleBuilder::valueForExpression(shared_ptr expression switch (expression->getKind()) { case ExpressionKind::LITERAL: return valueForLiteral(dynamic_pointer_cast(expression)); + case ExpressionKind::ARRAY_LITERAL: + return valueForArrayLiteral(dynamic_pointer_cast(expression)); case ExpressionKind::GROUPING: return valueForExpression(dynamic_pointer_cast(expression)->getExpression()); case ExpressionKind::BINARY: @@ -266,6 +284,10 @@ llvm::Value *ModuleBuilder::valueForLiteral(shared_ptr expres } } +llvm::Value *ModuleBuilder::valueForArrayLiteral(shared_ptr expression) { + return nullptr; +} + llvm::Value *ModuleBuilder::valueForGrouping(shared_ptr expression) { return valueForExpression(expression->getExpression()); } diff --git a/src/Compiler/ModuleBuilder.h b/src/Compiler/ModuleBuilder.h index ddc70df..ceccd92 100644 --- a/src/Compiler/ModuleBuilder.h +++ b/src/Compiler/ModuleBuilder.h @@ -17,6 +17,7 @@ class ValueType; class Expression; class ExpressionGrouping; class ExpressionLiteral; +class ExpressionArrayLiteral; class ExpressionVariable; class ExpressionCall; class ExpressionIfElse; @@ -70,6 +71,7 @@ private: llvm::Value *valueForExpression(shared_ptr expression); llvm::Value *valueForLiteral(shared_ptr expression); + llvm::Value *valueForArrayLiteral(shared_ptr expression); llvm::Value *valueForGrouping(shared_ptr expression); llvm::Value *valueForBinary(shared_ptr expression); llvm::Value *valueForBinaryBool(ExpressionBinaryOperation operation, llvm::Value *leftValue, llvm::Value *rightValue); diff --git a/src/Lexer/Lexer.cpp b/src/Lexer/Lexer.cpp index 8d4ec3a..8e6d246 100644 --- a/src/Lexer/Lexer.cpp +++ b/src/Lexer/Lexer.cpp @@ -126,6 +126,14 @@ shared_ptr Lexer::nextToken() { if (token != nullptr) return token; + token = match(TokenKind::LEFT_SQUARE_BRACKET, "[", false); + if (token != nullptr) + return token; + + token = match(TokenKind::RIGHT_SQUARE_BRACKET, "]", false); + if (token != nullptr) + return token; + token = match(TokenKind::COMMA, ",", false); if (token != nullptr) return token; @@ -472,6 +480,8 @@ bool Lexer::isSeparator(int index) { case '>': case '(': case ')': + case '[': + case ']': case ',': case ':': case ';': diff --git a/src/Lexer/Token.h b/src/Lexer/Token.h index 1987b11..7c15be1 100644 --- a/src/Lexer/Token.h +++ b/src/Lexer/Token.h @@ -21,6 +21,8 @@ enum class TokenKind { LEFT_PAREN, RIGHT_PAREN, + LEFT_SQUARE_BRACKET, + RIGHT_SQUARE_BRACKET, COMMA, COLON, SEMICOLON, diff --git a/src/Logger.cpp b/src/Logger.cpp index b776a35..84c1e7a 100644 --- a/src/Logger.cpp +++ b/src/Logger.cpp @@ -23,6 +23,7 @@ #include "Parser/Expression/ExpressionVariable.h" #include "Parser/Expression/ExpressionGrouping.h" #include "Parser/Expression/ExpressionLiteral.h" +#include "Parser/Expression/ExpressionArrayLiteral.h" #include "Parser/Expression/ExpressionCall.h" #include "Parser/Expression/ExpressionBlock.h" @@ -56,6 +57,10 @@ string Logger::toString(shared_ptr token) { return "("; case TokenKind::RIGHT_PAREN: return ")"; + case TokenKind::LEFT_SQUARE_BRACKET: + return "["; + case TokenKind::RIGHT_SQUARE_BRACKET: + return "]"; case TokenKind::COMMA: return ","; case TokenKind::COLON: @@ -308,6 +313,8 @@ string Logger::toString(shared_ptr expression) { return toString(dynamic_pointer_cast(expression)); case ExpressionKind::LITERAL: return toString(dynamic_pointer_cast(expression)); + case ExpressionKind::ARRAY_LITERAL: + return toString(dynamic_pointer_cast(expression)); case ExpressionKind::CALL: return toString(dynamic_pointer_cast(expression)); case ExpressionKind::BLOCK: @@ -382,6 +389,18 @@ string Logger::toString(shared_ptr expression) { } } +string Logger::toString(shared_ptr expression) { + string text; + text += "["; + for (int i=0; igetExpressions().size(); i++) { + text += toString(expression->getExpressions().at(i)); + if (i < expression->getExpressions().size() - 1) + text += ", "; + } + text += "]"; + return text; +} + string Logger::toString(shared_ptr expression) { string argsString; for (int i = 0; i < expression->getArgumentExpressions().size(); i++) { diff --git a/src/Logger.h b/src/Logger.h index 0ffcdff..d5f1eba 100644 --- a/src/Logger.h +++ b/src/Logger.h @@ -23,6 +23,7 @@ class ExpressionIfElse; class ExpressionVariable; class ExpressionGrouping; class ExpressionLiteral; +class ExpressionArrayLiteral; class ExpressionCall; class ExpressionBlock; @@ -52,6 +53,7 @@ private: static string toString(shared_ptr expression); static string toString(shared_ptr expression); static string toString(shared_ptr expression); + static string toString(shared_ptr expression); static string toString(shared_ptr expression); static string toString(shared_ptr expression); diff --git a/src/Parser/Expression/Expression.h b/src/Parser/Expression/Expression.h index 249f606..66bd170 100644 --- a/src/Parser/Expression/Expression.h +++ b/src/Parser/Expression/Expression.h @@ -10,6 +10,7 @@ using namespace std; enum class ExpressionKind { LITERAL, + ARRAY_LITERAL, GROUPING, BINARY, IF_ELSE, diff --git a/src/Parser/Expression/ExpressionArrayLiteral.cpp b/src/Parser/Expression/ExpressionArrayLiteral.cpp new file mode 100644 index 0000000..2e6a7e5 --- /dev/null +++ b/src/Parser/Expression/ExpressionArrayLiteral.cpp @@ -0,0 +1,8 @@ +#include "ExpressionArrayLiteral.h" + +ExpressionArrayLiteral::ExpressionArrayLiteral(vector> expressions): +Expression(ExpressionKind::ARRAY_LITERAL, nullptr), expressions(expressions) { } + +vector> ExpressionArrayLiteral::getExpressions() { + return expressions; +} \ No newline at end of file diff --git a/src/Parser/Expression/ExpressionArrayLiteral.h b/src/Parser/Expression/ExpressionArrayLiteral.h new file mode 100644 index 0000000..ae56585 --- /dev/null +++ b/src/Parser/Expression/ExpressionArrayLiteral.h @@ -0,0 +1,15 @@ +#ifndef EXPRESSION_ARRAY_LITERAL_H +#define EXPRESSION_ARRAY_LITERAL_H + +#include "Expression.h" + +class ExpressionArrayLiteral: public Expression { +private: + vector> expressions; + +public: + ExpressionArrayLiteral(vector> expressions); + vector> getExpressions(); +}; + +#endif \ No newline at end of file diff --git a/src/Parser/Expression/ExpressionLiteral.h b/src/Parser/Expression/ExpressionLiteral.h index 0a9dfb2..a693cd0 100644 --- a/src/Parser/Expression/ExpressionLiteral.h +++ b/src/Parser/Expression/ExpressionLiteral.h @@ -1,4 +1,7 @@ -#include "Parser/Expression/Expression.h" +#ifndef EXPRESSION_LITERAL_H +#define EXPRESSION_LITERAL_H + +#include "Expression.h" class ExpressionLiteral: public Expression { private: @@ -12,4 +15,6 @@ public: bool getBoolValue(); int32_t getSint32Value(); float getReal32Value(); -}; \ No newline at end of file +}; + +#endif \ No newline at end of file diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 979b15c..3ac67ca 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -8,6 +8,7 @@ #include "Parser/Expression/ExpressionGrouping.h" #include "Parser/Expression/ExpressionLiteral.h" +#include "Parser/Expression/ExpressionArrayLiteral.h" #include "Parser/Expression/ExpressionVariable.h" #include "Parser/Expression/ExpressionCall.h" #include "Parser/Expression/ExpressionIfElse.h" @@ -261,10 +262,8 @@ shared_ptr Parser::matchStatementBlock(vector terminalToke break; // except new line - if (statement != nullptr && !tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) { + if (statement != nullptr && !tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) markError(TokenKind::NEW_LINE, {}); - return nullptr; - } } return make_shared(statements); @@ -446,6 +445,10 @@ shared_ptr Parser::matchPrimary() { if (expression != nullptr) return expression; + expression = matchExpressionArrayLiteral(); + if (expression != nullptr) + return expression; + expression = matchExpressionCall(); if (expression != nullptr) return expression; @@ -458,7 +461,6 @@ shared_ptr Parser::matchPrimary() { } shared_ptr Parser::matchExpressionGrouping() { - shared_ptr token = tokens.at(currentIndex); if (tryMatchingTokenKinds({TokenKind::LEFT_PAREN}, true, true)) { shared_ptr expression = matchTerm(); // has grouped expression failed? @@ -483,6 +485,28 @@ shared_ptr Parser::matchExpressionLiteral() { return nullptr; } +shared_ptr Parser::matchExpressionArrayLiteral() { + if (!tryMatchingTokenKinds({TokenKind::LEFT_SQUARE_BRACKET}, true, true)) + return nullptr; + + vector> expressions; + if (!tryMatchingTokenKinds({TokenKind::RIGHT_SQUARE_BRACKET}, true, true)) { + do { + shared_ptr expression = nextExpression(); + if (expression != nullptr) + expressions.push_back(expression); + } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); + + if (!tryMatchingTokenKinds({TokenKind::RIGHT_SQUARE_BRACKET}, true, true)) { + markError(TokenKind::RIGHT_SQUARE_BRACKET, {}); + return nullptr; + } + } + + + return make_shared(expressions); +} + shared_ptr Parser::matchExpressionVariable() { shared_ptr token = tokens.at(currentIndex); diff --git a/src/Parser/Parser.h b/src/Parser/Parser.h index 79dddee..d2db35f 100644 --- a/src/Parser/Parser.h +++ b/src/Parser/Parser.h @@ -40,6 +40,7 @@ private: shared_ptr matchExpressionGrouping(); shared_ptr matchExpressionLiteral(); + shared_ptr matchExpressionArrayLiteral(); shared_ptr matchExpressionVariable(); shared_ptr matchExpressionCall(); shared_ptr matchExpressionIfElse(); From 9e7747dcbc3a7e74bf1c0ed525214c52c7225a57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Tue, 8 Jul 2025 17:31:56 +0900 Subject: [PATCH 02/10] More complex type parsing --- src/Compiler/ModuleBuilder.cpp | 14 ++++- src/Compiler/ModuleBuilder.h | 3 +- src/Lexer/Lexer.cpp | 32 ++++++---- src/Lexer/Lexer.h | 2 + src/Logger.cpp | 8 +++ src/Parser/Expression/ExpressionLiteral.cpp | 12 ++-- src/Parser/Parser.cpp | 70 ++++++++++++++------- src/Parser/Parser.h | 6 +- src/Parser/ValueType.cpp | 32 +++++----- src/Parser/ValueType.h | 11 +++- 10 files changed, 128 insertions(+), 62 deletions(-) diff --git a/src/Compiler/ModuleBuilder.cpp b/src/Compiler/ModuleBuilder.cpp index 244049f..b438225 100644 --- a/src/Compiler/ModuleBuilder.cpp +++ b/src/Compiler/ModuleBuilder.cpp @@ -150,6 +150,10 @@ void ModuleBuilder::buildVarDeclaration(shared_ptr statement) builder->CreateStore(ar, arAlloca);*/ } +void ModuleBuilder::buildArrayDeclaration(shared_ptr statement) { + +} + void ModuleBuilder::buildAssignment(shared_ptr statement) { llvm::AllocaInst *alloca = getAlloca(statement->getName()); if (alloca == nullptr) @@ -251,7 +255,7 @@ llvm::Value *ModuleBuilder::valueForExpression(shared_ptr expression case ExpressionKind::LITERAL: return valueForLiteral(dynamic_pointer_cast(expression)); case ExpressionKind::ARRAY_LITERAL: - return valueForArrayLiteral(dynamic_pointer_cast(expression)); + return nullptr;// valuesForArrayLiteral(dynamic_pointer_cast(expression)); case ExpressionKind::GROUPING: return valueForExpression(dynamic_pointer_cast(expression)->getExpression()); case ExpressionKind::BINARY: @@ -284,8 +288,12 @@ llvm::Value *ModuleBuilder::valueForLiteral(shared_ptr expres } } -llvm::Value *ModuleBuilder::valueForArrayLiteral(shared_ptr expression) { - return nullptr; +vector ModuleBuilder::valuesForArrayLiteral(shared_ptr expression) { + vector values; + for (shared_ptr &expression : expression->getExpressions()) { + values.push_back(valueForExpression(expression)); + } + return values; } llvm::Value *ModuleBuilder::valueForGrouping(shared_ptr expression) { diff --git a/src/Compiler/ModuleBuilder.h b/src/Compiler/ModuleBuilder.h index ceccd92..76e724c 100644 --- a/src/Compiler/ModuleBuilder.h +++ b/src/Compiler/ModuleBuilder.h @@ -62,6 +62,7 @@ private: void buildStatement(shared_ptr statement); void buildFunctionDeclaration(shared_ptr statement); void buildVarDeclaration(shared_ptr statement); + void buildArrayDeclaration(shared_ptr statement); void buildAssignment(shared_ptr statement); void buildBlock(shared_ptr statement); void buildReturn(shared_ptr statement); @@ -71,7 +72,7 @@ private: llvm::Value *valueForExpression(shared_ptr expression); llvm::Value *valueForLiteral(shared_ptr expression); - llvm::Value *valueForArrayLiteral(shared_ptr expression); + vector valuesForArrayLiteral(shared_ptr expression); llvm::Value *valueForGrouping(shared_ptr expression); llvm::Value *valueForBinary(shared_ptr expression); llvm::Value *valueForBinaryBool(ExpressionBinaryOperation operation, llvm::Value *leftValue, llvm::Value *rightValue); diff --git a/src/Lexer/Lexer.cpp b/src/Lexer/Lexer.cpp index 8e6d246..b2a1b71 100644 --- a/src/Lexer/Lexer.cpp +++ b/src/Lexer/Lexer.cpp @@ -13,9 +13,9 @@ vector> Lexer::getTokens() { currentLine = 0; currentColumn = 0; + tokens.clear(); errors.clear(); - - vector> tokens; + shared_ptr token; do { token = nextToken(); @@ -251,15 +251,7 @@ shared_ptr Lexer::nextToken() { return token; // type - token = match(TokenKind::TYPE, "bool", true); - if (token != nullptr) - return token; - - token = match(TokenKind::TYPE, "sint32", true); - if (token != nullptr) - return token; - - token = match(TokenKind::TYPE, "real32", true); + token = matchType(); if (token != nullptr) return token; @@ -428,6 +420,24 @@ shared_ptr Lexer::matchIdentifier() { return token; } +shared_ptr Lexer::matchType() { + int nextIndex = currentIndex; + + if (tokens.empty() || !tokens.back()->isOfKind({TokenKind::IDENTIFIER, TokenKind::LESS, TokenKind::RIGHT_ARROW})) + return nullptr; + + while (nextIndex < source.length() && isIdentifier(nextIndex)) + nextIndex++; + + if (nextIndex == currentIndex || !isSeparator(nextIndex)) + return nullptr; + + string lexme = source.substr(currentIndex, nextIndex - currentIndex); + shared_ptr token = make_shared(TokenKind::TYPE, lexme, currentLine, currentColumn); + advanceWithToken(token); + return token; +} + shared_ptr Lexer::matchEnd() { if (currentIndex >= source.length()) return make_shared(TokenKind::END, "", currentLine, currentColumn); diff --git a/src/Lexer/Lexer.h b/src/Lexer/Lexer.h index f385030..41393c8 100644 --- a/src/Lexer/Lexer.h +++ b/src/Lexer/Lexer.h @@ -15,6 +15,7 @@ private: int currentIndex; int currentLine; int currentColumn; + vector> tokens; vector> errors; shared_ptr nextToken(); @@ -24,6 +25,7 @@ private: shared_ptr matchIntegerBin(); shared_ptr matchIntegerChar(); shared_ptr matchReal(); + shared_ptr matchType(); shared_ptr matchIdentifier(); shared_ptr matchEnd(); diff --git a/src/Logger.cpp b/src/Logger.cpp index 84c1e7a..cc48ef9 100644 --- a/src/Logger.cpp +++ b/src/Logger.cpp @@ -140,6 +140,10 @@ string Logger::toString(TokenKind tokenKind) { return "("; case TokenKind::RIGHT_PAREN: return ")"; + case TokenKind::LEFT_SQUARE_BRACKET: + return "["; + case TokenKind::RIGHT_SQUARE_BRACKET: + return "]"; case TokenKind::COMMA: return ","; case TokenKind::COLON: @@ -196,6 +200,8 @@ string Logger::toString(shared_ptr valueType) { return "SINT32"; case ValueTypeKind::REAL32: return "REAL32"; + case ValueTypeKind::DATA: + return "[]"; } } @@ -386,6 +392,8 @@ string Logger::toString(shared_ptr expression) { return to_string(expression->getSint32Value()); case ValueTypeKind::REAL32: return to_string(expression->getReal32Value()); + default: + return "?"; } } diff --git a/src/Parser/Expression/ExpressionLiteral.cpp b/src/Parser/Expression/ExpressionLiteral.cpp index 3bd506f..de518d5 100644 --- a/src/Parser/Expression/ExpressionLiteral.cpp +++ b/src/Parser/Expression/ExpressionLiteral.cpp @@ -11,20 +11,20 @@ Expression(ExpressionKind::LITERAL, nullptr) { switch (token->getKind()) { case TokenKind::BOOL: boolValue = token->getLexme().compare("true") == 0; - valueType = ValueType::valueTypeForToken(token); + valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; case TokenKind::INTEGER_DEC: { string numString = token->getLexme(); erase(numString, '_'); sint32Value = stoi(numString, nullptr, 10); - valueType = ValueType::valueTypeForToken(token); + valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; } case TokenKind::INTEGER_HEX: { string numString = token->getLexme(); erase(numString, '_'); sint32Value = stoi(numString, nullptr, 16); - valueType = ValueType::valueTypeForToken(token); + valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; } case TokenKind::INTEGER_BIN: { @@ -32,13 +32,13 @@ Expression(ExpressionKind::LITERAL, nullptr) { erase(numString, '_'); numString = numString.substr(2, numString.size()-1); sint32Value = stoi(numString, nullptr, 2); - valueType = ValueType::valueTypeForToken(token); + valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; } case TokenKind::INTEGER_CHAR: { string charString = token->getLexme(); - valueType = ValueType::valueTypeForToken(token); + valueType = ValueType::valueTypeForToken(token, nullptr, 0); if (charString.length() == 3) { sint32Value = charString[1]; } else if (charString.length() == 4 && charString[1] == '\\') { @@ -67,7 +67,7 @@ Expression(ExpressionKind::LITERAL, nullptr) { } case TokenKind::REAL: real32Value = stof(token->getLexme()); - valueType = ValueType::valueTypeForToken(token); + valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; default: exit(1); diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 3ac67ca..441464a 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -123,7 +123,7 @@ shared_ptr Parser::matchStatementMetaExternFunction() { } shared_ptr identifierToken = tokens.at(currentIndex++); shared_ptr argumentTypeToken = tokens.at(currentIndex++); - shared_ptr argumentType = ValueType::valueTypeForToken(argumentTypeToken); + shared_ptr argumentType = matchValueType(); if (argumentType == nullptr) { markError(TokenKind::TYPE, {}); return nullptr; @@ -137,8 +137,8 @@ shared_ptr Parser::matchStatementMetaExternFunction() { if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - shared_ptr returnTypeToken = tokens.at(currentIndex); - returnType = ValueType::valueTypeForToken(returnTypeToken); + //shared_ptr returnTypeToken = tokens.at(currentIndex); + returnType = matchValueType(); if (returnType == nullptr) { markError(TokenKind::TYPE, {}); return nullptr; @@ -155,21 +155,7 @@ shared_ptr Parser::matchStatementVariable() { return nullptr; shared_ptr identifierToken = tokens.at(currentIndex++); - shared_ptr valueTypeToken = tokens.at(currentIndex); - - shared_ptr valueType; - if (valueTypeToken->getLexme().compare("bool") == 0) - valueType = ValueType::BOOL; - else if (valueTypeToken->getLexme().compare("sint32") == 0) - valueType = ValueType::SINT32; - else if (valueTypeToken->getLexme().compare("real32") == 0) - valueType = ValueType::REAL32; - else { - markError(TokenKind::TYPE, {}); - return nullptr; - } - - currentIndex++; // type + shared_ptr valueType = matchValueType(); // Expect left arrow if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true)) { @@ -207,7 +193,7 @@ shared_ptr Parser::matchStatementFunction() { } shared_ptr identifierToken = tokens.at(currentIndex++); shared_ptr argumentTypeToken = tokens.at(currentIndex++); - shared_ptr argumentType = ValueType::valueTypeForToken(argumentTypeToken); + shared_ptr argumentType = matchValueType(); if (argumentType == nullptr) { markError(TokenKind::TYPE, {}); return nullptr; @@ -221,14 +207,12 @@ shared_ptr Parser::matchStatementFunction() { if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - shared_ptr returnTypeToken = tokens.at(currentIndex); - returnType = ValueType::valueTypeForToken(returnTypeToken); + //shared_ptr returnTypeToken = tokens.at(currentIndex); + returnType = matchValueType(); if (returnType == nullptr) { markError(TokenKind::TYPE, {}); return nullptr; } - - currentIndex++; // type } // consume new line @@ -638,6 +622,46 @@ shared_ptr Parser::matchExpressionBlock(vector terminalTo return make_shared(statements); } +shared_ptr Parser::matchValueType() { + if (!tryMatchingTokenKinds({TokenKind::TYPE}, true, false)) + return nullptr; + shared_ptr typeToken = tokens.at(currentIndex++); + shared_ptr subType; + int valueArg = 0; + + if (tryMatchingTokenKinds({TokenKind::LESS}, true, true)) { + if (!tryMatchingTokenKinds({TokenKind::TYPE}, true, false)) { + markError(TokenKind::TYPE, {}); + return nullptr; + } + subType = matchValueType(); + if (subType == nullptr) + return subType; + + if (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) { + if (!tryMatchingTokenKinds({TokenKind::INTEGER_DEC, TokenKind::INTEGER_HEX, TokenKind::INTEGER_BIN, TokenKind::INTEGER_CHAR}, false, false)) { + markError({}, "Expected integer literal"); + return nullptr; + } + shared_ptr expressionValue = matchExpressionLiteral(); + if (expressionValue == nullptr) { + markError({}, "Expected integer literal"); + return nullptr; + } + + valueArg = dynamic_pointer_cast(expressionValue)->getSint32Value(); + } + + + if (!tryMatchingTokenKinds({TokenKind::GREATER}, true, true)) { + markError(TokenKind::GREATER, {}); + return nullptr; + } + } + + return ValueType::valueTypeForToken(typeToken, subType, valueArg); +} + bool Parser::tryMatchingTokenKinds(vector kinds, bool shouldMatchAll, bool shouldAdvance) { int requiredCount = shouldMatchAll ? kinds.size() : 1; if (currentIndex + requiredCount > tokens.size()) diff --git a/src/Parser/Parser.h b/src/Parser/Parser.h index d2db35f..952a087 100644 --- a/src/Parser/Parser.h +++ b/src/Parser/Parser.h @@ -3,9 +3,11 @@ #include +class Error; + class Token; enum class TokenKind; -class Error; +class ValueType; class Expression; class Statement; @@ -47,6 +49,8 @@ private: shared_ptr matchExpressionBinary(shared_ptr left); shared_ptr matchExpressionBlock(vector terminalTokenKinds); + shared_ptr matchValueType(); + bool tryMatchingTokenKinds(vector kinds, bool shouldMatchAll, bool shouldAdvance); void markError(optional expectedTokenKind, optional message); diff --git a/src/Parser/ValueType.cpp b/src/Parser/ValueType.cpp index abd399f..a5418b8 100644 --- a/src/Parser/ValueType.cpp +++ b/src/Parser/ValueType.cpp @@ -2,36 +2,38 @@ #include "Lexer/Token.h" -shared_ptr ValueType::NONE = make_shared(ValueTypeKind::NONE); -shared_ptr ValueType::BOOL = make_shared(ValueTypeKind::BOOL); -shared_ptr ValueType::SINT32 = make_shared(ValueTypeKind::SINT32); -shared_ptr ValueType::REAL32 = make_shared(ValueTypeKind::REAL32); +shared_ptr ValueType::NONE = make_shared(ValueTypeKind::NONE, nullptr, 0); +shared_ptr ValueType::BOOL = make_shared(ValueTypeKind::BOOL, nullptr, 0); +shared_ptr ValueType::SINT32 = make_shared(ValueTypeKind::SINT32, nullptr, 0); +shared_ptr ValueType::REAL32 = make_shared(ValueTypeKind::REAL32, nullptr, 0); -ValueType::ValueType(ValueTypeKind kind): -kind(kind) { } +ValueType::ValueType(ValueTypeKind kind, shared_ptr subType, int valueArg): +kind(kind), subType(subType), valueArg(valueArg) { } -shared_ptr ValueType::valueTypeForToken(shared_ptr token) { +shared_ptr ValueType::valueTypeForToken(shared_ptr token, shared_ptr subType, int valueArg) { switch (token->getKind()) { case TokenKind::TYPE: { string lexme = token->getLexme(); if (lexme.compare("bool") == 0) - return make_shared(ValueTypeKind::BOOL); + return make_shared(ValueTypeKind::BOOL, subType, valueArg); else if (lexme.compare("sint32") == 0) - return make_shared(ValueTypeKind::SINT32); + return make_shared(ValueTypeKind::SINT32, subType, valueArg); else if (lexme.compare("real32") == 0) - return make_shared(ValueTypeKind::REAL32); + return make_shared(ValueTypeKind::REAL32, subType, valueArg); + else if (lexme.compare("data") == 0) + return make_shared(ValueTypeKind::DATA, subType, valueArg); else return nullptr; } case TokenKind::BOOL: - return make_shared(ValueTypeKind::BOOL); + return make_shared(ValueTypeKind::BOOL, nullptr, 0); case TokenKind::INTEGER_DEC: case TokenKind::INTEGER_HEX: case TokenKind::INTEGER_BIN: case TokenKind::INTEGER_CHAR: - return make_shared(ValueTypeKind::SINT32); + return make_shared(ValueTypeKind::SINT32, nullptr, 0); case TokenKind::REAL: - return make_shared(ValueTypeKind::REAL32); + return make_shared(ValueTypeKind::REAL32, nullptr, 0); default: return nullptr; } @@ -39,4 +41,6 @@ shared_ptr ValueType::valueTypeForToken(shared_ptr token) { ValueTypeKind ValueType::getKind() { return kind; -} \ No newline at end of file +} + +shared_ptr getSubType() {} \ No newline at end of file diff --git a/src/Parser/ValueType.h b/src/Parser/ValueType.h index 2540cd8..ce118eb 100644 --- a/src/Parser/ValueType.h +++ b/src/Parser/ValueType.h @@ -11,22 +11,27 @@ enum class ValueTypeKind { NONE, BOOL, SINT32, - REAL32 + REAL32, + DATA }; class ValueType { private: ValueTypeKind kind; + shared_ptr subType; + int valueArg; public: static shared_ptr NONE; static shared_ptr BOOL; static shared_ptr SINT32; static shared_ptr REAL32; - static shared_ptr valueTypeForToken(shared_ptr token); + static shared_ptr valueTypeForToken(shared_ptr token, shared_ptr subType, int valueArg); - ValueType(ValueTypeKind kind); + ValueType(ValueTypeKind kind, shared_ptr subType, int valueArg); ValueTypeKind getKind(); + shared_ptr getSubType(); + int getValueArg(); }; #endif \ No newline at end of file From b980fd8753cd26cf59001cd97fc11310503d1b3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Tue, 8 Jul 2025 18:49:39 +0900 Subject: [PATCH 03/10] Working array init --- src/Compiler/ModuleBuilder.cpp | 59 ++++++++++++++++++++++------------ src/Compiler/ModuleBuilder.h | 3 +- src/Parser/ValueType.cpp | 8 ++++- 3 files changed, 47 insertions(+), 23 deletions(-) diff --git a/src/Compiler/ModuleBuilder.cpp b/src/Compiler/ModuleBuilder.cpp index b438225..defeae3 100644 --- a/src/Compiler/ModuleBuilder.cpp +++ b/src/Compiler/ModuleBuilder.cpp @@ -127,27 +127,27 @@ void ModuleBuilder::buildFunctionDeclaration(shared_ptr state } void ModuleBuilder::buildVarDeclaration(shared_ptr statement) { - llvm::Value *value = valueForExpression(statement->getExpression()); - llvm::AllocaInst *alloca = builder->CreateAlloca(typeForValueType(statement->getValueType()), nullptr, statement->getName()); + if (statement->getValueType()->getKind() == ValueTypeKind::DATA) { + vector values = valuesForExpression(statement->getExpression()); - if (!setAlloca(statement->getName(), alloca)) - return; - builder->CreateStore(value, alloca); + llvm::ArrayType *type = (llvm::ArrayType *)typeForValueType(statement->getValueType(), values.size()); + llvm::AllocaInst *alloca = builder->CreateAlloca(type, nullptr, statement->getName()); + for (int i=0; i < type->getNumElements(); i++) { + llvm::Value *index[] = { + builder->getInt32(0), + builder->getInt32(i) + }; + llvm::Value *elementPtr = builder->CreateGEP(type, alloca, index, format("{}_{}", statement->getName(), i)); + builder->CreateStore(values[i], elementPtr); + } + } else { + llvm::Value *value = valueForExpression(statement->getExpression()); + llvm::AllocaInst *alloca = builder->CreateAlloca(typeForValueType(statement->getValueType(), 0), nullptr, statement->getName()); - /*auto *aType = llvm::ArrayType::get(typeSint32, 7); - llvm::AllocaInst *allocaArr = builder->CreateAlloca(aType, nullptr, statement->getName() + "_Arr"); - - //llvm::AllocaInst *allocaBrr = builder->CreateAlloca(typeSint32, nullptr, statement->getName() + "_Arr"); - vector values; - auto *bType = llvm::ArrayType::get(typeSint32, 9); - for (int i=0; i<9; i++) { - llvm::Constant *cnst = llvm::ConstantInt::get(typeSint32, i, true); - values.push_back(cnst); + if (!setAlloca(statement->getName(), alloca)) + return; + builder->CreateStore(value, alloca); } - llvm::Constant *ar = (llvm::ConstantArray *)llvm::ConstantArray::get(bType, values); - //auto vAr = ar->getAggregateElement(0); - llvm::AllocaInst *arAlloca = builder->CreateAlloca(bType, nullptr, "arBtype"); - builder->CreateStore(ar, arAlloca);*/ } void ModuleBuilder::buildArrayDeclaration(shared_ptr statement) { @@ -254,8 +254,7 @@ llvm::Value *ModuleBuilder::valueForExpression(shared_ptr expression switch (expression->getKind()) { case ExpressionKind::LITERAL: return valueForLiteral(dynamic_pointer_cast(expression)); - case ExpressionKind::ARRAY_LITERAL: - return nullptr;// valuesForArrayLiteral(dynamic_pointer_cast(expression)); + case ExpressionKind::GROUPING: return valueForExpression(dynamic_pointer_cast(expression)->getExpression()); case ExpressionKind::BINARY: @@ -272,6 +271,16 @@ llvm::Value *ModuleBuilder::valueForExpression(shared_ptr expression } } +vector ModuleBuilder::valuesForExpression(shared_ptr expression) { + switch (expression->getKind()) { + case ExpressionKind::ARRAY_LITERAL: + return valuesForArrayLiteral(dynamic_pointer_cast(expression)); + default: + markError(0, 0, "Unexpected expression"); + return vector(); + } +} + llvm::Value *ModuleBuilder::valueForLiteral(shared_ptr expression) { if (expression->getValueType() == nullptr) return llvm::UndefValue::get(typeVoid); @@ -505,7 +514,7 @@ llvm::Function* ModuleBuilder::getFun(string name) { return nullptr; } -llvm::Type *ModuleBuilder::typeForValueType(shared_ptr valueType) { +llvm::Type *ModuleBuilder::typeForValueType(shared_ptr valueType, int count) { switch (valueType->getKind()) { case ValueTypeKind::NONE: return typeVoid; @@ -515,6 +524,14 @@ llvm::Type *ModuleBuilder::typeForValueType(shared_ptr valueType) { return typeSint32; case ValueTypeKind::REAL32: return typeReal32; + case ValueTypeKind::DATA: { + if (valueType->getSubType() == nullptr) + return nullptr; + if (valueType->getValueArg() > 0) + count = valueType->getValueArg(); + return llvm::ArrayType::get(typeForValueType(valueType->getSubType(), count), count); + return nullptr; + } } } diff --git a/src/Compiler/ModuleBuilder.h b/src/Compiler/ModuleBuilder.h index 76e724c..4550822 100644 --- a/src/Compiler/ModuleBuilder.h +++ b/src/Compiler/ModuleBuilder.h @@ -71,6 +71,7 @@ private: void buildExpression(shared_ptr statement); llvm::Value *valueForExpression(shared_ptr expression); + vector valuesForExpression(shared_ptr expression); llvm::Value *valueForLiteral(shared_ptr expression); vector valuesForArrayLiteral(shared_ptr expression); llvm::Value *valueForGrouping(shared_ptr expression); @@ -88,7 +89,7 @@ private: bool setFun(string name, llvm::Function *fun); llvm::Function *getFun(string name); - llvm::Type *typeForValueType(shared_ptr valueType); + llvm::Type *typeForValueType(shared_ptr valueType, int count = 0); void markError(int line, int column, string message); diff --git a/src/Parser/ValueType.cpp b/src/Parser/ValueType.cpp index a5418b8..6f5945d 100644 --- a/src/Parser/ValueType.cpp +++ b/src/Parser/ValueType.cpp @@ -43,4 +43,10 @@ ValueTypeKind ValueType::getKind() { return kind; } -shared_ptr getSubType() {} \ No newline at end of file +shared_ptr ValueType::getSubType() { + return subType; +} + +int ValueType::getValueArg() { + return valueArg; +} \ No newline at end of file From 1c19600430a015a712d1c3d2b50193a18f3de770 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Tue, 8 Jul 2025 19:20:31 +0900 Subject: [PATCH 04/10] Fixed parser --- .gitignore | 12 +++++++++--- .vscode/launch.json | 2 +- src/Parser/Parser.cpp | 10 +++++----- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 0f82e52..6ec6bb1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,12 @@ -.DS_Store +# ignore files without extensions +* +!*.* +# brb build artifiacts *.o -brb + +.DS_Store .vscode/settings.json + +# project build artifacts *.dSYM -build/ \ No newline at end of file +build/ diff --git a/.vscode/launch.json b/.vscode/launch.json index 2dd8039..4b91f3e 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -6,7 +6,7 @@ "type": "lldb-dap", "request": "launch", "program": "${command:cmake.launchTargetPath}", - "args": ["-v", "${workspaceFolder}/test.brc"], + "args": ["-v", "${workspaceFolder}/samples/test.brc"], "cwd": "${workspaceFolder}", "internalConsoleOptions": "openOnSessionStart", } diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 441464a..177203d 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -122,7 +122,7 @@ shared_ptr Parser::matchStatementMetaExternFunction() { return nullptr; } shared_ptr identifierToken = tokens.at(currentIndex++); - shared_ptr argumentTypeToken = tokens.at(currentIndex++); + //shared_ptr argumentTypeToken = tokens.at(currentIndex++); shared_ptr argumentType = matchValueType(); if (argumentType == nullptr) { markError(TokenKind::TYPE, {}); @@ -143,8 +143,6 @@ shared_ptr Parser::matchStatementMetaExternFunction() { markError(TokenKind::TYPE, {}); return nullptr; } - - currentIndex++; // type } return make_shared(identifierToken->getLexme(), arguments, returnType); @@ -192,7 +190,6 @@ shared_ptr Parser::matchStatementFunction() { return nullptr; } shared_ptr identifierToken = tokens.at(currentIndex++); - shared_ptr argumentTypeToken = tokens.at(currentIndex++); shared_ptr argumentType = matchValueType(); if (argumentType == nullptr) { markError(TokenKind::TYPE, {}); @@ -207,7 +204,6 @@ shared_ptr Parser::matchStatementFunction() { if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - //shared_ptr returnTypeToken = tokens.at(currentIndex); returnType = matchValueType(); if (returnType == nullptr) { markError(TokenKind::TYPE, {}); @@ -703,5 +699,9 @@ void Parser::markError(optional expectedTokenKind, optional m while (!tryMatchingTokenKinds(safeKinds, false, true)) currentIndex++; + // Last END should not be consumed + if (currentIndex > tokens.size() - 1) + currentIndex = tokens.size() - 1; + errors.push_back(Error::parserError(actualToken, expectedTokenKind, message)); } From e53a844a28557951aee5693675a5d4d9b4d0b47c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Tue, 8 Jul 2025 22:26:24 +0900 Subject: [PATCH 05/10] String literals --- .gitignore | 9 ++-- samples/test.brc | 7 ++++ src/Lexer/Lexer.cpp | 25 +++++++++++ src/Lexer/Lexer.h | 1 + src/Lexer/Token.cpp | 3 +- src/Lexer/Token.h | 1 + src/Logger.cpp | 4 ++ .../Expression/ExpressionArrayLiteral.cpp | 28 ++++++++++++- .../Expression/ExpressionArrayLiteral.h | 3 ++ src/Parser/Expression/ExpressionLiteral.cpp | 18 ++++++++ src/Parser/Expression/ExpressionLiteral.h | 2 + src/Parser/Parser.cpp | 41 ++++++++++--------- src/Utils.cpp | 34 +++++++++++++++ src/Utils.h | 13 ++++++ 14 files changed, 162 insertions(+), 27 deletions(-) create mode 100644 samples/test.brc create mode 100644 src/Utils.cpp create mode 100644 src/Utils.h diff --git a/.gitignore b/.gitignore index 6ec6bb1..94ff068 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,9 @@ -# ignore files without extensions -* -!*.* -# brb build artifiacts -*.o - .DS_Store .vscode/settings.json # project build artifacts *.dSYM build/ + +# brb build artifiacts +*.o diff --git a/samples/test.brc b/samples/test.brc new file mode 100644 index 0000000..724f225 --- /dev/null +++ b/samples/test.brc @@ -0,0 +1,7 @@ +@extern putchar fun: character sint32 -> sint32 + +main fun -> sint32 + text data <- "Hello string!\n" + + ret 0 +; \ No newline at end of file diff --git a/src/Lexer/Lexer.cpp b/src/Lexer/Lexer.cpp index b2a1b71..c00f19c 100644 --- a/src/Lexer/Lexer.cpp +++ b/src/Lexer/Lexer.cpp @@ -250,6 +250,10 @@ shared_ptr Lexer::nextToken() { if (token != nullptr) return token; + token = matchString(); + if (token != nullptr) + return token; + // type token = matchType(); if (token != nullptr) @@ -405,6 +409,27 @@ shared_ptr Lexer::matchReal() { return token; } +shared_ptr Lexer::matchString() { + int nextIndex = currentIndex; + + if (currentIndex >= source.size() || source.at(nextIndex) != '\"') + return nullptr; + + bool isClosing = false; + do { + nextIndex++; + isClosing = source.at(nextIndex) == '\"' && source.at(nextIndex - 1) != '\\'; + } while (nextIndex < source.length() && !isClosing); + + if (!isClosing) + return nullptr; + + string lexme = source.substr(currentIndex, nextIndex - currentIndex + 1); + shared_ptr token = make_shared(TokenKind::STRING, lexme, currentLine, currentColumn); + advanceWithToken(token); + return token; +} + shared_ptr Lexer::matchIdentifier() { int nextIndex = currentIndex; diff --git a/src/Lexer/Lexer.h b/src/Lexer/Lexer.h index 41393c8..9cc8cc6 100644 --- a/src/Lexer/Lexer.h +++ b/src/Lexer/Lexer.h @@ -25,6 +25,7 @@ private: shared_ptr matchIntegerBin(); shared_ptr matchIntegerChar(); shared_ptr matchReal(); + shared_ptr matchString(); shared_ptr matchType(); shared_ptr matchIdentifier(); shared_ptr matchEnd(); diff --git a/src/Lexer/Token.cpp b/src/Lexer/Token.cpp index 2d4817d..5fcc1df 100644 --- a/src/Lexer/Token.cpp +++ b/src/Lexer/Token.cpp @@ -41,7 +41,8 @@ vector Token::tokensLiteral = { TokenKind::INTEGER_HEX, TokenKind::INTEGER_BIN, TokenKind::INTEGER_CHAR, - TokenKind::REAL + TokenKind::REAL, + TokenKind::STRING }; Token::Token(TokenKind kind, string lexme, int line, int column): diff --git a/src/Lexer/Token.h b/src/Lexer/Token.h index 7c15be1..2c4ce2e 100644 --- a/src/Lexer/Token.h +++ b/src/Lexer/Token.h @@ -41,6 +41,7 @@ enum class TokenKind { INTEGER_BIN, INTEGER_CHAR, REAL, + STRING, IDENTIFIER, TYPE, diff --git a/src/Logger.cpp b/src/Logger.cpp index cc48ef9..ef0883f 100644 --- a/src/Logger.cpp +++ b/src/Logger.cpp @@ -84,6 +84,8 @@ string Logger::toString(shared_ptr token) { return "INT_CHAR(" + token->getLexme() + ")"; case TokenKind::REAL: return "REAL(" + token->getLexme() + ")"; + case TokenKind::STRING: + return "STRING(" + token->getLexme() + ")"; case TokenKind::IDENTIFIER: return "ID(" + token->getLexme() + ")"; case TokenKind::TYPE: @@ -164,6 +166,8 @@ string Logger::toString(TokenKind tokenKind) { return "LITERAL(INTEGER)"; case TokenKind::REAL: return "LITERAL(REAL)"; + case TokenKind::STRING: + return "LITERAL(STRING)"; case TokenKind::IDENTIFIER: return "LITERAL(ID)"; case TokenKind::TYPE: diff --git a/src/Parser/Expression/ExpressionArrayLiteral.cpp b/src/Parser/Expression/ExpressionArrayLiteral.cpp index 2e6a7e5..73a7598 100644 --- a/src/Parser/Expression/ExpressionArrayLiteral.cpp +++ b/src/Parser/Expression/ExpressionArrayLiteral.cpp @@ -1,8 +1,34 @@ #include "ExpressionArrayLiteral.h" +#include "Lexer/Token.h" +#include "Parser/Expression/ExpressionLiteral.h" + ExpressionArrayLiteral::ExpressionArrayLiteral(vector> expressions): Expression(ExpressionKind::ARRAY_LITERAL, nullptr), expressions(expressions) { } +shared_ptr ExpressionArrayLiteral::expressionArrayLiteralForExpressions(vector> expressions) { + return make_shared(expressions); +} + +shared_ptr ExpressionArrayLiteral::expressionArrayLiteralForTokenString(shared_ptr tokenString) { + if (tokenString->getKind() != TokenKind::STRING) + return nullptr; + + vector> expressions; + string stringValue = tokenString->getLexme(); + for (int i=1; i token = make_shared(TokenKind::INTEGER_CHAR, lexme, tokenString->getLine(), tokenString->getColumn() + i); + shared_ptr expression = ExpressionLiteral::expressionLiteralForToken(token); + expressions.push_back(expression); + } + return make_shared(expressions); +} + vector> ExpressionArrayLiteral::getExpressions() { return expressions; -} \ No newline at end of file +} \ No newline at end of file diff --git a/src/Parser/Expression/ExpressionArrayLiteral.h b/src/Parser/Expression/ExpressionArrayLiteral.h index ae56585..d2a2faf 100644 --- a/src/Parser/Expression/ExpressionArrayLiteral.h +++ b/src/Parser/Expression/ExpressionArrayLiteral.h @@ -9,6 +9,9 @@ private: public: ExpressionArrayLiteral(vector> expressions); + //ExpressionArrayLiteral(shared_ptr tokenString); + static shared_ptr expressionArrayLiteralForExpressions(vector> expressions); + static shared_ptr expressionArrayLiteralForTokenString(shared_ptr tokenString); vector> getExpressions(); }; diff --git a/src/Parser/Expression/ExpressionLiteral.cpp b/src/Parser/Expression/ExpressionLiteral.cpp index de518d5..a66913c 100644 --- a/src/Parser/Expression/ExpressionLiteral.cpp +++ b/src/Parser/Expression/ExpressionLiteral.cpp @@ -1,8 +1,26 @@ #include "ExpressionLiteral.h" +#include "Utils.h" #include "Lexer/Token.h" #include "Parser/ValueType.h" +shared_ptr ExpressionLiteral::expressionLiteralForToken(shared_ptr token) { + switch (token->getKind()) { + case TokenKind::INTEGER_CHAR: { + string charString = token->getLexme(); + optional charValue = Utils::charStringToInt(charString); + if (!charValue) + return nullptr; + shared_ptr expression = make_shared(); + expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); + expression->sint32Value = *charValue; + return expression; + } + default: + return nullptr; + } +} + ExpressionLiteral::ExpressionLiteral(): Expression(ExpressionKind::LITERAL, nullptr) { } diff --git a/src/Parser/Expression/ExpressionLiteral.h b/src/Parser/Expression/ExpressionLiteral.h index a693cd0..6e61279 100644 --- a/src/Parser/Expression/ExpressionLiteral.h +++ b/src/Parser/Expression/ExpressionLiteral.h @@ -10,6 +10,8 @@ private: float real32Value; public: + static shared_ptr expressionLiteralForToken(shared_ptr token); + ExpressionLiteral(shared_ptr token); ExpressionLiteral(); bool getBoolValue(); diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 177203d..c715d08 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -421,11 +421,11 @@ shared_ptr Parser::matchPrimary() { if (expression != nullptr) return expression; - expression = matchExpressionLiteral(); - if (expression != nullptr) - return expression; - expression = matchExpressionArrayLiteral(); + if (expression != nullptr) + return expression; + + expression = matchExpressionLiteral(); if (expression != nullptr) return expression; @@ -466,25 +466,28 @@ shared_ptr Parser::matchExpressionLiteral() { } shared_ptr Parser::matchExpressionArrayLiteral() { - if (!tryMatchingTokenKinds({TokenKind::LEFT_SQUARE_BRACKET}, true, true)) - return nullptr; - - vector> expressions; - if (!tryMatchingTokenKinds({TokenKind::RIGHT_SQUARE_BRACKET}, true, true)) { - do { - shared_ptr expression = nextExpression(); - if (expression != nullptr) - expressions.push_back(expression); - } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); - + if (tryMatchingTokenKinds({TokenKind::STRING}, true, false)) { + return ExpressionArrayLiteral::expressionArrayLiteralForTokenString(tokens.at(currentIndex++)); + } else if (tryMatchingTokenKinds({TokenKind::LEFT_SQUARE_BRACKET}, true, true)) { + vector> expressions; if (!tryMatchingTokenKinds({TokenKind::RIGHT_SQUARE_BRACKET}, true, true)) { - markError(TokenKind::RIGHT_SQUARE_BRACKET, {}); - return nullptr; + do { + shared_ptr expression = nextExpression(); + if (expression != nullptr) + expressions.push_back(expression); + } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); + + if (!tryMatchingTokenKinds({TokenKind::RIGHT_SQUARE_BRACKET}, true, true)) { + markError(TokenKind::RIGHT_SQUARE_BRACKET, {}); + return nullptr; + } } + + + return make_shared(expressions); } - - return make_shared(expressions); + return nullptr; } shared_ptr Parser::matchExpressionVariable() { diff --git a/src/Utils.cpp b/src/Utils.cpp new file mode 100644 index 0000000..ab42c4c --- /dev/null +++ b/src/Utils.cpp @@ -0,0 +1,34 @@ +#include "Utils.h" + +optional Utils::charStringToInt(string charString) { + switch (charString.length()) { + case 1: + return charString[0]; + case 3: + return charString[1]; + case 4: + charString[0] = charString[1]; + charString[1] = charString[2]; + case 2: + if (charString[0] != '\\') + return {}; + switch (charString[1]) { + case 'b': + return '\b'; + case 'n': + return '\n'; + case 't': + return '\t'; + case '\\': + return '\\'; + case '\'': + return '\''; + case '\"': + return '\"'; + default: + return {}; + } + default: + return {}; + } +} \ No newline at end of file diff --git a/src/Utils.h b/src/Utils.h new file mode 100644 index 0000000..076808d --- /dev/null +++ b/src/Utils.h @@ -0,0 +1,13 @@ +#ifndef UTILS_H +#define UTILS_H + +#include + +using namespace std; + +class Utils { +public: + static optional charStringToInt(string charString); +}; + +#endif \ No newline at end of file From 22f71bdc33a5495f25fcb7821f88ab24b7148dc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Tue, 8 Jul 2025 22:35:32 +0900 Subject: [PATCH 06/10] Cleaned up array literal --- src/Parser/Expression/ExpressionArrayLiteral.cpp | 16 +++++++++++----- src/Parser/Expression/ExpressionArrayLiteral.h | 6 +++--- src/Parser/Parser.cpp | 3 +-- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/Parser/Expression/ExpressionArrayLiteral.cpp b/src/Parser/Expression/ExpressionArrayLiteral.cpp index 73a7598..90fb29d 100644 --- a/src/Parser/Expression/ExpressionArrayLiteral.cpp +++ b/src/Parser/Expression/ExpressionArrayLiteral.cpp @@ -3,17 +3,18 @@ #include "Lexer/Token.h" #include "Parser/Expression/ExpressionLiteral.h" -ExpressionArrayLiteral::ExpressionArrayLiteral(vector> expressions): -Expression(ExpressionKind::ARRAY_LITERAL, nullptr), expressions(expressions) { } - shared_ptr ExpressionArrayLiteral::expressionArrayLiteralForExpressions(vector> expressions) { - return make_shared(expressions); + shared_ptr expression = make_shared(); + expression->expressions = expressions; + return expression; } shared_ptr ExpressionArrayLiteral::expressionArrayLiteralForTokenString(shared_ptr tokenString) { if (tokenString->getKind() != TokenKind::STRING) return nullptr; + shared_ptr expression = make_shared(); + vector> expressions; string stringValue = tokenString->getLexme(); for (int i=1; i ExpressionArrayLiteral::expressionArrayLitera shared_ptr expression = ExpressionLiteral::expressionLiteralForToken(token); expressions.push_back(expression); } - return make_shared(expressions); + + expression->expressions = expressions; + return expression; } +ExpressionArrayLiteral::ExpressionArrayLiteral(): +Expression(ExpressionKind::ARRAY_LITERAL, nullptr) { } + vector> ExpressionArrayLiteral::getExpressions() { return expressions; } \ No newline at end of file diff --git a/src/Parser/Expression/ExpressionArrayLiteral.h b/src/Parser/Expression/ExpressionArrayLiteral.h index d2a2faf..3f74e32 100644 --- a/src/Parser/Expression/ExpressionArrayLiteral.h +++ b/src/Parser/Expression/ExpressionArrayLiteral.h @@ -6,12 +6,12 @@ class ExpressionArrayLiteral: public Expression { private: vector> expressions; - + public: - ExpressionArrayLiteral(vector> expressions); - //ExpressionArrayLiteral(shared_ptr tokenString); static shared_ptr expressionArrayLiteralForExpressions(vector> expressions); static shared_ptr expressionArrayLiteralForTokenString(shared_ptr tokenString); + + ExpressionArrayLiteral(); vector> getExpressions(); }; diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index c715d08..1689305 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -483,8 +483,7 @@ shared_ptr Parser::matchExpressionArrayLiteral() { } } - - return make_shared(expressions); + return ExpressionArrayLiteral::expressionArrayLiteralForExpressions(expressions); } return nullptr; From 707a28270427fdc4f2b9b8cf72a7ebafb28ab8e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Tue, 8 Jul 2025 22:41:27 +0900 Subject: [PATCH 07/10] Cleaned up literal --- src/Parser/Expression/ExpressionLiteral.cpp | 82 ++++++--------------- src/Parser/Expression/ExpressionLiteral.h | 1 - src/Parser/Parser.cpp | 2 +- 3 files changed, 25 insertions(+), 60 deletions(-) diff --git a/src/Parser/Expression/ExpressionLiteral.cpp b/src/Parser/Expression/ExpressionLiteral.cpp index a66913c..648046e 100644 --- a/src/Parser/Expression/ExpressionLiteral.cpp +++ b/src/Parser/Expression/ExpressionLiteral.cpp @@ -5,93 +5,59 @@ #include "Parser/ValueType.h" shared_ptr ExpressionLiteral::expressionLiteralForToken(shared_ptr token) { - switch (token->getKind()) { - case TokenKind::INTEGER_CHAR: { - string charString = token->getLexme(); - optional charValue = Utils::charStringToInt(charString); - if (!charValue) - return nullptr; - shared_ptr expression = make_shared(); - expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); - expression->sint32Value = *charValue; - return expression; - } - default: - return nullptr; - } -} + shared_ptr expression = make_shared(); -ExpressionLiteral::ExpressionLiteral(): -Expression(ExpressionKind::LITERAL, nullptr) { } - -ExpressionLiteral::ExpressionLiteral(shared_ptr token): -Expression(ExpressionKind::LITERAL, nullptr) { switch (token->getKind()) { case TokenKind::BOOL: - boolValue = token->getLexme().compare("true") == 0; - valueType = ValueType::valueTypeForToken(token, nullptr, 0); + expression->boolValue = token->getLexme().compare("true") == 0; + expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; case TokenKind::INTEGER_DEC: { string numString = token->getLexme(); erase(numString, '_'); - sint32Value = stoi(numString, nullptr, 10); - valueType = ValueType::valueTypeForToken(token, nullptr, 0); + expression->sint32Value = stoi(numString, nullptr, 10); + expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; } case TokenKind::INTEGER_HEX: { string numString = token->getLexme(); erase(numString, '_'); - sint32Value = stoi(numString, nullptr, 16); - valueType = ValueType::valueTypeForToken(token, nullptr, 0); + expression->sint32Value = stoi(numString, nullptr, 16); + expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; } case TokenKind::INTEGER_BIN: { string numString = token->getLexme(); erase(numString, '_'); numString = numString.substr(2, numString.size()-1); - sint32Value = stoi(numString, nullptr, 2); - valueType = ValueType::valueTypeForToken(token, nullptr, 0); + expression->sint32Value = stoi(numString, nullptr, 2); + expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; } case TokenKind::INTEGER_CHAR: { string charString = token->getLexme(); - - valueType = ValueType::valueTypeForToken(token, nullptr, 0); - if (charString.length() == 3) { - sint32Value = charString[1]; - } else if (charString.length() == 4 && charString[1] == '\\') { - switch (charString[2]) { - case 'b': - sint32Value = '\b'; - break; - case 'n': - sint32Value = '\n'; - break; - case 't': - sint32Value = '\t'; - break; - case '\\': - sint32Value = '\\'; - break; - case '\'': - sint32Value = '\''; - break; - case '\"': - sint32Value = '\"'; - break; - } - } - break; + optional charValue = Utils::charStringToInt(charString); + if (!charValue) + return nullptr; + + expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); + expression->sint32Value = *charValue; + return expression; } case TokenKind::REAL: - real32Value = stof(token->getLexme()); - valueType = ValueType::valueTypeForToken(token, nullptr, 0); + expression->real32Value = stof(token->getLexme()); + expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; default: - exit(1); + return nullptr; } + + return expression; } +ExpressionLiteral::ExpressionLiteral(): +Expression(ExpressionKind::LITERAL, nullptr) { } + bool ExpressionLiteral::getBoolValue() { return boolValue; } diff --git a/src/Parser/Expression/ExpressionLiteral.h b/src/Parser/Expression/ExpressionLiteral.h index 6e61279..89b545e 100644 --- a/src/Parser/Expression/ExpressionLiteral.h +++ b/src/Parser/Expression/ExpressionLiteral.h @@ -12,7 +12,6 @@ private: public: static shared_ptr expressionLiteralForToken(shared_ptr token); - ExpressionLiteral(shared_ptr token); ExpressionLiteral(); bool getBoolValue(); int32_t getSint32Value(); diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 1689305..916b078 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -460,7 +460,7 @@ shared_ptr Parser::matchExpressionLiteral() { shared_ptr token = tokens.at(currentIndex); if (tryMatchingTokenKinds(Token::tokensLiteral, false, true)) - return make_shared(token); + return ExpressionLiteral::expressionLiteralForToken(token); return nullptr; } From 870ecce8e4921aba014841c42061d94582c6ec1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Tue, 8 Jul 2025 22:49:04 +0900 Subject: [PATCH 08/10] Almost hello world --- .vscode/launch.json | 2 +- samples/hello.brc | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 samples/hello.brc diff --git a/.vscode/launch.json b/.vscode/launch.json index 4b91f3e..5574805 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -6,7 +6,7 @@ "type": "lldb-dap", "request": "launch", "program": "${command:cmake.launchTargetPath}", - "args": ["-v", "${workspaceFolder}/samples/test.brc"], + "args": ["-v", "${workspaceFolder}/samples/hello.brc"], "cwd": "${workspaceFolder}", "internalConsoleOptions": "openOnSessionStart", } diff --git a/samples/hello.brc b/samples/hello.brc new file mode 100644 index 0000000..0ed1ff9 --- /dev/null +++ b/samples/hello.brc @@ -0,0 +1,12 @@ +@extern putchar fun: character sint32 -> sint32 + +main fun -> sint32 + text data <- "Hello string!\n" + + rep i sint32 <- 0, i < 14: + putchar(text[i]) + i <- i + 1 + ; + + ret 0 +; \ No newline at end of file From 5db6703d48ea300707e586b8cc207a8faf353cf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Wed, 9 Jul 2025 17:47:59 +0900 Subject: [PATCH 09/10] Read array elements --- samples/hello.brc | 6 +-- samples/test.brc | 18 +++++++++ src/Compiler/ModuleBuilder.cpp | 25 ++++++++++--- src/Compiler/ModuleBuilder.h | 1 - src/Logger.cpp | 6 ++- .../Expression/ExpressionArrayLiteral.cpp | 7 ++++ src/Parser/Expression/ExpressionVariable.cpp | 8 +++- src/Parser/Expression/ExpressionVariable.h | 4 +- src/Parser/Parser.cpp | 37 +++++++++++++------ src/Utils.cpp | 2 + 10 files changed, 88 insertions(+), 26 deletions(-) diff --git a/samples/hello.brc b/samples/hello.brc index 0ed1ff9..b8fa9d7 100644 --- a/samples/hello.brc +++ b/samples/hello.brc @@ -1,9 +1,9 @@ @extern putchar fun: character sint32 -> sint32 main fun -> sint32 - text data <- "Hello string!\n" - - rep i sint32 <- 0, i < 14: + text data <- "Hello, world!\n" + + rep i sint32 <- 0, text[i] != 0: putchar(text[i]) i <- i + 1 ; diff --git a/samples/test.brc b/samples/test.brc index 724f225..f71b061 100644 --- a/samples/test.brc +++ b/samples/test.brc @@ -1,5 +1,23 @@ @extern putchar fun: character sint32 -> sint32 +/* +User type + name data + age u32 + successRatio r32 + isActive bool +; +*/ + +/* +i u32 <- 0, rep text[i] != 0: + putchar(text[i]) + i++ +; +*/ + +// text data <- "Hello world!" + main fun -> sint32 text data <- "Hello string!\n" diff --git a/src/Compiler/ModuleBuilder.cpp b/src/Compiler/ModuleBuilder.cpp index defeae3..0215dda 100644 --- a/src/Compiler/ModuleBuilder.cpp +++ b/src/Compiler/ModuleBuilder.cpp @@ -132,12 +132,15 @@ void ModuleBuilder::buildVarDeclaration(shared_ptr statement) llvm::ArrayType *type = (llvm::ArrayType *)typeForValueType(statement->getValueType(), values.size()); llvm::AllocaInst *alloca = builder->CreateAlloca(type, nullptr, statement->getName()); + if (!setAlloca(statement->getName(), alloca)) + return; for (int i=0; i < type->getNumElements(); i++) { llvm::Value *index[] = { builder->getInt32(0), builder->getInt32(i) }; llvm::Value *elementPtr = builder->CreateGEP(type, alloca, index, format("{}_{}", statement->getName(), i)); + builder->CreateStore(values[i], elementPtr); } } else { @@ -150,10 +153,6 @@ void ModuleBuilder::buildVarDeclaration(shared_ptr statement) } } -void ModuleBuilder::buildArrayDeclaration(shared_ptr statement) { - -} - void ModuleBuilder::buildAssignment(shared_ptr statement) { llvm::AllocaInst *alloca = getAlloca(statement->getName()); if (alloca == nullptr) @@ -254,7 +253,6 @@ llvm::Value *ModuleBuilder::valueForExpression(shared_ptr expression switch (expression->getKind()) { case ExpressionKind::LITERAL: return valueForLiteral(dynamic_pointer_cast(expression)); - case ExpressionKind::GROUPING: return valueForExpression(dynamic_pointer_cast(expression)->getExpression()); case ExpressionKind::BINARY: @@ -313,6 +311,9 @@ llvm::Value *ModuleBuilder::valueForBinary(shared_ptr expressi llvm::Value *leftValue = valueForExpression(expression->getLeft()); llvm::Value *rightValue = valueForExpression(expression->getRight()); + if (leftValue == nullptr || rightValue == nullptr) + return nullptr; + llvm::Type *type = leftValue->getType(); if (type == typeBool) { @@ -450,7 +451,19 @@ llvm::Value *ModuleBuilder::valueForVar(shared_ptr expressio if (alloca == nullptr) return nullptr; - return builder->CreateLoad(alloca->getAllocatedType(), alloca, expression->getName()); + if (expression->getIndexExpression()) { + llvm::Value *indexValue = valueForExpression(expression->getIndexExpression()); + llvm::Value *index[] = { + builder->getInt32(0), + indexValue + }; + llvm::ArrayType *type = (llvm::ArrayType *)alloca->getAllocatedType(); + llvm::Value *elementPtr = builder->CreateGEP(type, alloca, index, format("{}[]", expression->getName())); + + return builder->CreateLoad(type->getArrayElementType(), elementPtr); + } else { + return builder->CreateLoad(alloca->getAllocatedType(), alloca, expression->getName()); + } } llvm::Value *ModuleBuilder::valueForCall(shared_ptr expression) { diff --git a/src/Compiler/ModuleBuilder.h b/src/Compiler/ModuleBuilder.h index 4550822..783e984 100644 --- a/src/Compiler/ModuleBuilder.h +++ b/src/Compiler/ModuleBuilder.h @@ -62,7 +62,6 @@ private: void buildStatement(shared_ptr statement); void buildFunctionDeclaration(shared_ptr statement); void buildVarDeclaration(shared_ptr statement); - void buildArrayDeclaration(shared_ptr statement); void buildAssignment(shared_ptr statement); void buildBlock(shared_ptr statement); void buildReturn(shared_ptr statement); diff --git a/src/Logger.cpp b/src/Logger.cpp index ef0883f..9055750 100644 --- a/src/Logger.cpp +++ b/src/Logger.cpp @@ -376,7 +376,11 @@ string Logger::toString(shared_ptr expression) { } string Logger::toString(shared_ptr expression) { - return format("VAR({})", expression->getName()); + string text = format("VAR({}", expression->getName()); + if (expression->getIndexExpression() != nullptr) + text += format("|{}", toString(expression->getIndexExpression())); + text += ")"; + return text; } string Logger::toString(shared_ptr expression) { diff --git a/src/Parser/Expression/ExpressionArrayLiteral.cpp b/src/Parser/Expression/ExpressionArrayLiteral.cpp index 90fb29d..da5e9df 100644 --- a/src/Parser/Expression/ExpressionArrayLiteral.cpp +++ b/src/Parser/Expression/ExpressionArrayLiteral.cpp @@ -26,6 +26,13 @@ shared_ptr ExpressionArrayLiteral::expressionArrayLitera shared_ptr token = make_shared(TokenKind::INTEGER_CHAR, lexme, tokenString->getLine(), tokenString->getColumn() + i); shared_ptr expression = ExpressionLiteral::expressionLiteralForToken(token); expressions.push_back(expression); + + // add terminal 0 if missing + if (i == stringValue.length() - 2 && lexme.compare("\\0") != 0) { + shared_ptr token = make_shared(TokenKind::INTEGER_CHAR, "\\0", tokenString->getLine(), tokenString->getColumn() + i + lexme.length()); + shared_ptr expression = ExpressionLiteral::expressionLiteralForToken(token); + expressions.push_back(expression); + } } expression->expressions = expressions; diff --git a/src/Parser/Expression/ExpressionVariable.cpp b/src/Parser/Expression/ExpressionVariable.cpp index b75cddf..61a03e2 100644 --- a/src/Parser/Expression/ExpressionVariable.cpp +++ b/src/Parser/Expression/ExpressionVariable.cpp @@ -1,8 +1,12 @@ #include "ExpressionVariable.h" -ExpressionVariable::ExpressionVariable(string name): -Expression(ExpressionKind::VAR, nullptr), name(name) { } +ExpressionVariable::ExpressionVariable(string name, shared_ptr indexExpression): +Expression(ExpressionKind::VAR, nullptr), name(name), indexExpression(indexExpression) { } string ExpressionVariable::getName() { return name; } + +shared_ptr ExpressionVariable::getIndexExpression() { + return indexExpression; +} diff --git a/src/Parser/Expression/ExpressionVariable.h b/src/Parser/Expression/ExpressionVariable.h index bd0b99f..5835d9a 100644 --- a/src/Parser/Expression/ExpressionVariable.h +++ b/src/Parser/Expression/ExpressionVariable.h @@ -3,8 +3,10 @@ class ExpressionVariable: public Expression { private: string name; + shared_ptr indexExpression; public: - ExpressionVariable(string name); + ExpressionVariable(string name, shared_ptr indexExpression); string getName(); + shared_ptr getIndexExpression(); }; \ No newline at end of file diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 916b078..d7aecec 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -292,7 +292,7 @@ shared_ptr Parser::matchStatementRepeat() { // got initial, expect comma if (initStatement != nullptr && !tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) { markError(TokenKind::COMMA, {}); - return nullptr; + goto afterIf; } // optional new line @@ -305,7 +305,7 @@ shared_ptr Parser::matchStatementRepeat() { // got pre-condition, expect comma if (!tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) { markError(TokenKind::COMMA, {}); - return nullptr; + goto afterIf; } // optional new line @@ -317,10 +317,11 @@ shared_ptr Parser::matchStatementRepeat() { // expect colon if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { markError(TokenKind::COLON, {}); - return nullptr; + goto afterIf; } } } + afterIf: isMultiLine = tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); @@ -416,25 +417,26 @@ shared_ptr Parser::matchFactor() { shared_ptr Parser::matchPrimary() { shared_ptr expression; + int errorsCount = errors.size(); expression = matchExpressionGrouping(); - if (expression != nullptr) + if (expression != nullptr || errors.size() > errorsCount) return expression; expression = matchExpressionArrayLiteral(); - if (expression != nullptr) + if (expression != nullptr || errors.size() > errorsCount) return expression; expression = matchExpressionLiteral(); - if (expression != nullptr) + if (expression != nullptr || errors.size() > errorsCount) return expression; expression = matchExpressionCall(); - if (expression != nullptr) + if (expression != nullptr || errors.size() > errorsCount) return expression; expression = matchExpressionVariable(); - if (expression != nullptr) + if (expression != nullptr || errors.size() > errorsCount) return expression; return nullptr; @@ -490,12 +492,23 @@ shared_ptr Parser::matchExpressionArrayLiteral() { } shared_ptr Parser::matchExpressionVariable() { - shared_ptr token = tokens.at(currentIndex); + if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER}, true, false)) + return nullptr; + shared_ptr idToken = tokens.at(currentIndex++); + shared_ptr indexExpression; - if (tryMatchingTokenKinds({TokenKind::IDENTIFIER}, true, true)) - return make_shared(token->getLexme()); + if (tryMatchingTokenKinds({TokenKind::LEFT_SQUARE_BRACKET}, true, true)) { + indexExpression = nextExpression(); + if (indexExpression == nullptr) + return nullptr; + + if (!tryMatchingTokenKinds({TokenKind::RIGHT_SQUARE_BRACKET}, true, true)) { + markError(TokenKind::RIGHT_SQUARE_BRACKET, {}); + return nullptr; + } + } - return nullptr; + return make_shared(idToken->getLexme(), indexExpression); } shared_ptr Parser::matchExpressionCall() { diff --git a/src/Utils.cpp b/src/Utils.cpp index ab42c4c..d3ccdbd 100644 --- a/src/Utils.cpp +++ b/src/Utils.cpp @@ -25,6 +25,8 @@ optional Utils::charStringToInt(string charString) { return '\''; case '\"': return '\"'; + case '0': + return '\0'; default: return {}; } From 8d50e28ac8d2e59dc1192b109cdc234597524cd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Thu, 10 Jul 2025 14:28:13 +0900 Subject: [PATCH 10/10] Array assignment --- src/Compiler/ModuleBuilder.cpp | 15 ++++++++++- src/Parser/Parser.cpp | 26 +++++++++++++++++--- src/Parser/Statement/StatementAssignment.cpp | 8 ++++-- src/Parser/Statement/StatementAssignment.h | 4 ++- 4 files changed, 45 insertions(+), 8 deletions(-) diff --git a/src/Compiler/ModuleBuilder.cpp b/src/Compiler/ModuleBuilder.cpp index 0215dda..4e1aaa1 100644 --- a/src/Compiler/ModuleBuilder.cpp +++ b/src/Compiler/ModuleBuilder.cpp @@ -159,7 +159,20 @@ void ModuleBuilder::buildAssignment(shared_ptr statement) { return; llvm::Value *value = valueForExpression(statement->getExpression()); - builder->CreateStore(value, alloca); + + if (statement->getIndexExpression()) { + llvm::Value *indexValue = valueForExpression(statement->getIndexExpression()); + llvm::Value *index[] = { + builder->getInt32(0), + indexValue + }; + llvm::ArrayType *type = (llvm::ArrayType *)alloca->getAllocatedType(); + llvm::Value *elementPtr = builder->CreateGEP(type, alloca, index, format("{}[]", statement->getName())); + + builder->CreateStore(value, elementPtr); + } else { + builder->CreateStore(value, alloca); + } } void ModuleBuilder::buildBlock(shared_ptr statement) { diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index d7aecec..c8922ed 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -250,17 +250,35 @@ shared_ptr Parser::matchStatementBlock(vector terminalToke } shared_ptr Parser::matchStatementAssignment() { - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::LEFT_ARROW}, true, false)) + int startIndex = currentIndex; + + if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER}, true, false)) return nullptr; - shared_ptr identifierToken = tokens.at(currentIndex++); - currentIndex++; // arrow + shared_ptr indexExpression; + + if (tryMatchingTokenKinds({TokenKind::LEFT_SQUARE_BRACKET}, true, true)) { + indexExpression = nextExpression(); + if (indexExpression == nullptr) + return nullptr; + + if (!tryMatchingTokenKinds({TokenKind::RIGHT_SQUARE_BRACKET}, true, true)) { + markError(TokenKind::RIGHT_SQUARE_BRACKET, {}); + return nullptr; + } + } + + // assignment requires left arrow, otherwise abort + if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true)) { + currentIndex = startIndex; + return nullptr; + } shared_ptr expression = nextExpression(); if (expression == nullptr) return nullptr; - return make_shared(identifierToken->getLexme(), expression); + return make_shared(identifierToken->getLexme(), indexExpression, expression); } shared_ptr Parser::matchStatementReturn() { diff --git a/src/Parser/Statement/StatementAssignment.cpp b/src/Parser/Statement/StatementAssignment.cpp index 24fcff6..ae0d24f 100644 --- a/src/Parser/Statement/StatementAssignment.cpp +++ b/src/Parser/Statement/StatementAssignment.cpp @@ -1,12 +1,16 @@ #include "StatementAssignment.h" -StatementAssignment::StatementAssignment(string name, shared_ptr expression): -Statement(StatementKind::ASSIGNMENT), name(name), expression(expression) { } +StatementAssignment::StatementAssignment(string name, shared_ptr indexExpression, shared_ptr expression): +Statement(StatementKind::ASSIGNMENT), name(name), indexExpression(indexExpression), expression(expression) { } string StatementAssignment::getName() { return name; } +shared_ptr StatementAssignment::getIndexExpression() { + return indexExpression; +} + shared_ptr StatementAssignment::getExpression() { return expression; } \ No newline at end of file diff --git a/src/Parser/Statement/StatementAssignment.h b/src/Parser/Statement/StatementAssignment.h index 0641e3f..0bb7f63 100644 --- a/src/Parser/Statement/StatementAssignment.h +++ b/src/Parser/Statement/StatementAssignment.h @@ -5,10 +5,12 @@ class Expression; class StatementAssignment: public Statement { private: string name; + shared_ptr indexExpression; shared_ptr expression; public: - StatementAssignment(string name, shared_ptr expression); + StatementAssignment(string name, shared_ptr indexExpressio, shared_ptr expression); string getName(); + shared_ptr getIndexExpression(); shared_ptr getExpression(); }; \ No newline at end of file