From 76f03ef8e36bc210c0d10d091ad57ee12f07dc46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Thu, 7 Aug 2025 09:39:45 +0900 Subject: [PATCH] wip --- samples/test.brc | 2 + src/Parser/Parsee/Parsee.cpp | 19 ++++ src/Parser/Parsee/Parsee.h | 14 ++- src/Parser/Parsee/ParseeGroup.cpp | 10 +- src/Parser/Parsee/ParseeGroup.h | 6 +- src/Parser/Parser.cpp | 164 +++++++++++++++++++----------- src/Parser/Parser.h | 7 +- 7 files changed, 152 insertions(+), 70 deletions(-) diff --git a/samples/test.brc b/samples/test.brc index b30ff0c..0205e7f 100644 --- a/samples/test.brc +++ b/samples/test.brc @@ -1,4 +1,6 @@ User blob + num1 s32 + num2 u8 ; main fun -> u32 diff --git a/src/Parser/Parsee/Parsee.cpp b/src/Parser/Parsee/Parsee.cpp index 3ec80d9..b3a5d21 100644 --- a/src/Parser/Parsee/Parsee.cpp +++ b/src/Parser/Parsee/Parsee.cpp @@ -1,5 +1,16 @@ #include "Parsee.h" +#include "ParseeGroup.h" + +Parsee Parsee::repeatedGroupParsee(ParseeGroup repeatedGroup, bool isRequired, bool shouldReturn, bool shouldFailOnNoMatch) { + Parsee parsee; + parsee.kind = ParseeKind::REPEATED_GROUP; + parsee.repeatedGroup = repeatedGroup; + parsee.isRequired = isRequired; + parsee.shouldFailOnNoMatch = shouldFailOnNoMatch; + return parsee; +} + Parsee Parsee::tokenParsee(TokenKind tokenKind, bool isRequired, bool shouldReturn) { Parsee parsee; parsee.kind = ParseeKind::TOKEN; @@ -27,6 +38,10 @@ Parsee Parsee::expressionParsee(bool isRequired) { Parsee::Parsee() { } +optional Parsee::getRepeatedGroup() { + return repeatedGroup; +} + ParseeKind Parsee::getKind() { return kind; } @@ -41,4 +56,8 @@ bool Parsee::getIsRequired() { bool Parsee::getShouldReturn() { return shouldReturn; +} + +bool Parsee::getShouldFailOnNoMatch() { + return shouldFailOnNoMatch; } \ No newline at end of file diff --git a/src/Parser/Parsee/Parsee.h b/src/Parser/Parsee/Parsee.h index 6dfdf0e..bf6e10d 100644 --- a/src/Parser/Parsee/Parsee.h +++ b/src/Parser/Parsee/Parsee.h @@ -2,10 +2,17 @@ #define PARSEE_H #include +#include + +#include "ParseeGroup.h" enum class TokenKind; +//class ParseeGroup; + +using namespace std; enum class ParseeKind { + REPEATED_GROUP, TOKEN, VALUE_TYPE, EXPRESSION @@ -14,20 +21,25 @@ enum class ParseeKind { class Parsee { private: ParseeKind kind; + optional repeatedGroup; TokenKind tokenKind; bool isRequired; bool shouldReturn; + bool shouldFailOnNoMatch; Parsee(); public: + static Parsee repeatedGroupParsee(ParseeGroup repeatedGroup, bool isRequired, bool shouldReturn, bool shouldFailOnNoMatch); static Parsee tokenParsee(TokenKind tokenKind, bool isRequired, bool shouldReturn); - static Parsee valueTypeParsee(bool isRequired); + static Parsee valueTypeParsee(bool isRequired); static Parsee expressionParsee(bool isRequired); ParseeKind getKind(); + optional getRepeatedGroup(); TokenKind getTokenKind(); bool getIsRequired(); bool getShouldReturn(); + bool getShouldFailOnNoMatch(); }; #endif \ No newline at end of file diff --git a/src/Parser/Parsee/ParseeGroup.cpp b/src/Parser/Parsee/ParseeGroup.cpp index ead5352..d580679 100644 --- a/src/Parser/Parsee/ParseeGroup.cpp +++ b/src/Parser/Parsee/ParseeGroup.cpp @@ -2,19 +2,19 @@ #include "Parsee.h" -ParseeGroup::ParseeGroup(vector parsees, optional repeatedGroup): +ParseeGroup::ParseeGroup(vector parsees)://, optional repeatedGroup): parsees(parsees) { - if (repeatedGroup) { + /*if (repeatedGroup) { this->repeatedGroup = *repeatedGroup; } else { this->repeatedGroup = {}; - } + }*/ } vector ParseeGroup::getParsees() { return parsees; } -optional> ParseeGroup::getRepeatedGroup() { +/*optional> ParseeGroup::getRepeatedGroup() { return repeatedGroup; -} \ No newline at end of file +}*/ \ No newline at end of file diff --git a/src/Parser/Parsee/ParseeGroup.h b/src/Parser/Parsee/ParseeGroup.h index 4e4cc82..1f6681b 100644 --- a/src/Parser/Parsee/ParseeGroup.h +++ b/src/Parser/Parsee/ParseeGroup.h @@ -11,12 +11,12 @@ using namespace std; class ParseeGroup { private: vector parsees; - optional> repeatedGroup; + //optional> repeatedGroup; public: - ParseeGroup(vector parsees, optional repeatedGroup); + ParseeGroup(vector parsees);//, optional repeatedGroup); vector getParsees(); - optional> getRepeatedGroup(); + //optional> getRepeatedGroup(); }; #endif \ No newline at end of file diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 21fdc61..6c48ab1 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -131,8 +131,8 @@ shared_ptr Parser::matchStatementMetaExternFunction() { Parsee::tokenParsee(TokenKind::M_EXTERN, true, false), Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), Parsee::tokenParsee(TokenKind::FUNCTION, true, false) - }, - {} + }/*, + {}*/ ) ); @@ -153,7 +153,7 @@ shared_ptr Parser::matchStatementMetaExternFunction() { Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), Parsee::valueTypeParsee(true) - }, + }/*, ParseeGroup( { Parsee::tokenParsee(TokenKind::COMMA, true, false), @@ -162,7 +162,7 @@ shared_ptr Parser::matchStatementMetaExternFunction() { Parsee::valueTypeParsee(true) }, {} - ) + )*/ ) ); switch (resultsGroup.getKind()) { @@ -187,8 +187,8 @@ shared_ptr Parser::matchStatementMetaExternFunction() { Parsee::tokenParsee(TokenKind::RIGHT_ARROW, true, false), Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), Parsee::valueTypeParsee(true) - }, - {} + }/*, + {}*/ ) ); @@ -213,8 +213,8 @@ shared_ptr Parser::matchStatementVariable() { Parsee::valueTypeParsee(true), Parsee::tokenParsee(TokenKind::LEFT_ARROW, true, false), Parsee::expressionParsee(true) - }, - {} + }/*, + {}*/ ) ); @@ -243,8 +243,8 @@ shared_ptr Parser::matchStatementFunction() { { Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), Parsee::tokenParsee(TokenKind::FUNCTION, true, false) - }, - {} + }/*, + {}*/ ) ); @@ -268,7 +268,8 @@ shared_ptr Parser::matchStatementFunction() { Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), Parsee::valueTypeParsee(true) - }, + }/*, + {} ParseeGroup( { Parsee::tokenParsee(TokenKind::COMMA, true, false), @@ -277,7 +278,7 @@ shared_ptr Parser::matchStatementFunction() { Parsee::valueTypeParsee(true) }, {} - ) + )*/ ) ); switch (resultsGroup.getKind()) { @@ -305,8 +306,8 @@ shared_ptr Parser::matchStatementFunction() { Parsee::tokenParsee(TokenKind::RIGHT_ARROW, true, false), Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), Parsee::valueTypeParsee(true) - }, - {} + }/*, + {}*/ ) ); @@ -358,8 +359,8 @@ shared_ptr Parser::matchStatementRawFunction() { { Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), Parsee::tokenParsee(TokenKind::RAW_FUNCTION, true, false) - }, - {} + }/*, + {}*/ ) ); @@ -383,8 +384,8 @@ shared_ptr Parser::matchStatementRawFunction() { Parsee::tokenParsee(TokenKind::LESS, true, false), Parsee::tokenParsee(TokenKind::STRING, true, true), Parsee::tokenParsee(TokenKind::GREATER, true, false) - }, - {} + }/*, + {}*/ ) ); @@ -413,7 +414,7 @@ shared_ptr Parser::matchStatementRawFunction() { Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), Parsee::valueTypeParsee(true) - }, + }/*, ParseeGroup( { Parsee::tokenParsee(TokenKind::COMMA, true, false), @@ -422,7 +423,7 @@ shared_ptr Parser::matchStatementRawFunction() { Parsee::valueTypeParsee(true) }, {} - ) + )*/ ) ); switch (resultsGroup.getKind()) { @@ -450,8 +451,8 @@ shared_ptr Parser::matchStatementRawFunction() { Parsee::tokenParsee(TokenKind::RIGHT_ARROW, true, false), Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), Parsee::valueTypeParsee(true) - }, - {} + }/*, + {}*/ ) ); @@ -501,21 +502,40 @@ shared_ptr Parser::matchStatementBlob() { ParseeGroup( { Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), - Parsee::tokenParsee(TokenKind::TYPE, true, false) - }, + Parsee::tokenParsee(TokenKind::BLOB, true, false), + Parsee::tokenParsee(TokenKind::NEW_LINE, true, false), + Parsee::repeatedGroupParsee( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::valueTypeParsee(true), + Parsee::tokenParsee(TokenKind::NEW_LINE, true, false) + }/*, + {}*/ + ), false, true, false + ), + Parsee::tokenParsee(TokenKind::SEMICOLON, true, false) + }/*, {} + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::valueTypeParsee(true) + }, + {} + )*/ ) ); switch (resultsGroup.getKind()) { case ParseeResultsGroupKind::SUCCESS: identifier = resultsGroup.getResults().at(0).getToken()->getLexme(); - /*for (int i=1; i> arg; arg.first = resultsGroup.getResults().at(i).getToken()->getLexme(); arg.second = resultsGroup.getResults().at(i+1).getValueType(); variables.push_back(arg); - }*/ + } break; case ParseeResultsGroupKind::NO_MATCH: case ParseeResultsGroupKind::FAILURE: @@ -570,8 +590,8 @@ shared_ptr Parser::matchStatementAssignment() { ParseeGroup( { Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), - }, - {} + }/*, + {}*/ ) ); @@ -587,8 +607,8 @@ shared_ptr Parser::matchStatementAssignment() { Parsee::tokenParsee(TokenKind::LEFT_SQUARE_BRACKET, true, false), Parsee::expressionParsee(true), Parsee::tokenParsee(TokenKind::RIGHT_SQUARE_BRACKET, true, false), - }, - {} + }/*, + {}*/ ) ); @@ -608,8 +628,8 @@ shared_ptr Parser::matchStatementAssignment() { { Parsee::tokenParsee(TokenKind::LEFT_ARROW, true, false), Parsee::expressionParsee(true) - }, - {} + }/*, + {}*/ ) ); @@ -633,8 +653,8 @@ shared_ptr Parser::matchStatementReturn() { { Parsee::tokenParsee(TokenKind::RETURN, true, false), Parsee::expressionParsee(false) - }, - {} + }/*, + {}*/ ) ); @@ -1021,20 +1041,24 @@ shared_ptr Parser::matchExpressionBlock(vector terminalTo ParseeResultsGroup Parser::parseeResultsGroupForParseeGroup(ParseeGroup group) { int errorsCount = errors.size(); int startIndex = currentIndex; - vector results; + vector parseeResults; bool mustFulfill = false; for (Parsee &parsee : group.getParsees()) { - optional result; + optional> subResults; switch (parsee.getKind()) { + case ParseeKind::REPEATED_GROUP: + //ParseeResultsGroup results = parseeResultsGroupForParseeGroup(*parsee.getRepeatedGroup()); + subResults = repeatedGroupParseeResults(*parsee.getRepeatedGroup()); + break; case ParseeKind::TOKEN: - result = tokenParseeResult(currentIndex, parsee.getTokenKind()); + subResults = tokenParseeResults(currentIndex, parsee.getTokenKind()); break; case ParseeKind::VALUE_TYPE: - result = valueTypeParseeResult(currentIndex); + subResults = valueTypeParseeResults(currentIndex); break; case ParseeKind::EXPRESSION: - result = expressionParseeResult(currentIndex); + subResults = expressionParseeResults(currentIndex); break; } @@ -1043,33 +1067,39 @@ ParseeResultsGroup Parser::parseeResultsGroupForParseeGroup(ParseeGroup group) { return ParseeResultsGroup::failure(); // if doesn't match on optional group - if (!result && parsee.getIsRequired() && !mustFulfill) { + if (!subResults && parsee.getIsRequired() && !mustFulfill) { currentIndex = startIndex; //return vector(); return ParseeResultsGroup::noMatch(); } // return matching token? - if (result && parsee.getShouldReturn()) - results.push_back(*result); + if (subResults && parsee.getShouldReturn()) { + //parseeResults.push_back(*result); + for (ParseeResult &subResult : *subResults) + parseeResults.push_back(subResult); + } // decide if we're decoding the expected sequence if (!parsee.getIsRequired() && currentIndex > startIndex) mustFulfill = true; // invalid sequence detected? - if (!result && parsee.getIsRequired() && mustFulfill) { + if (!subResults && parsee.getIsRequired() && mustFulfill) { markError(parsee.getTokenKind(), {}); //return {}; return ParseeResultsGroup::failure(); } // got to the next token if we got a match - if (result) - currentIndex += (*result).getTokensCount(); + if (subResults) { + for (ParseeResult &subResult : *subResults ) + currentIndex += subResult.getTokensCount(); + //currentIndex += (*result).getTokensCount(); + } } - if (group.getRepeatedGroup()) { + /*if (group.getRepeatedGroup()) { ParseeResultsGroup subResultsGroup; do { subResultsGroup = parseeResultsGroupForParseeGroup(*group.getRepeatedGroup()); @@ -1079,19 +1109,37 @@ ParseeResultsGroup Parser::parseeResultsGroupForParseeGroup(ParseeGroup group) { for (ParseeResult &subResult : subResultsGroup.getResults()) results.push_back(subResult); } while (subResultsGroup.getKind() == ParseeResultsGroupKind::SUCCESS); - } + }*/ - return ParseeResultsGroup::success(results); + return ParseeResultsGroup::success(parseeResults); } -optional Parser::tokenParseeResult(int index, TokenKind tokenKind) { +optional> Parser::repeatedGroupParseeResults(ParseeGroup group) { + int startIndex = currentIndex; + vector results; + + ParseeResultsGroup resultsGroup; + do { + resultsGroup = parseeResultsGroupForParseeGroup(group); + if (resultsGroup.getKind() == ParseeResultsGroupKind::FAILURE) + return {}; + + for (ParseeResult &result : resultsGroup.getResults()) + results.push_back(result); + } while (resultsGroup.getKind() == ParseeResultsGroupKind::SUCCESS); + + currentIndex = startIndex; + return results; +} + +optional> Parser::tokenParseeResults(int index, TokenKind tokenKind) { shared_ptr token = tokens.at(index); if (token->isOfKind({tokenKind})) - return ParseeResult::tokenResult(token); + return vector({ParseeResult::tokenResult(token)}); return {}; } -optional Parser::valueTypeParseeResult(int index) { +optional> Parser::valueTypeParseeResults(int index) { int startIndex = index; if (!tokens.at(index)->isOfKind({TokenKind::TYPE})) @@ -1103,11 +1151,11 @@ optional Parser::valueTypeParseeResult(int index) { if (tokens.at(index)->isOfKind({TokenKind::LESS})) { index++; - optional subResult = valueTypeParseeResult(index); - if (!subResult) + optional> subResults = valueTypeParseeResults(index); + if (!subResults || (*subResults).empty()) return {}; - subType = (*subResult).getValueType(); - index += (*subResult).getTokensCount(); + subType = (*subResults)[0].getValueType(); + index += (*subResults)[0].getTokensCount(); if (tokens.at(index)->isOfKind({TokenKind::COMMA})) { index++; @@ -1129,10 +1177,10 @@ optional Parser::valueTypeParseeResult(int index) { } shared_ptr valueType = ValueType::valueTypeForToken(typeToken, subType, typeArg); - return ParseeResult::valueTypeResult(valueType, index - startIndex); + return vector({ParseeResult::valueTypeResult(valueType, index - startIndex)}); } -optional Parser::expressionParseeResult(int index) { +optional> Parser::expressionParseeResults(int index) { int startIndex = currentIndex; int errorsCount = errors.size(); shared_ptr expression = nextExpression(); @@ -1141,7 +1189,7 @@ optional Parser::expressionParseeResult(int index) { int tokensCount = currentIndex - startIndex; currentIndex = startIndex; - return ParseeResult::expressionResult(expression, tokensCount); + return vector({ParseeResult::expressionResult(expression, tokensCount)}); } bool Parser::tryMatchingTokenKinds(vector kinds, bool shouldMatchAll, bool shouldAdvance) { diff --git a/src/Parser/Parser.h b/src/Parser/Parser.h index bd1dea2..5ef35a6 100644 --- a/src/Parser/Parser.h +++ b/src/Parser/Parser.h @@ -57,9 +57,10 @@ private: shared_ptr matchExpressionBlock(vector terminalTokenKinds); ParseeResultsGroup parseeResultsGroupForParseeGroup(ParseeGroup group); - optional tokenParseeResult(int index, TokenKind tokenKind); - optional valueTypeParseeResult(int index); - optional expressionParseeResult(int index); + optional> repeatedGroupParseeResults(ParseeGroup group); + optional> tokenParseeResults(int index, TokenKind tokenKind); + optional> valueTypeParseeResults(int index); + optional> expressionParseeResults(int index); bool tryMatchingTokenKinds(vector kinds, bool shouldMatchAll, bool shouldAdvance); void markError(optional expectedTokenKind, optional message);