From 993c204596c27d9d644fed0c5570d40859721bdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Tue, 1 Jul 2025 12:06:36 +0900 Subject: [PATCH] Better parser handling --- Syntax.md | 59 +++++++---- src/Parser/Parser.cpp | 230 +++++++++++++++++++++--------------------- src/Parser/Parser.h | 9 +- test.brc | 4 + 4 files changed, 163 insertions(+), 139 deletions(-) diff --git a/Syntax.md b/Syntax.md index b0bd0c7..64bcfe9 100644 --- a/Syntax.md +++ b/Syntax.md @@ -23,8 +23,44 @@ This documents specifies what is the allowed syntax for statements and expressio `` Statement +### Overall structure +``` + +| ++ ++ ++ + | + + + | + + + + + + + + + | + + ... + + + | + + + + + | + + ... +``` + +### Statement Meta Extern Function +`@extern fun (: ? (, ? )*)? (-> ? )?` +``` +@extern sum fun: +num1 sint32, +num2 sint32 -> +sint32 +``` + +### Statement Variable +` <- ` + ### Statement Function -` fun (: ? (, ? )*)? (-> ? )? ; ` +` fun (: ? (, ? )*)? (-> ? )? ;` ``` stuff fun ; @@ -40,23 +76,6 @@ sint32 ; ``` -### Statement Meta Extern Function: -`@extern fun (: ? (, ? )*)? (-> ? )? ` -``` -@extern sum fun: -num1 sint32, -num2 sint32 -> -sint32 -``` - - -### Statemnet Variable -` <- ` -``` -num sint32 <- 42 - -``` - ### Statement Assignment ` <- ` ``` @@ -92,8 +111,8 @@ i < 10 `loop ( (, ? )?)? ;` -### StatementReturn -`ret ` +### Statement Return +`ret ? ` ### ExpressionVariable: `` diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 718984a..69748d3 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -58,22 +58,6 @@ shared_ptr Parser::nextStatement() { if (statement != nullptr) return statement; - /*statement = matchStatementAssignment(); - if (statement != nullptr) - return statement;*/ - - /*statement = matchStatementReturn(); - if (statement != nullptr) - return statement;*/ - - /*statement = matchStatementLoop(); - if (statement != nullptr) - return statement;*/ - - /*statement = matchStatementExpression(); - if (statement != nullptr) - return statement;*/ - statement = matchStatementMetaExternFunction(); if (statement != nullptr) return statement; @@ -81,6 +65,106 @@ shared_ptr Parser::nextStatement() { return matchStatementInvalid("Unexpected token"); } +shared_ptr Parser::nextInBlockStatement() { + shared_ptr statement; + + statement = matchStatementVariable(); + if (statement != nullptr) + return statement; + + statement = matchStatementAssignment(); + if (statement != nullptr) + return statement; + + statement = matchStatementReturn(); + if (statement != nullptr) + return statement; + + statement = matchStatementLoop(); + if (statement != nullptr) + return statement; + + statement = matchStatementExpression(); + if (statement != nullptr) + return statement; + + return matchStatementInvalid("Unexpected token"); +} + +shared_ptr Parser::matchStatementMetaExternFunction() { + if (!tryMatchingTokenKinds({TokenKind::M_EXTERN, TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false)) + return nullptr; + + string name; + vector> arguments; + ValueType returnType = ValueType::NONE; + + currentIndex++; // skip meta + shared_ptr identifierToken = tokens.at(currentIndex++); + currentIndex++; // skip fun + + // arguments + if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { + do { + tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line + if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) + return matchStatementInvalid("Expected function argument"); + shared_ptr identifierToken = tokens.at(currentIndex++); + shared_ptr typeToken = tokens.at(currentIndex++); + optional argumentType = valueTypeForToken(typeToken); + if (!argumentType) + return matchStatementInvalid("Invalid argument type"); + + arguments.push_back(pair(identifierToken->getLexme(), *argumentType)); + } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); + } + + // Return type + if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { + tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line + + shared_ptr typeToken = tokens.at(currentIndex); + optional type = valueTypeForToken(typeToken); + if (!type) + return matchStatementInvalid("Expected return type"); + returnType = *type; + + currentIndex++; // type + } + + return make_shared(identifierToken->getLexme(), arguments, returnType); +} + +shared_ptr Parser::matchStatementVariable() { + if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) + return nullptr; + + shared_ptr identifierToken = tokens.at(currentIndex++); + shared_ptr valueTypeToken = tokens.at(currentIndex); + + ValueType valueType; + if (valueTypeToken->getLexme().compare("bool") == 0) + valueType = ValueType::BOOL; + else if (valueTypeToken->getLexme().compare("sint32") == 0) + valueType = ValueType::SINT32; + else if (valueTypeToken->getLexme().compare("real32") == 0) + valueType = ValueType::REAL32; + else + return matchStatementInvalid("Invalid type"); + + currentIndex++; // type + + // Expect left arrow + if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true)) + return matchStatementInvalid("Expected left arrow"); + + shared_ptr expression = nextExpression(); + if (expression == nullptr || !expression->isValid()) + return matchStatementInvalid("Invalid expression"); + + return make_shared(identifierToken->getLexme(), valueType, expression); +} + shared_ptr Parser::matchStatementFunction() { if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false)) return nullptr; @@ -138,105 +222,37 @@ shared_ptr Parser::matchStatementFunction() { return make_shared(name, arguments, returnType, dynamic_pointer_cast(statementBlock)); } -shared_ptr Parser::matchStatementVariable() { - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) - return nullptr; +shared_ptr Parser::matchStatementBlock(vector terminalTokenKinds) { + vector> statements; - shared_ptr identifierToken = tokens.at(currentIndex++); - currentIndex++; // identifier - shared_ptr valueTypeToken = tokens.at(currentIndex); + while (!tryMatchingTokenKinds(terminalTokenKinds, false, false)) { + shared_ptr statement = nextInBlockStatement(); + if (statement == nullptr || !statement->isValid()) + return statement ?: matchStatementInvalid("Expected statement"); + statements.push_back(statement); - ValueType valueType; - if (valueTypeToken->getLexme().compare("bool") == 0) - valueType = ValueType::BOOL; - else if (valueTypeToken->getLexme().compare("sint32") == 0) - valueType = ValueType::SINT32; - else if (valueTypeToken->getLexme().compare("real32") == 0) - valueType = ValueType::REAL32; - else - return matchStatementInvalid("Invalid type"); + if (tryMatchingTokenKinds(terminalTokenKinds, false, false)) + break; - currentIndex++; // type - - // Expect left arrow - if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true)) - return matchStatementInvalid("Expected left arrow"); - - shared_ptr expression = nextExpression(); - if (expression == nullptr || !expression->isValid()) - return matchStatementInvalid("Invalid expression"); - - // Expect comma or new line - if (!tryMatchingTokenKinds({TokenKind::COMMA}, true, false) && !tryMatchingTokenKinds({TokenKind::NEW_LINE}, false, true)) - return matchStatementInvalid("Expected a new line after variable declaration"); - - return make_shared(identifierToken->getLexme(), valueType, expression); -} - -shared_ptr Parser::matchStatementMetaExternFunction() { - if (!tryMatchingTokenKinds({TokenKind::M_EXTERN, TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false)) - return nullptr; - - string name; - vector> arguments; - ValueType returnType = ValueType::NONE; - - currentIndex++; // skip meta - shared_ptr identifierToken = tokens.at(currentIndex++); - currentIndex++; // skip fun - - // arguments - if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { - do { - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) - return matchStatementInvalid("Expected function argument"); - shared_ptr identifierToken = tokens.at(currentIndex++); - shared_ptr typeToken = tokens.at(currentIndex++); - optional argumentType = valueTypeForToken(typeToken); - if (!argumentType) - return matchStatementInvalid("Invalid argument type"); - - arguments.push_back(pair(identifierToken->getLexme(), *argumentType)); - } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); - } - - // Return type - if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - - shared_ptr typeToken = tokens.at(currentIndex); - optional type = valueTypeForToken(typeToken); - if (!type) - return matchStatementInvalid("Expected return type"); - returnType = *type; - - currentIndex++; // type - - // consume new line + // except new line if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) - return matchStatementInvalid("Expected new line after function declaration"); + return matchStatementInvalid("Expected new line"); } - return make_shared(identifierToken->getLexme(), arguments, returnType); + return make_shared(statements); } shared_ptr Parser::matchStatementAssignment() { if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::LEFT_ARROW}, true, false)) return nullptr; - shared_ptr identifierToken = tokens.at(currentIndex); - currentIndex++; // identifier + shared_ptr identifierToken = tokens.at(currentIndex++); currentIndex++; // arrow shared_ptr expression = nextExpression(); if (expression == nullptr || !expression->isValid()) return matchStatementInvalid("Expected expression"); - // Expect new line - if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, false, true)) - return matchStatementInvalid("Expected a new line after variable declaration"); - return make_shared(identifierToken->getLexme(), expression); } @@ -246,12 +262,7 @@ shared_ptr Parser::matchStatementReturn() { shared_ptr expression = nextExpression(); if (expression != nullptr && !expression->isValid()) - return matchStatementInvalid(); - - if (!tryMatchingTokenKinds({TokenKind::NEW_LINE, TokenKind::SEMICOLON}, false, false)) - return matchStatementInvalid(); - - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); + return matchStatementInvalid("Expected expression"); return make_shared(expression); } @@ -323,19 +334,6 @@ shared_ptr Parser::matchStatementExpression() { return make_shared(expression); } -shared_ptr Parser::matchStatementBlock(vector terminalTokenKinds) { - vector> statements; - - while (!tryMatchingTokenKinds(terminalTokenKinds, false, false)) { - shared_ptr statement = nextStatement(); - if (statement == nullptr || !statement->isValid()) - return statement ?: matchStatementInvalid("Expected statement"); - statements.push_back(statement); - } - - return make_shared(statements); -} - shared_ptr Parser::matchStatementInvalid(string message) { return make_shared(tokens.at(currentIndex), message); } diff --git a/src/Parser/Parser.h b/src/Parser/Parser.h index aabf44c..761d722 100644 --- a/src/Parser/Parser.h +++ b/src/Parser/Parser.h @@ -19,14 +19,17 @@ private: int currentIndex = 0; shared_ptr nextStatement(); - shared_ptr matchStatementFunction(); + shared_ptr nextInBlockStatement(); + + shared_ptr matchStatementMetaExternFunction(); shared_ptr matchStatementVariable(); + shared_ptr matchStatementFunction(); + + shared_ptr matchStatementBlock(vector terminalTokenKinds); shared_ptr matchStatementAssignment(); shared_ptr matchStatementReturn(); shared_ptr matchStatementLoop(); shared_ptr matchStatementExpression(); - shared_ptr matchStatementMetaExternFunction(); - shared_ptr matchStatementBlock(vector terminalTokenKinds); shared_ptr matchStatementInvalid(string message = ""); shared_ptr nextExpression(); diff --git a/test.brc b/test.brc index ec1a111..bd1d6ef 100644 --- a/test.brc +++ b/test.brc @@ -1,2 +1,6 @@ +dummy sint32 <- 55 stuff fun: num1 sint32, num2 sint32 -> sint32 + num1 sint32 <- 42 + num1 <- 5 * num1 + ret num1 ; \ No newline at end of file