From f4cde21a0a55fa0b9e19e0adebd43e4915fb360d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Mon, 30 Jun 2025 23:40:03 +0900 Subject: [PATCH 1/7] Changed if-else symbo, cleaned up parsing a bit --- Grammar.md | 122 ++++++++++++++++++++++++++++++++++++++++++ src/Lexer/Lexer.cpp | 24 +++++---- src/Lexer/Lexer.h | 8 ++- src/Lexer/Token.cpp | 8 +-- src/Lexer/Token.h | 4 +- src/Parser/Parser.cpp | 98 +++++++++++++++++---------------- src/Parser/Parser.h | 4 +- src/main.cpp | 4 +- test.brc | 20 ++----- 9 files changed, 202 insertions(+), 90 deletions(-) create mode 100644 Grammar.md diff --git a/Grammar.md b/Grammar.md new file mode 100644 index 0000000..e787985 --- /dev/null +++ b/Grammar.md @@ -0,0 +1,122 @@ +`?` 0 or 1 instances + +`*` 0 or more instance + +`+` 1 or more instances + +`` New line + +`` Identifier + +### Statement Function +` fun (: ? (, )*)? (-> ? )? ;` +``` +stuff fun +; + +stuff fun -> sint32 + ret 42 +; + +stuff fun: num1 sint32, +num2 sint32 -> +sint32 + ret num1 + num2 +; +``` + +### Statement Assignment +` <- ` +``` +num1 <- 5 + +``` + +### StatementBlock +( )* + + +### StatementExpression + + +StatementFunction: + fun (: (, )*)? (-> )? +; + +### StatementLoop +`loop [ | ] (, ? (, ? )?)? ;` +``` +loop i sint32 <- 0, true, i < 10 + doStuff(i) +; + +loop i sint32 <- 0, +true, i < 10 + doStuff(i) +; + +loop i sint32 <- 0, +true, +i < 10 + doStuff(i) +; +``` + +`loop ( (, ? )?)? ;` + + +StatementMetaExternFunction: +@extern fun (: (, )*)? (-> )? + + +### StatementReturn +`ret ` + +### Statemnet Variable +` <- ` + +### ExpressionVariable: +`` + +### Expression If Else: +`if : ` +``` +if num1 > 10: putchar('T') + +``` + +`if : ;` +``` +if num1 > 10: + num1 <- 500 + putchar('S') +; +``` + +`if : else ` +``` +if num1 > 10: putchar('T') else putchar('N') + +``` + +`if : else ;` +``` +if num1 > 10: + putchar('T') +else + putchar('N') +; + +``` + +`if : else ;` +``` +if num1 > 10: putchar('T') else + num1 <- 500 + putchar('F') +; + +``` + +### Expression Block +`( )* ( ?)? !` \ No newline at end of file diff --git a/src/Lexer/Lexer.cpp b/src/Lexer/Lexer.cpp index ddd9aa9..d200fe1 100644 --- a/src/Lexer/Lexer.cpp +++ b/src/Lexer/Lexer.cpp @@ -4,8 +4,12 @@ Lexer::Lexer(string source): source(source) { } vector> Lexer::getTokens() { - shared_ptr token = nullptr; - tokens.clear(); + currentIndex = 0; + currentLine = 0; + currentColumn = 0; + + vector> tokens; + shared_ptr token; do { token = nextToken(); // Got a nullptr, shouldn't have happened @@ -123,14 +127,6 @@ shared_ptr Lexer::nextToken() { token = match(TokenKind::SEMICOLON, ";", false); if (token != nullptr) return token; - - token = match(TokenKind::QUESTION_QUESTION, "??", false); - if (token != nullptr) - return token; - - token = match(TokenKind::QUESTION, "?", false); - if (token != nullptr) - return token; token = match(TokenKind::LEFT_ARROW, "<-", false); if (token != nullptr) @@ -187,6 +183,14 @@ shared_ptr Lexer::nextToken() { return token; // keywords + token = match(TokenKind::IF, "if", true); + if (token != nullptr) + return token; + + token = match(TokenKind::ELSE, "else", true); + if (token != nullptr) + return token; + token = match(TokenKind::FUNCTION, "fun", true); if (token != nullptr) return token; diff --git a/src/Lexer/Lexer.h b/src/Lexer/Lexer.h index f0fe2bb..c117b08 100644 --- a/src/Lexer/Lexer.h +++ b/src/Lexer/Lexer.h @@ -10,11 +10,9 @@ using namespace std; class Lexer { private: string source; - int currentIndex = 0; - int currentLine = 0; - int currentColumn = 0; - - vector> tokens; + int currentIndex; + int currentLine; + int currentColumn; shared_ptr nextToken(); shared_ptr match(TokenKind kind, string lexme, bool needsSeparator); diff --git a/src/Lexer/Token.cpp b/src/Lexer/Token.cpp index b912fb3..aa08a24 100644 --- a/src/Lexer/Token.cpp +++ b/src/Lexer/Token.cpp @@ -112,10 +112,6 @@ string Token::toString() { return ":"; case TokenKind::SEMICOLON: return ";"; - case TokenKind::QUESTION_QUESTION: - return "??"; - case TokenKind::QUESTION: - return "?"; case TokenKind::LEFT_ARROW: return "←"; case TokenKind::RIGHT_ARROW: @@ -136,6 +132,10 @@ string Token::toString() { case TokenKind::TYPE: return "TYPE(" + lexme + ")"; + case TokenKind::IF: + return "IF"; + case TokenKind::ELSE: + return "ELSE"; case TokenKind::FUNCTION: return "FUNCTION"; case TokenKind::RETURN: diff --git a/src/Lexer/Token.h b/src/Lexer/Token.h index dd2cb9d..1e3adbe 100644 --- a/src/Lexer/Token.h +++ b/src/Lexer/Token.h @@ -26,14 +26,14 @@ enum class TokenKind { COMMA, COLON, SEMICOLON, - QUESTION, - QUESTION_QUESTION, LEFT_ARROW, RIGHT_ARROW, FUNCTION, RETURN, REPEAT, + IF, + ELSE, BOOL, INTEGER_DEC, diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 57eb0e2..7ed697d 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -436,14 +436,12 @@ shared_ptr Parser::matchExpressionGrouping() { if (tryMatchingTokenKinds({TokenKind::LEFT_PAREN}, true, true)) { shared_ptr expression = matchTerm(); // has grouped expression failed? - if (expression == nullptr) { - return matchExpressionInvalid(); - } else if(!expression->isValid()) { - return expression; + if (expression == nullptr || !expression->isValid()) { + return expression ?: matchExpressionInvalid("Expected expression"); } else if (tryMatchingTokenKinds({TokenKind::RIGHT_PAREN}, true, true)) { return make_shared(expression); } else { - return matchExpressionInvalid(); + return matchExpressionInvalid("Unexpected token"); } } @@ -488,52 +486,56 @@ shared_ptr Parser::matchExpressionCall() { tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // optional new line if (!tryMatchingTokenKinds({TokenKind::RIGHT_PAREN}, true, true)) - return matchExpressionInvalid(); + return matchExpressionInvalid("Expected \")\""); return make_shared(identifierToken->getLexme(), argumentExpressions); } shared_ptr Parser::matchExpressionIfElse() { - // Try maching '?' - shared_ptr token = tokens.at(currentIndex); - - if (!tryMatchingTokenKinds({TokenKind::QUESTION}, true, true)) + if (!tryMatchingTokenKinds({TokenKind::IF}, true, true)) return nullptr; - // Then get condition - shared_ptr condition = nextExpression(); - if (condition == nullptr) - return matchExpressionInvalid(); - else if (!condition->isValid()) - return condition; - - // Consume optional ':' - tryMatchingTokenKinds({TokenKind::COLON}, true, true); - // Consume optional new line - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); - - // Match then block - shared_ptr thenBlock = matchExpressionBlock({TokenKind::COLON, TokenKind::SEMICOLON}, false); - if (thenBlock == nullptr) - return matchExpressionInvalid(); - else if (!thenBlock->isValid()) - return thenBlock; - - // Match else block. Then and else block are separated by ':' + shared_ptr condition; + shared_ptr thenBlock; shared_ptr elseBlock; - if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { - bool isSingleLine = !tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); - vector terminalTokens = {TokenKind::SEMICOLON, TokenKind::COMMA, TokenKind::RIGHT_PAREN}; - if (isSingleLine) - terminalTokens.push_back(TokenKind::NEW_LINE); - elseBlock = matchExpressionBlock(terminalTokens, false); - if (elseBlock == nullptr) - return matchExpressionInvalid(); - else if (!elseBlock->isValid()) - return elseBlock; + // condition expression + condition = nextExpression(); + if (condition == nullptr || !condition->isValid()) + return condition ?: matchExpressionInvalid("Expected condition expression"); + + if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true)) + return matchExpressionInvalid("Expected \":\""); + + // then + bool isMultiLine = false; + + if (tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) + isMultiLine = true; + + // then block + if (isMultiLine) + thenBlock = matchExpressionBlock({TokenKind::ELSE, TokenKind::SEMICOLON}); + else + thenBlock = matchExpressionBlock({TokenKind::ELSE, TokenKind::NEW_LINE}); + if (thenBlock == nullptr || !thenBlock->isValid()) + return thenBlock ?: matchExpressionInvalid("Expected then block"); + + // else + if (tryMatchingTokenKinds({TokenKind::ELSE}, true, true)) { + if (tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) + isMultiLine = true; + + // else block + if (isMultiLine) + elseBlock = matchExpressionBlock({TokenKind::SEMICOLON}); + else + elseBlock = matchExpressionBlock({TokenKind::NEW_LINE}); + + if (elseBlock == nullptr || !elseBlock->isValid()) + return elseBlock ?: matchExpressionInvalid("Expected else block"); } - tryMatchingTokenKinds({TokenKind::SEMICOLON}, true, true); + tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true); return make_shared(condition, dynamic_pointer_cast(thenBlock), dynamic_pointer_cast(elseBlock)); } @@ -553,7 +555,7 @@ shared_ptr Parser::matchExpressionBinary(shared_ptr left } if (right == nullptr) { - return matchExpressionInvalid(); + return matchExpressionInvalid("Expected right-side expression"); } else if (!right->isValid()) { return right; } else { @@ -563,25 +565,21 @@ shared_ptr Parser::matchExpressionBinary(shared_ptr left return nullptr; } -shared_ptr Parser::matchExpressionBlock(vector terminalTokenKinds, bool shouldConsumeTerminal) { +shared_ptr Parser::matchExpressionBlock(vector terminalTokenKinds) { vector> statements; - bool hasNewLineTerminal = find(terminalTokenKinds.begin(), terminalTokenKinds.end(), TokenKind::NEW_LINE) != terminalTokenKinds.end(); - while (!tryMatchingTokenKinds(terminalTokenKinds, false, shouldConsumeTerminal)) { + while (!tryMatchingTokenKinds(terminalTokenKinds, false, false)) { shared_ptr statement = nextStatement(); if (statement == nullptr || !statement->isValid()) - return matchExpressionInvalid(); + return matchExpressionInvalid("Expected statement"); else statements.push_back(statement); - - if (hasNewLineTerminal && tokens.at(currentIndex-1)->getKind() == TokenKind::NEW_LINE) - currentIndex--; } return make_shared(statements); } -shared_ptr Parser::matchExpressionInvalid() { +shared_ptr Parser::matchExpressionInvalid(string message) { return make_shared(tokens.at(currentIndex)); } diff --git a/src/Parser/Parser.h b/src/Parser/Parser.h index 15efaac..7c3fdd6 100644 --- a/src/Parser/Parser.h +++ b/src/Parser/Parser.h @@ -42,8 +42,8 @@ private: shared_ptr matchExpressionCall(); shared_ptr matchExpressionIfElse(); shared_ptr matchExpressionBinary(shared_ptr left); - shared_ptr matchExpressionBlock(vector terminalTokenKinds, bool shouldConsumeTerminal); - shared_ptr matchExpressionInvalid(); + shared_ptr matchExpressionBlock(vector terminalTokenKinds); + shared_ptr matchExpressionInvalid(string message); bool tryMatchingTokenKinds(vector kinds, bool shouldMatchAll, bool shouldAdvance); optional valueTypeForToken(shared_ptr token); diff --git a/src/main.cpp b/src/main.cpp index df6119c..4fb8794 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -74,14 +74,14 @@ int main(int argc, char **argv) { cout << endl << endl; } - ModuleBuilder moduleBuilder(moduleName, inputFilePath, statements); + /*ModuleBuilder moduleBuilder(moduleName, inputFilePath, statements); shared_ptr module = moduleBuilder.getModule(); if (isVerbose) { module->print(llvm::outs(), nullptr); } CodeGenerator codeGenerator(module); - codeGenerator.generateObjectFile(outputKind); + codeGenerator.generateObjectFile(outputKind);*/ return 0; } \ No newline at end of file diff --git a/test.brc b/test.brc index 9d31689..c6ce4fc 100644 --- a/test.brc +++ b/test.brc @@ -1,17 +1,7 @@ -@extern putchar fun: character sint32 -> sint32 - -stuff fun: num1 sint32, num2 sint32 - ? num1 > num2 - putchar(0x54) - - : - putchar(0x4e) - +stuff fun: num1 sint32, num2 sint32 -> sint32 + if num1 > num2: + ret 32 * num1 + else + ret 45 * num2 ; - putchar(0x0a) -; - -main fun -> sint32 - stuff(8, 108) - ret 42 ; \ No newline at end of file From cac252a0deba62dfe8ebcd6407a1ed2509f222ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Mon, 30 Jun 2025 23:51:48 +0900 Subject: [PATCH 2/7] Cleaned up function definition a bit --- src/Parser/Parser.cpp | 79 +++++++++++++++++++------------------------ src/Parser/Parser.h | 2 +- 2 files changed, 36 insertions(+), 45 deletions(-) diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 7ed697d..95e617f 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -52,21 +52,21 @@ shared_ptr Parser::nextStatement() { if (statement != nullptr) return statement; - statement = matchStatementAssignment(); + /*statement = matchStatementAssignment(); if (statement != nullptr) - return statement; + return statement;*/ - statement = matchStatementReturn(); + /*statement = matchStatementReturn(); if (statement != nullptr) - return statement; + return statement;*/ - statement = matchStatementLoop(); + /*statement = matchStatementLoop(); if (statement != nullptr) - return statement; + return statement;*/ - statement = matchStatementExpression(); + /*statement = matchStatementExpression(); if (statement != nullptr) - return statement; + return statement;*/ statement = matchStatementMetaExternFunction(); if (statement != nullptr) @@ -79,21 +79,23 @@ shared_ptr Parser::matchStatementFunction() { if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false)) return nullptr; - shared_ptr identifierToken = tokens.at(currentIndex); - currentIndex++; + string name; + vector> arguments; + ValueType returnType = ValueType::NONE; + shared_ptr statementBlock; + + // name + name = tokens.at(currentIndex++)->getLexme(); currentIndex++; // skip fun - // Get arguments - vector> arguments; + // arguments if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { do { tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) return matchStatementInvalid("Expected function argument"); - shared_ptr identifierToken = tokens.at(currentIndex); - currentIndex++; // identifier - shared_ptr typeToken = tokens.at(currentIndex); - currentIndex++; // type + shared_ptr identifierToken = tokens.at(currentIndex++); + shared_ptr typeToken = tokens.at(currentIndex++); optional argumentType = valueTypeForToken(typeToken); if (!argumentType) return matchStatementInvalid("Invalid argument type"); @@ -102,12 +104,10 @@ shared_ptr Parser::matchStatementFunction() { } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); } - // consume optional new line - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); - - // Return type - ValueType returnType = ValueType::NONE; + // return type if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { + tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line + shared_ptr typeToken = tokens.at(currentIndex); optional type = valueTypeForToken(typeToken); if (!type) @@ -121,16 +121,15 @@ shared_ptr Parser::matchStatementFunction() { return matchStatementInvalid("Expected new line after function declaration"); } - shared_ptr statementBlock = matchStatementBlock({TokenKind::SEMICOLON}, true); - if (statementBlock == nullptr) - return matchStatementInvalid(); - else if (!statementBlock->isValid()) - return statementBlock; + // block + statementBlock = matchStatementBlock({TokenKind::SEMICOLON}); + if (statementBlock == nullptr || !statementBlock->isValid()) + return statementBlock ?: matchStatementInvalid(); - if(!tryMatchingTokenKinds({TokenKind::NEW_LINE}, false, true)) - return matchStatementInvalid("Expected a new line after a function declaration"); + if(!tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true)) + return matchStatementInvalid("Expected a \";\" after a function declaration"); - return make_shared(identifierToken->getLexme(), arguments, returnType, dynamic_pointer_cast(statementBlock)); + return make_shared(name, arguments, returnType, dynamic_pointer_cast(statementBlock)); } shared_ptr Parser::matchStatementVariable() { @@ -243,7 +242,7 @@ shared_ptr Parser::matchStatementLoop() { } // body - shared_ptr bodyBlockStatement = matchStatementBlock({TokenKind::SEMICOLON}, true); + shared_ptr bodyBlockStatement = matchStatementBlock({TokenKind::SEMICOLON}); if (bodyBlockStatement == nullptr) return matchStatementInvalid("Expected block statement"); else if (!bodyBlockStatement->isValid()) @@ -320,21 +319,14 @@ shared_ptr Parser::matchStatementMetaExternFunction() { return make_shared(identifierToken->getLexme(), arguments, returnType); } -shared_ptr Parser::matchStatementBlock(vector terminalTokenKinds, bool shouldConsumeTerminal) { +shared_ptr Parser::matchStatementBlock(vector terminalTokenKinds) { vector> statements; - bool hasNewLineTerminal = find(terminalTokenKinds.begin(), terminalTokenKinds.end(), TokenKind::NEW_LINE) != terminalTokenKinds.end(); - while (!tryMatchingTokenKinds(terminalTokenKinds, false, shouldConsumeTerminal)) { + while (!tryMatchingTokenKinds(terminalTokenKinds, false, false)) { shared_ptr statement = nextStatement(); - if (statement == nullptr) - return matchStatementInvalid(); - else if (!statement->isValid()) - return statement; - else - statements.push_back(statement); - - if (hasNewLineTerminal && tokens.at(currentIndex-1)->getKind() == TokenKind::NEW_LINE) - currentIndex--; + if (statement == nullptr || !statement->isValid()) + return statement ?: matchStatementInvalid("Expected statement"); + statements.push_back(statement); } return make_shared(statements); @@ -572,8 +564,7 @@ shared_ptr Parser::matchExpressionBlock(vector terminalTo shared_ptr statement = nextStatement(); if (statement == nullptr || !statement->isValid()) return matchExpressionInvalid("Expected statement"); - else - statements.push_back(statement); + statements.push_back(statement); } return make_shared(statements); diff --git a/src/Parser/Parser.h b/src/Parser/Parser.h index 7c3fdd6..aabf44c 100644 --- a/src/Parser/Parser.h +++ b/src/Parser/Parser.h @@ -26,7 +26,7 @@ private: shared_ptr matchStatementLoop(); shared_ptr matchStatementExpression(); shared_ptr matchStatementMetaExternFunction(); - shared_ptr matchStatementBlock(vector terminalTokenKinds, bool shouldConsumeTerminal); + shared_ptr matchStatementBlock(vector terminalTokenKinds); shared_ptr matchStatementInvalid(string message = ""); shared_ptr nextExpression(); From 2e5e19b30dbfbbfefe05d14b52dbcc57736a88bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Tue, 1 Jul 2025 10:17:57 +0900 Subject: [PATCH 3/7] Parser updates --- Grammar.md => Syntax.md | 54 +++++++++++++------ src/Parser/Parser.cpp | 116 +++++++++++++++++++++------------------- test.brc | 5 -- 3 files changed, 97 insertions(+), 78 deletions(-) rename Grammar.md => Syntax.md (66%) diff --git a/Grammar.md b/Syntax.md similarity index 66% rename from Grammar.md rename to Syntax.md index e787985..b0bd0c7 100644 --- a/Grammar.md +++ b/Syntax.md @@ -1,15 +1,30 @@ +# Detailed Syntax + +This documents specifies what is the allowed syntax for statements and expressions. + +### Symbols used `?` 0 or 1 instances `*` 0 or more instance `+` 1 or more instances +`` Terminal token, usually new line , but it can also sometimes be `,`, `else`, or `;` + `` New line -`` Identifier +`` Identifier + +`` Expression block + +`` Statements block + +`` Expression + +`` Statement ### Statement Function -` fun (: ? (, )*)? (-> ? )? ;` +` fun (: ? (, ? )*)? (-> ? )? ; ` ``` stuff fun ; @@ -25,24 +40,37 @@ sint32 ; ``` +### Statement Meta Extern Function: +`@extern fun (: ? (, ? )*)? (-> ? )? ` +``` +@extern sum fun: +num1 sint32, +num2 sint32 -> +sint32 +``` + + +### Statemnet Variable +` <- ` +``` +num sint32 <- 42 + +``` + ### Statement Assignment -` <- ` +` <- ` ``` num1 <- 5 ``` -### StatementBlock +### Statement Block ( )* -### StatementExpression +### Statement Expression -StatementFunction: - fun (: (, )*)? (-> )? -; - ### StatementLoop `loop [ | ] (, ? (, ? )?)? ;` ``` @@ -64,17 +92,9 @@ i < 10 `loop ( (, ? )?)? ;` - -StatementMetaExternFunction: -@extern fun (: (, )*)? (-> )? - - ### StatementReturn `ret ` -### Statemnet Variable -` <- ` - ### ExpressionVariable: `` diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 95e617f..718984a 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -33,6 +33,12 @@ vector> Parser::getStatements() { exit(1); } statements.push_back(statement); + + // Expect new line after statement + if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) { + cerr << "Expected new line" << endl; + exit(1); + } } return statements; @@ -115,12 +121,12 @@ shared_ptr Parser::matchStatementFunction() { returnType = *type; currentIndex++; // type - - // consume new line - if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) - return matchStatementInvalid("Expected new line after function declaration"); } + // consume new line + if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) + return matchStatementInvalid("Expected new line after function declaration"); + // block statementBlock = matchStatementBlock({TokenKind::SEMICOLON}); if (statementBlock == nullptr || !statementBlock->isValid()) @@ -136,7 +142,7 @@ shared_ptr Parser::matchStatementVariable() { if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) return nullptr; - shared_ptr identifierToken = tokens.at(currentIndex); + shared_ptr identifierToken = tokens.at(currentIndex++); currentIndex++; // identifier shared_ptr valueTypeToken = tokens.at(currentIndex); @@ -158,7 +164,7 @@ shared_ptr Parser::matchStatementVariable() { shared_ptr expression = nextExpression(); if (expression == nullptr || !expression->isValid()) - return matchStatementInvalid(); + return matchStatementInvalid("Invalid expression"); // Expect comma or new line if (!tryMatchingTokenKinds({TokenKind::COMMA}, true, false) && !tryMatchingTokenKinds({TokenKind::NEW_LINE}, false, true)) @@ -167,6 +173,54 @@ shared_ptr Parser::matchStatementVariable() { return make_shared(identifierToken->getLexme(), valueType, expression); } +shared_ptr Parser::matchStatementMetaExternFunction() { + if (!tryMatchingTokenKinds({TokenKind::M_EXTERN, TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false)) + return nullptr; + + string name; + vector> arguments; + ValueType returnType = ValueType::NONE; + + currentIndex++; // skip meta + shared_ptr identifierToken = tokens.at(currentIndex++); + currentIndex++; // skip fun + + // arguments + if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { + do { + tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line + if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) + return matchStatementInvalid("Expected function argument"); + shared_ptr identifierToken = tokens.at(currentIndex++); + shared_ptr typeToken = tokens.at(currentIndex++); + optional argumentType = valueTypeForToken(typeToken); + if (!argumentType) + return matchStatementInvalid("Invalid argument type"); + + arguments.push_back(pair(identifierToken->getLexme(), *argumentType)); + } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); + } + + // Return type + if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { + tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line + + shared_ptr typeToken = tokens.at(currentIndex); + optional type = valueTypeForToken(typeToken); + if (!type) + return matchStatementInvalid("Expected return type"); + returnType = *type; + + currentIndex++; // type + + // consume new line + if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) + return matchStatementInvalid("Expected new line after function declaration"); + } + + return make_shared(identifierToken->getLexme(), arguments, returnType); +} + shared_ptr Parser::matchStatementAssignment() { if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::LEFT_ARROW}, true, false)) return nullptr; @@ -269,56 +323,6 @@ shared_ptr Parser::matchStatementExpression() { return make_shared(expression); } -shared_ptr Parser::matchStatementMetaExternFunction() { - if (!tryMatchingTokenKinds({TokenKind::M_EXTERN, TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false)) - return nullptr; - - currentIndex++; // skip meta - shared_ptr identifierToken = tokens.at(currentIndex); - currentIndex++; - currentIndex++; // skip fun - - // Get arguments - vector> arguments; - if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { - do { - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) - return matchStatementInvalid("Expected function argument"); - shared_ptr identifierToken = tokens.at(currentIndex); - currentIndex++; // identifier - shared_ptr typeToken = tokens.at(currentIndex); - currentIndex++; // type - optional argumentType = valueTypeForToken(typeToken); - if (!argumentType) - return matchStatementInvalid("Invalid argument type"); - - arguments.push_back(pair(identifierToken->getLexme(), *argumentType)); - } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); - } - - // consume optional new line - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); - - // Return type - ValueType returnType = ValueType::NONE; - if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { - shared_ptr typeToken = tokens.at(currentIndex); - optional type = valueTypeForToken(typeToken); - if (!type) - return matchStatementInvalid("Expected return type"); - returnType = *type; - - currentIndex++; // type - - // consume new line - if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) - return matchStatementInvalid("Expected new line after function declaration"); - } - - return make_shared(identifierToken->getLexme(), arguments, returnType); -} - shared_ptr Parser::matchStatementBlock(vector terminalTokenKinds) { vector> statements; diff --git a/test.brc b/test.brc index c6ce4fc..ec1a111 100644 --- a/test.brc +++ b/test.brc @@ -1,7 +1,2 @@ stuff fun: num1 sint32, num2 sint32 -> sint32 - if num1 > num2: - ret 32 * num1 - else - ret 45 * num2 - ; ; \ No newline at end of file From 993c204596c27d9d644fed0c5570d40859721bdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Tue, 1 Jul 2025 12:06:36 +0900 Subject: [PATCH 4/7] Better parser handling --- Syntax.md | 59 +++++++---- src/Parser/Parser.cpp | 230 +++++++++++++++++++++--------------------- src/Parser/Parser.h | 9 +- test.brc | 4 + 4 files changed, 163 insertions(+), 139 deletions(-) diff --git a/Syntax.md b/Syntax.md index b0bd0c7..64bcfe9 100644 --- a/Syntax.md +++ b/Syntax.md @@ -23,8 +23,44 @@ This documents specifies what is the allowed syntax for statements and expressio `` Statement +### Overall structure +``` + +| ++ ++ ++ + | + + + | + + + + + + + + + | + + ... + + + | + + + + + | + + ... +``` + +### Statement Meta Extern Function +`@extern fun (: ? (, ? )*)? (-> ? )?` +``` +@extern sum fun: +num1 sint32, +num2 sint32 -> +sint32 +``` + +### Statement Variable +` <- ` + ### Statement Function -` fun (: ? (, ? )*)? (-> ? )? ; ` +` fun (: ? (, ? )*)? (-> ? )? ;` ``` stuff fun ; @@ -40,23 +76,6 @@ sint32 ; ``` -### Statement Meta Extern Function: -`@extern fun (: ? (, ? )*)? (-> ? )? ` -``` -@extern sum fun: -num1 sint32, -num2 sint32 -> -sint32 -``` - - -### Statemnet Variable -` <- ` -``` -num sint32 <- 42 - -``` - ### Statement Assignment ` <- ` ``` @@ -92,8 +111,8 @@ i < 10 `loop ( (, ? )?)? ;` -### StatementReturn -`ret ` +### Statement Return +`ret ? ` ### ExpressionVariable: `` diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 718984a..69748d3 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -58,22 +58,6 @@ shared_ptr Parser::nextStatement() { if (statement != nullptr) return statement; - /*statement = matchStatementAssignment(); - if (statement != nullptr) - return statement;*/ - - /*statement = matchStatementReturn(); - if (statement != nullptr) - return statement;*/ - - /*statement = matchStatementLoop(); - if (statement != nullptr) - return statement;*/ - - /*statement = matchStatementExpression(); - if (statement != nullptr) - return statement;*/ - statement = matchStatementMetaExternFunction(); if (statement != nullptr) return statement; @@ -81,6 +65,106 @@ shared_ptr Parser::nextStatement() { return matchStatementInvalid("Unexpected token"); } +shared_ptr Parser::nextInBlockStatement() { + shared_ptr statement; + + statement = matchStatementVariable(); + if (statement != nullptr) + return statement; + + statement = matchStatementAssignment(); + if (statement != nullptr) + return statement; + + statement = matchStatementReturn(); + if (statement != nullptr) + return statement; + + statement = matchStatementLoop(); + if (statement != nullptr) + return statement; + + statement = matchStatementExpression(); + if (statement != nullptr) + return statement; + + return matchStatementInvalid("Unexpected token"); +} + +shared_ptr Parser::matchStatementMetaExternFunction() { + if (!tryMatchingTokenKinds({TokenKind::M_EXTERN, TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false)) + return nullptr; + + string name; + vector> arguments; + ValueType returnType = ValueType::NONE; + + currentIndex++; // skip meta + shared_ptr identifierToken = tokens.at(currentIndex++); + currentIndex++; // skip fun + + // arguments + if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { + do { + tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line + if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) + return matchStatementInvalid("Expected function argument"); + shared_ptr identifierToken = tokens.at(currentIndex++); + shared_ptr typeToken = tokens.at(currentIndex++); + optional argumentType = valueTypeForToken(typeToken); + if (!argumentType) + return matchStatementInvalid("Invalid argument type"); + + arguments.push_back(pair(identifierToken->getLexme(), *argumentType)); + } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); + } + + // Return type + if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { + tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line + + shared_ptr typeToken = tokens.at(currentIndex); + optional type = valueTypeForToken(typeToken); + if (!type) + return matchStatementInvalid("Expected return type"); + returnType = *type; + + currentIndex++; // type + } + + return make_shared(identifierToken->getLexme(), arguments, returnType); +} + +shared_ptr Parser::matchStatementVariable() { + if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) + return nullptr; + + shared_ptr identifierToken = tokens.at(currentIndex++); + shared_ptr valueTypeToken = tokens.at(currentIndex); + + ValueType valueType; + if (valueTypeToken->getLexme().compare("bool") == 0) + valueType = ValueType::BOOL; + else if (valueTypeToken->getLexme().compare("sint32") == 0) + valueType = ValueType::SINT32; + else if (valueTypeToken->getLexme().compare("real32") == 0) + valueType = ValueType::REAL32; + else + return matchStatementInvalid("Invalid type"); + + currentIndex++; // type + + // Expect left arrow + if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true)) + return matchStatementInvalid("Expected left arrow"); + + shared_ptr expression = nextExpression(); + if (expression == nullptr || !expression->isValid()) + return matchStatementInvalid("Invalid expression"); + + return make_shared(identifierToken->getLexme(), valueType, expression); +} + shared_ptr Parser::matchStatementFunction() { if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false)) return nullptr; @@ -138,105 +222,37 @@ shared_ptr Parser::matchStatementFunction() { return make_shared(name, arguments, returnType, dynamic_pointer_cast(statementBlock)); } -shared_ptr Parser::matchStatementVariable() { - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) - return nullptr; +shared_ptr Parser::matchStatementBlock(vector terminalTokenKinds) { + vector> statements; - shared_ptr identifierToken = tokens.at(currentIndex++); - currentIndex++; // identifier - shared_ptr valueTypeToken = tokens.at(currentIndex); + while (!tryMatchingTokenKinds(terminalTokenKinds, false, false)) { + shared_ptr statement = nextInBlockStatement(); + if (statement == nullptr || !statement->isValid()) + return statement ?: matchStatementInvalid("Expected statement"); + statements.push_back(statement); - ValueType valueType; - if (valueTypeToken->getLexme().compare("bool") == 0) - valueType = ValueType::BOOL; - else if (valueTypeToken->getLexme().compare("sint32") == 0) - valueType = ValueType::SINT32; - else if (valueTypeToken->getLexme().compare("real32") == 0) - valueType = ValueType::REAL32; - else - return matchStatementInvalid("Invalid type"); + if (tryMatchingTokenKinds(terminalTokenKinds, false, false)) + break; - currentIndex++; // type - - // Expect left arrow - if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true)) - return matchStatementInvalid("Expected left arrow"); - - shared_ptr expression = nextExpression(); - if (expression == nullptr || !expression->isValid()) - return matchStatementInvalid("Invalid expression"); - - // Expect comma or new line - if (!tryMatchingTokenKinds({TokenKind::COMMA}, true, false) && !tryMatchingTokenKinds({TokenKind::NEW_LINE}, false, true)) - return matchStatementInvalid("Expected a new line after variable declaration"); - - return make_shared(identifierToken->getLexme(), valueType, expression); -} - -shared_ptr Parser::matchStatementMetaExternFunction() { - if (!tryMatchingTokenKinds({TokenKind::M_EXTERN, TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false)) - return nullptr; - - string name; - vector> arguments; - ValueType returnType = ValueType::NONE; - - currentIndex++; // skip meta - shared_ptr identifierToken = tokens.at(currentIndex++); - currentIndex++; // skip fun - - // arguments - if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { - do { - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) - return matchStatementInvalid("Expected function argument"); - shared_ptr identifierToken = tokens.at(currentIndex++); - shared_ptr typeToken = tokens.at(currentIndex++); - optional argumentType = valueTypeForToken(typeToken); - if (!argumentType) - return matchStatementInvalid("Invalid argument type"); - - arguments.push_back(pair(identifierToken->getLexme(), *argumentType)); - } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); - } - - // Return type - if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - - shared_ptr typeToken = tokens.at(currentIndex); - optional type = valueTypeForToken(typeToken); - if (!type) - return matchStatementInvalid("Expected return type"); - returnType = *type; - - currentIndex++; // type - - // consume new line + // except new line if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) - return matchStatementInvalid("Expected new line after function declaration"); + return matchStatementInvalid("Expected new line"); } - return make_shared(identifierToken->getLexme(), arguments, returnType); + return make_shared(statements); } shared_ptr Parser::matchStatementAssignment() { if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::LEFT_ARROW}, true, false)) return nullptr; - shared_ptr identifierToken = tokens.at(currentIndex); - currentIndex++; // identifier + shared_ptr identifierToken = tokens.at(currentIndex++); currentIndex++; // arrow shared_ptr expression = nextExpression(); if (expression == nullptr || !expression->isValid()) return matchStatementInvalid("Expected expression"); - // Expect new line - if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, false, true)) - return matchStatementInvalid("Expected a new line after variable declaration"); - return make_shared(identifierToken->getLexme(), expression); } @@ -246,12 +262,7 @@ shared_ptr Parser::matchStatementReturn() { shared_ptr expression = nextExpression(); if (expression != nullptr && !expression->isValid()) - return matchStatementInvalid(); - - if (!tryMatchingTokenKinds({TokenKind::NEW_LINE, TokenKind::SEMICOLON}, false, false)) - return matchStatementInvalid(); - - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); + return matchStatementInvalid("Expected expression"); return make_shared(expression); } @@ -323,19 +334,6 @@ shared_ptr Parser::matchStatementExpression() { return make_shared(expression); } -shared_ptr Parser::matchStatementBlock(vector terminalTokenKinds) { - vector> statements; - - while (!tryMatchingTokenKinds(terminalTokenKinds, false, false)) { - shared_ptr statement = nextStatement(); - if (statement == nullptr || !statement->isValid()) - return statement ?: matchStatementInvalid("Expected statement"); - statements.push_back(statement); - } - - return make_shared(statements); -} - shared_ptr Parser::matchStatementInvalid(string message) { return make_shared(tokens.at(currentIndex), message); } diff --git a/src/Parser/Parser.h b/src/Parser/Parser.h index aabf44c..761d722 100644 --- a/src/Parser/Parser.h +++ b/src/Parser/Parser.h @@ -19,14 +19,17 @@ private: int currentIndex = 0; shared_ptr nextStatement(); - shared_ptr matchStatementFunction(); + shared_ptr nextInBlockStatement(); + + shared_ptr matchStatementMetaExternFunction(); shared_ptr matchStatementVariable(); + shared_ptr matchStatementFunction(); + + shared_ptr matchStatementBlock(vector terminalTokenKinds); shared_ptr matchStatementAssignment(); shared_ptr matchStatementReturn(); shared_ptr matchStatementLoop(); shared_ptr matchStatementExpression(); - shared_ptr matchStatementMetaExternFunction(); - shared_ptr matchStatementBlock(vector terminalTokenKinds); shared_ptr matchStatementInvalid(string message = ""); shared_ptr nextExpression(); diff --git a/test.brc b/test.brc index ec1a111..bd1d6ef 100644 --- a/test.brc +++ b/test.brc @@ -1,2 +1,6 @@ +dummy sint32 <- 55 stuff fun: num1 sint32, num2 sint32 -> sint32 + num1 sint32 <- 42 + num1 <- 5 * num1 + ret num1 ; \ No newline at end of file From 505eb7eca7406d12f77ad53eb5f8ec613df19182 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Tue, 1 Jul 2025 13:45:40 +0900 Subject: [PATCH 5/7] Improved loop parsing --- Syntax.md | 55 ++++++++++++++++++++----------------------- src/Parser/Parser.cpp | 41 ++++++++++++++++++++------------ test.brc | 6 ++--- 3 files changed, 55 insertions(+), 47 deletions(-) diff --git a/Syntax.md b/Syntax.md index 64bcfe9..16a2af9 100644 --- a/Syntax.md +++ b/Syntax.md @@ -9,19 +9,11 @@ This documents specifies what is the allowed syntax for statements and expressio `+` 1 or more instances -`` Terminal token, usually new line , but it can also sometimes be `,`, `else`, or `;` - `` New line `` Identifier -`` Expression block - -`` Statements block - -`` Expression - -`` Statement +`` Type ### Overall structure ``` @@ -36,7 +28,7 @@ This documents specifies what is the allowed syntax for statements and expressio + + + - + + + | + ... + @@ -77,47 +69,52 @@ sint32 ``` ### Statement Assignment -` <- ` +` <- ` ``` num1 <- 5 - ``` ### Statement Block -( )* +`( )*` +### Statement Repeat +`rep [ | ]? : ` -### Statement Expression - +`rep [ | ]? : ;` -### StatementLoop -`loop [ | ] (, ? (, ? )?)? ;` +`rep [ | ] (, ? (, ? )? )? : ` + +`rep [ | ] (, ? (, ? )? )? : ;` + +`rep ( (, ? )? )? : ` + +`rep ( (, ? )? )? : ;` ``` -loop i sint32 <- 0, true, i < 10 +rep i sint32 <- 0, true, i < 10: doStuff(i) ; -loop i sint32 <- 0, -true, i < 10 +rep i sint32 <- 0, +true, i < 10: doStuff(i) ; -loop i sint32 <- 0, +rep i sint32 <- 0, true, -i < 10 +i < 10: doStuff(i) ; -``` -`loop ( (, ? )?)? ;` +rep: infiniteCall() +``` ### Statement Return -`ret ? ` +`ret ?` -### ExpressionVariable: -`` +### Expression Variable +`` -### Expression If Else: +### Expression If-Else: `if : ` ``` if num1 > 10: putchar('T') @@ -158,4 +155,4 @@ if num1 > 10: putchar('T') else ``` ### Expression Block -`( )* ( ?)? !` \ No newline at end of file +` )* ?` \ No newline at end of file diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 69748d3..e52d777 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -274,48 +274,59 @@ shared_ptr Parser::matchStatementLoop() { shared_ptr initStatement; shared_ptr preConditionExpression; shared_ptr postConditionExpression; + shared_ptr bodyBlockStatement; + + bool isMultiLine; // initial - initStatement = matchStatementVariable(); + initStatement = matchStatementVariable() ?: matchStatementAssignment(); if (initStatement != nullptr && !initStatement->isValid()) initStatement = nullptr; - if (tokens.at(currentIndex-1)->getKind() != TokenKind::NEW_LINE) { + if (!tryMatchingTokenKinds({TokenKind::COLON}, false, true)) { // got initial, expect comma if (initStatement != nullptr && !tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) return matchStatementInvalid("Expected comma after initial statement"); + // optional new line + tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); + // pre condition preConditionExpression = nextExpression(); if (preConditionExpression != nullptr && !preConditionExpression->isValid()) return matchStatementInvalid("Expected pre-condition expression"); - if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) { + if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { // got pre-condition, expect comma if (!tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) return matchStatementInvalid("Expected comma after pre-condition statement"); + // optional new line + tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); + // post condition postConditionExpression = nextExpression(); if (postConditionExpression == nullptr || !postConditionExpression->isValid()) return matchStatementInvalid("Expected post-condition expression"); - // epxect new line - if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) - return matchStatementInvalid("Expected new line"); + // expect colon + if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true)) + return matchStatementInvalid("Expected \":\""); } } + isMultiLine = tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); + // body - shared_ptr bodyBlockStatement = matchStatementBlock({TokenKind::SEMICOLON}); - if (bodyBlockStatement == nullptr) - return matchStatementInvalid("Expected block statement"); - else if (!bodyBlockStatement->isValid()) - return bodyBlockStatement; + if (isMultiLine) + bodyBlockStatement = matchStatementBlock({TokenKind::SEMICOLON}); + else + bodyBlockStatement = matchStatementBlock({TokenKind::NEW_LINE}); + + if (bodyBlockStatement == nullptr || !bodyBlockStatement->isValid()) + return bodyBlockStatement ?: matchStatementInvalid("Expected block statement"); - // epxect new line - if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) - return matchStatementInvalid("Expected new line"); + tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true); return make_shared(initStatement, preConditionExpression, postConditionExpression, dynamic_pointer_cast(bodyBlockStatement)); } @@ -563,7 +574,7 @@ shared_ptr Parser::matchExpressionBlock(vector terminalTo vector> statements; while (!tryMatchingTokenKinds(terminalTokenKinds, false, false)) { - shared_ptr statement = nextStatement(); + shared_ptr statement = nextInBlockStatement(); if (statement == nullptr || !statement->isValid()) return matchExpressionInvalid("Expected statement"); statements.push_back(statement); diff --git a/test.brc b/test.brc index bd1d6ef..2b3a5e5 100644 --- a/test.brc +++ b/test.brc @@ -1,6 +1,6 @@ dummy sint32 <- 55 stuff fun: num1 sint32, num2 sint32 -> sint32 - num1 sint32 <- 42 - num1 <- 5 * num1 - ret num1 + rep: + i <- i + 1 + ; ; \ No newline at end of file From 729ffd0ea21292f122f56cb81ce1c1cd7f59e090 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Tue, 1 Jul 2025 13:49:27 +0900 Subject: [PATCH 6/7] Renamed to repeat --- src/Compiler/ModuleBuilder.cpp | 8 ++-- src/Compiler/ModuleBuilder.h | 4 +- src/Parser/Parser.cpp | 8 ++-- src/Parser/Parser.h | 2 +- src/Parser/Statement/Statement.h | 2 +- src/Parser/Statement/StatementLoop.cpp | 39 ------------------- src/Parser/Statement/StatementRepeat.cpp | 39 +++++++++++++++++++ .../{StatementLoop.h => StatementRepeat.h} | 4 +- 8 files changed, 53 insertions(+), 53 deletions(-) delete mode 100644 src/Parser/Statement/StatementLoop.cpp create mode 100644 src/Parser/Statement/StatementRepeat.cpp rename src/Parser/Statement/{StatementLoop.h => StatementRepeat.h} (68%) diff --git a/src/Compiler/ModuleBuilder.cpp b/src/Compiler/ModuleBuilder.cpp index 88becf4..28a4c00 100644 --- a/src/Compiler/ModuleBuilder.cpp +++ b/src/Compiler/ModuleBuilder.cpp @@ -13,7 +13,7 @@ #include "Parser/Statement/StatementAssignment.h" #include "Parser/Statement/StatementReturn.h" #include "Parser/Statement/StatementExpression.h" -#include "Parser/Statement/StatementLoop.h" +#include "Parser/Statement/StatementRepeat.h" #include "Parser/Statement/StatementMetaExternFunction.h" #include "Parser/Statement/StatementBlock.h" @@ -54,8 +54,8 @@ void ModuleBuilder::buildStatement(shared_ptr statement) { case StatementKind::RETURN: buildReturn(dynamic_pointer_cast(statement)); break; - case StatementKind::LOOP: - buildLoop(dynamic_pointer_cast(statement)); + case StatementKind::REPEAT: + buildLoop(dynamic_pointer_cast(statement)); break; case StatementKind::META_EXTERN_FUNCTION: buildMetaExternFunction(dynamic_pointer_cast(statement)); @@ -138,7 +138,7 @@ void ModuleBuilder::buildReturn(shared_ptr statement) { } } -void ModuleBuilder::buildLoop(shared_ptr statement) { +void ModuleBuilder::buildLoop(shared_ptr statement) { shared_ptr initStatement = statement->getInitStatement(); shared_ptr bodyStatement= statement->getBodyBlockStatement(); shared_ptr preExpression = statement->getPreConditionExpression(); diff --git a/src/Compiler/ModuleBuilder.h b/src/Compiler/ModuleBuilder.h index 2f7b4f8..1a91fdf 100644 --- a/src/Compiler/ModuleBuilder.h +++ b/src/Compiler/ModuleBuilder.h @@ -27,7 +27,7 @@ class StatementVariable; class StatementAssignment; class StatementReturn; class StatementExpression; -class StatementLoop; +class StatementRepeat; class StatementMetaExternFunction; class StatementBlock; @@ -57,7 +57,7 @@ private: void buildAssignment(shared_ptr statement); void buildBlock(shared_ptr statement); void buildReturn(shared_ptr statement); - void buildLoop(shared_ptr statement); + void buildLoop(shared_ptr statement); void buildMetaExternFunction(shared_ptr statement); void buildExpression(shared_ptr statement); diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index e52d777..7a2ae0a 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -16,7 +16,7 @@ #include "Parser/Statement/StatementExpression.h" #include "Parser/Statement/StatementMetaExternFunction.h" #include "Parser/Statement/StatementBlock.h" -#include "Parser/Statement/StatementLoop.h" +#include "Parser/Statement/StatementRepeat.h" #include "Parser/Statement/StatementInvalid.h" Parser::Parser(vector> tokens): tokens(tokens) { @@ -80,7 +80,7 @@ shared_ptr Parser::nextInBlockStatement() { if (statement != nullptr) return statement; - statement = matchStatementLoop(); + statement = matchStatementRepeat(); if (statement != nullptr) return statement; @@ -267,7 +267,7 @@ shared_ptr Parser::matchStatementReturn() { return make_shared(expression); } -shared_ptr Parser::matchStatementLoop() { +shared_ptr Parser::matchStatementRepeat() { if (!tryMatchingTokenKinds({TokenKind::REPEAT}, true, true)) return nullptr; @@ -328,7 +328,7 @@ shared_ptr Parser::matchStatementLoop() { tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true); - return make_shared(initStatement, preConditionExpression, postConditionExpression, dynamic_pointer_cast(bodyBlockStatement)); + return make_shared(initStatement, preConditionExpression, postConditionExpression, dynamic_pointer_cast(bodyBlockStatement)); } shared_ptr Parser::matchStatementExpression() { diff --git a/src/Parser/Parser.h b/src/Parser/Parser.h index 761d722..6ff976f 100644 --- a/src/Parser/Parser.h +++ b/src/Parser/Parser.h @@ -28,7 +28,7 @@ private: shared_ptr matchStatementBlock(vector terminalTokenKinds); shared_ptr matchStatementAssignment(); shared_ptr matchStatementReturn(); - shared_ptr matchStatementLoop(); + shared_ptr matchStatementRepeat(); shared_ptr matchStatementExpression(); shared_ptr matchStatementInvalid(string message = ""); diff --git a/src/Parser/Statement/Statement.h b/src/Parser/Statement/Statement.h index a0b5aa6..4c6289d 100644 --- a/src/Parser/Statement/Statement.h +++ b/src/Parser/Statement/Statement.h @@ -14,7 +14,7 @@ enum class StatementKind { FUNCTION, VARIABLE, ASSIGNMENT, - LOOP, + REPEAT, META_EXTERN_FUNCTION, INVALID }; diff --git a/src/Parser/Statement/StatementLoop.cpp b/src/Parser/Statement/StatementLoop.cpp deleted file mode 100644 index e29eca2..0000000 --- a/src/Parser/Statement/StatementLoop.cpp +++ /dev/null @@ -1,39 +0,0 @@ -#include "StatementLoop.h" - -#include "Parser/Expression/Expression.h" -#include "Parser/Statement/StatementBlock.h" - -StatementLoop::StatementLoop(shared_ptr initStatement, shared_ptr preConditionExpression, shared_ptr postConditionExpression, shared_ptr bodyBlockStatement): -Statement(StatementKind::LOOP), initStatement(initStatement), preConditionExpression(preConditionExpression), postConditionExpression(postConditionExpression), bodyBlockStatement(bodyBlockStatement) { } - -shared_ptr StatementLoop::getInitStatement() { - return initStatement; -} - -shared_ptr StatementLoop::getPreConditionExpression() { - return preConditionExpression; -} - -shared_ptr StatementLoop::getPostConditionExpression() { - return postConditionExpression; -} - -shared_ptr StatementLoop::getBodyBlockStatement() { - return bodyBlockStatement; -} - -string StatementLoop::toString(int indent) { - string value; - for (int ind=0; indtoString(0), ", "; - if (preConditionExpression != nullptr) - value += preConditionExpression->toString(0) + ", "; - if (postConditionExpression != nullptr) - value += postConditionExpression->toString(0); - value += "):\n"; - value += bodyBlockStatement->toString(indent+1); - return value; -} \ No newline at end of file diff --git a/src/Parser/Statement/StatementRepeat.cpp b/src/Parser/Statement/StatementRepeat.cpp new file mode 100644 index 0000000..6f52ed9 --- /dev/null +++ b/src/Parser/Statement/StatementRepeat.cpp @@ -0,0 +1,39 @@ +#include "StatementRepeat.h" + +#include "Parser/Expression/Expression.h" +#include "Parser/Statement/StatementBlock.h" + +StatementRepeat::StatementRepeat(shared_ptr initStatement, shared_ptr preConditionExpression, shared_ptr postConditionExpression, shared_ptr bodyBlockStatement): +Statement(StatementKind::REPEAT), initStatement(initStatement), preConditionExpression(preConditionExpression), postConditionExpression(postConditionExpression), bodyBlockStatement(bodyBlockStatement) { } + +shared_ptr StatementRepeat::getInitStatement() { + return initStatement; +} + +shared_ptr StatementRepeat::getPreConditionExpression() { + return preConditionExpression; +} + +shared_ptr StatementRepeat::getPostConditionExpression() { + return postConditionExpression; +} + +shared_ptr StatementRepeat::getBodyBlockStatement() { + return bodyBlockStatement; +} + +string StatementRepeat::toString(int indent) { + string value; + for (int ind=0; indtoString(0), ", "; + if (preConditionExpression != nullptr) + value += preConditionExpression->toString(0) + ", "; + if (postConditionExpression != nullptr) + value += postConditionExpression->toString(0); + value += "):\n"; + value += bodyBlockStatement->toString(indent+1); + return value; +} \ No newline at end of file diff --git a/src/Parser/Statement/StatementLoop.h b/src/Parser/Statement/StatementRepeat.h similarity index 68% rename from src/Parser/Statement/StatementLoop.h rename to src/Parser/Statement/StatementRepeat.h index 00cd47d..399a8c8 100644 --- a/src/Parser/Statement/StatementLoop.h +++ b/src/Parser/Statement/StatementRepeat.h @@ -3,7 +3,7 @@ class Expression; class StatementBlock; -class StatementLoop: public Statement { +class StatementRepeat: public Statement { private: shared_ptr initStatement; shared_ptr preConditionExpression; @@ -11,7 +11,7 @@ private: shared_ptr bodyBlockStatement; public: - StatementLoop(shared_ptr initStatement, shared_ptr preConditionExpression, shared_ptr postConditionExpression, shared_ptr bodyBlockStatement); + StatementRepeat(shared_ptr initStatement, shared_ptr preConditionExpression, shared_ptr postConditionExpression, shared_ptr bodyBlockStatement); shared_ptr getInitStatement(); shared_ptr getPreConditionExpression(); shared_ptr getPostConditionExpression(); From e4a2b8415418fb57e83512d15028c86116c6a8fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Tue, 1 Jul 2025 17:06:52 +0900 Subject: [PATCH 7/7] New line shouldn't be the first token --- src/Lexer/Lexer.cpp | 6 +++++- src/main.cpp | 4 ++-- test.brc | 3 ++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/Lexer/Lexer.cpp b/src/Lexer/Lexer.cpp index d200fe1..d017d6f 100644 --- a/src/Lexer/Lexer.cpp +++ b/src/Lexer/Lexer.cpp @@ -22,7 +22,11 @@ vector> Lexer::getTokens() { if (!token->isValid()) { cerr << "Unexpected character '" << token->getLexme() << "' at " << token->getLine() << ":" << token->getColumn() << endl; exit(1); - } + } + + // Don't add new line as the first token + if (tokens.empty() && token->isOfKind({TokenKind::NEW_LINE})) + continue; // Insert an additional new line just before end if (token->getKind() == TokenKind::END && tokens.back()->getKind() != TokenKind::NEW_LINE) diff --git a/src/main.cpp b/src/main.cpp index 4fb8794..df6119c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -74,14 +74,14 @@ int main(int argc, char **argv) { cout << endl << endl; } - /*ModuleBuilder moduleBuilder(moduleName, inputFilePath, statements); + ModuleBuilder moduleBuilder(moduleName, inputFilePath, statements); shared_ptr module = moduleBuilder.getModule(); if (isVerbose) { module->print(llvm::outs(), nullptr); } CodeGenerator codeGenerator(module); - codeGenerator.generateObjectFile(outputKind);*/ + codeGenerator.generateObjectFile(outputKind); return 0; } \ No newline at end of file diff --git a/test.brc b/test.brc index 2b3a5e5..7ae6cd8 100644 --- a/test.brc +++ b/test.brc @@ -1,6 +1,7 @@ -dummy sint32 <- 55 +//dummy sint32 <- 55 stuff fun: num1 sint32, num2 sint32 -> sint32 rep: i <- i + 1 ; + ret 42 ; \ No newline at end of file