diff --git a/Syntax.md b/Syntax.md new file mode 100644 index 0000000..16a2af9 --- /dev/null +++ b/Syntax.md @@ -0,0 +1,158 @@ +# Detailed Syntax + +This document specifies the allowed syntax for statements and expressions. + +### Symbols used +`?` 0 or 1 instances + +`*` 0 or more instances + +`+` 1 or more instances + +`` New line + +`` Identifier + +`` Type + +### Overall structure +``` + +| ++ ++ ++ + | + + + | + + + + + + + + + | + + ... + + + | + + + + + | + + ... +``` + +### Statement Meta Extern Function +`@extern fun (: ? (, ? )*)? (-> ? )?` +``` +@extern sum fun: +num1 sint32, +num2 sint32 -> +sint32 +``` + +### Statement Variable +` <- ` + +### Statement Function +` fun (: ? (, ? )*)? (-> ? )? ;` +``` +stuff fun +; + +stuff fun -> sint32 + ret 42 +; + +stuff fun: num1 sint32, +num2 sint32 -> +sint32 + ret num1 + num2 +; +``` + +### Statement Assignment +` <- ` +``` +num1 <- 5 +``` + +### Statement Block +`( )*` + +### Statement Repeat +`rep [ | ]? : ` + +`rep [ | ]? : ;` + +`rep [ | ] (, ? (, ? )? )? : ` + +`rep [ | ] (, ? (, ? )? )? : ;` + +`rep ( (, ? )? )? : ` + +`rep ( (, ? )? )? 
: ;` +``` +rep i sint32 <- 0, true, i < 10: + doStuff(i) +; + +rep i sint32 <- 0, +true, i < 10: + doStuff(i) +; + +rep i sint32 <- 0, +true, +i < 10: + doStuff(i) +; + +rep: infiniteCall() +``` + +### Statement Return +`ret ?` + +### Expression Variable +`` + +### Expression If-Else: +`if : ` +``` +if num1 > 10: putchar('T') + +``` + +`if : ;` +``` +if num1 > 10: + num1 <- 500 + putchar('S') +; +``` + +`if : else ` +``` +if num1 > 10: putchar('T') else putchar('N') + +``` + +`if : else ;` +``` +if num1 > 10: + putchar('T') +else + putchar('N') +; + +``` + +`if : else ;` +``` +if num1 > 10: putchar('T') else + num1 <- 500 + putchar('F') +; + +``` + +### Expression Block +` )* ?` \ No newline at end of file diff --git a/src/Compiler/ModuleBuilder.cpp b/src/Compiler/ModuleBuilder.cpp index 88becf4..28a4c00 100644 --- a/src/Compiler/ModuleBuilder.cpp +++ b/src/Compiler/ModuleBuilder.cpp @@ -13,7 +13,7 @@ #include "Parser/Statement/StatementAssignment.h" #include "Parser/Statement/StatementReturn.h" #include "Parser/Statement/StatementExpression.h" -#include "Parser/Statement/StatementLoop.h" +#include "Parser/Statement/StatementRepeat.h" #include "Parser/Statement/StatementMetaExternFunction.h" #include "Parser/Statement/StatementBlock.h" @@ -54,8 +54,8 @@ void ModuleBuilder::buildStatement(shared_ptr statement) { case StatementKind::RETURN: buildReturn(dynamic_pointer_cast(statement)); break; - case StatementKind::LOOP: - buildLoop(dynamic_pointer_cast(statement)); + case StatementKind::REPEAT: + buildLoop(dynamic_pointer_cast(statement)); break; case StatementKind::META_EXTERN_FUNCTION: buildMetaExternFunction(dynamic_pointer_cast(statement)); @@ -138,7 +138,7 @@ void ModuleBuilder::buildReturn(shared_ptr statement) { } } -void ModuleBuilder::buildLoop(shared_ptr statement) { +void ModuleBuilder::buildLoop(shared_ptr statement) { shared_ptr initStatement = statement->getInitStatement(); shared_ptr bodyStatement= statement->getBodyBlockStatement(); shared_ptr 
preExpression = statement->getPreConditionExpression(); diff --git a/src/Compiler/ModuleBuilder.h b/src/Compiler/ModuleBuilder.h index 2f7b4f8..1a91fdf 100644 --- a/src/Compiler/ModuleBuilder.h +++ b/src/Compiler/ModuleBuilder.h @@ -27,7 +27,7 @@ class StatementVariable; class StatementAssignment; class StatementReturn; class StatementExpression; -class StatementLoop; +class StatementRepeat; class StatementMetaExternFunction; class StatementBlock; @@ -57,7 +57,7 @@ private: void buildAssignment(shared_ptr statement); void buildBlock(shared_ptr statement); void buildReturn(shared_ptr statement); - void buildLoop(shared_ptr statement); + void buildLoop(shared_ptr statement); void buildMetaExternFunction(shared_ptr statement); void buildExpression(shared_ptr statement); diff --git a/src/Lexer/Lexer.cpp b/src/Lexer/Lexer.cpp index ddd9aa9..d017d6f 100644 --- a/src/Lexer/Lexer.cpp +++ b/src/Lexer/Lexer.cpp @@ -4,8 +4,12 @@ Lexer::Lexer(string source): source(source) { } vector> Lexer::getTokens() { - shared_ptr token = nullptr; - tokens.clear(); + currentIndex = 0; + currentLine = 0; + currentColumn = 0; + + vector> tokens; + shared_ptr token; do { token = nextToken(); // Got a nullptr, shouldn't have happened @@ -18,7 +22,11 @@ vector> Lexer::getTokens() { if (!token->isValid()) { cerr << "Unexpected character '" << token->getLexme() << "' at " << token->getLine() << ":" << token->getColumn() << endl; exit(1); - } + } + + // Don't add new line as the first token + if (tokens.empty() && token->isOfKind({TokenKind::NEW_LINE})) + continue; // Insert an additional new line just before end if (token->getKind() == TokenKind::END && tokens.back()->getKind() != TokenKind::NEW_LINE) @@ -123,14 +131,6 @@ shared_ptr Lexer::nextToken() { token = match(TokenKind::SEMICOLON, ";", false); if (token != nullptr) return token; - - token = match(TokenKind::QUESTION_QUESTION, "??", false); - if (token != nullptr) - return token; - - token = match(TokenKind::QUESTION, "?", false); - if 
(token != nullptr) - return token; token = match(TokenKind::LEFT_ARROW, "<-", false); if (token != nullptr) @@ -187,6 +187,14 @@ shared_ptr Lexer::nextToken() { return token; // keywords + token = match(TokenKind::IF, "if", true); + if (token != nullptr) + return token; + + token = match(TokenKind::ELSE, "else", true); + if (token != nullptr) + return token; + token = match(TokenKind::FUNCTION, "fun", true); if (token != nullptr) return token; diff --git a/src/Lexer/Lexer.h b/src/Lexer/Lexer.h index f0fe2bb..c117b08 100644 --- a/src/Lexer/Lexer.h +++ b/src/Lexer/Lexer.h @@ -10,11 +10,9 @@ using namespace std; class Lexer { private: string source; - int currentIndex = 0; - int currentLine = 0; - int currentColumn = 0; - - vector> tokens; + int currentIndex; + int currentLine; + int currentColumn; shared_ptr nextToken(); shared_ptr match(TokenKind kind, string lexme, bool needsSeparator); diff --git a/src/Lexer/Token.cpp b/src/Lexer/Token.cpp index b912fb3..aa08a24 100644 --- a/src/Lexer/Token.cpp +++ b/src/Lexer/Token.cpp @@ -112,10 +112,6 @@ string Token::toString() { return ":"; case TokenKind::SEMICOLON: return ";"; - case TokenKind::QUESTION_QUESTION: - return "??"; - case TokenKind::QUESTION: - return "?"; case TokenKind::LEFT_ARROW: return "←"; case TokenKind::RIGHT_ARROW: @@ -136,6 +132,10 @@ string Token::toString() { case TokenKind::TYPE: return "TYPE(" + lexme + ")"; + case TokenKind::IF: + return "IF"; + case TokenKind::ELSE: + return "ELSE"; case TokenKind::FUNCTION: return "FUNCTION"; case TokenKind::RETURN: diff --git a/src/Lexer/Token.h b/src/Lexer/Token.h index dd2cb9d..1e3adbe 100644 --- a/src/Lexer/Token.h +++ b/src/Lexer/Token.h @@ -26,14 +26,14 @@ enum class TokenKind { COMMA, COLON, SEMICOLON, - QUESTION, - QUESTION_QUESTION, LEFT_ARROW, RIGHT_ARROW, FUNCTION, RETURN, REPEAT, + IF, + ELSE, BOOL, INTEGER_DEC, diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 57eb0e2..7a2ae0a 100644 --- a/src/Parser/Parser.cpp +++ 
b/src/Parser/Parser.cpp @@ -16,7 +16,7 @@ #include "Parser/Statement/StatementExpression.h" #include "Parser/Statement/StatementMetaExternFunction.h" #include "Parser/Statement/StatementBlock.h" -#include "Parser/Statement/StatementLoop.h" +#include "Parser/Statement/StatementRepeat.h" #include "Parser/Statement/StatementInvalid.h" Parser::Parser(vector> tokens): tokens(tokens) { @@ -33,6 +33,12 @@ vector> Parser::getStatements() { exit(1); } statements.push_back(statement); + + // Expect new line after statement + if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) { + cerr << "Expected new line" << endl; + exit(1); + } } return statements; @@ -48,6 +54,20 @@ shared_ptr Parser::nextStatement() { if (statement != nullptr) return statement; + statement = matchStatementVariable(); + if (statement != nullptr) + return statement; + + statement = matchStatementMetaExternFunction(); + if (statement != nullptr) + return statement; + + return matchStatementInvalid("Unexpected token"); +} + +shared_ptr Parser::nextInBlockStatement() { + shared_ptr statement; + statement = matchStatementVariable(); if (statement != nullptr) return statement; @@ -60,7 +80,7 @@ shared_ptr Parser::nextStatement() { if (statement != nullptr) return statement; - statement = matchStatementLoop(); + statement = matchStatementRepeat(); if (statement != nullptr) return statement; @@ -68,32 +88,29 @@ shared_ptr Parser::nextStatement() { if (statement != nullptr) return statement; - statement = matchStatementMetaExternFunction(); - if (statement != nullptr) - return statement; - return matchStatementInvalid("Unexpected token"); } -shared_ptr Parser::matchStatementFunction() { - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false)) +shared_ptr Parser::matchStatementMetaExternFunction() { + if (!tryMatchingTokenKinds({TokenKind::M_EXTERN, TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false)) return nullptr; - shared_ptr identifierToken = 
tokens.at(currentIndex); - currentIndex++; + string name; + vector> arguments; + ValueType returnType = ValueType::NONE; + + currentIndex++; // skip meta + shared_ptr identifierToken = tokens.at(currentIndex++); currentIndex++; // skip fun - // Get arguments - vector> arguments; + // arguments if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { do { tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) return matchStatementInvalid("Expected function argument"); - shared_ptr identifierToken = tokens.at(currentIndex); - currentIndex++; // identifier - shared_ptr typeToken = tokens.at(currentIndex); - currentIndex++; // type + shared_ptr identifierToken = tokens.at(currentIndex++); + shared_ptr typeToken = tokens.at(currentIndex++); optional argumentType = valueTypeForToken(typeToken); if (!argumentType) return matchStatementInvalid("Invalid argument type"); @@ -102,12 +119,10 @@ shared_ptr Parser::matchStatementFunction() { } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); } - // consume optional new line - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); - // Return type - ValueType returnType = ValueType::NONE; if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { + tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line + shared_ptr typeToken = tokens.at(currentIndex); optional type = valueTypeForToken(typeToken); if (!type) @@ -115,30 +130,16 @@ shared_ptr Parser::matchStatementFunction() { returnType = *type; currentIndex++; // type - - // consume new line - if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) - return matchStatementInvalid("Expected new line after function declaration"); } - shared_ptr statementBlock = matchStatementBlock({TokenKind::SEMICOLON}, true); - if (statementBlock == nullptr) - return matchStatementInvalid(); - else if (!statementBlock->isValid()) - return 
statementBlock; - - if(!tryMatchingTokenKinds({TokenKind::NEW_LINE}, false, true)) - return matchStatementInvalid("Expected a new line after a function declaration"); - - return make_shared(identifierToken->getLexme(), arguments, returnType, dynamic_pointer_cast(statementBlock)); + return make_shared(identifierToken->getLexme(), arguments, returnType); } shared_ptr Parser::matchStatementVariable() { if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) return nullptr; - shared_ptr identifierToken = tokens.at(currentIndex); - currentIndex++; // identifier + shared_ptr identifierToken = tokens.at(currentIndex++); shared_ptr valueTypeToken = tokens.at(currentIndex); ValueType valueType; @@ -159,31 +160,99 @@ shared_ptr Parser::matchStatementVariable() { shared_ptr expression = nextExpression(); if (expression == nullptr || !expression->isValid()) - return matchStatementInvalid(); - - // Expect comma or new line - if (!tryMatchingTokenKinds({TokenKind::COMMA}, true, false) && !tryMatchingTokenKinds({TokenKind::NEW_LINE}, false, true)) - return matchStatementInvalid("Expected a new line after variable declaration"); + return matchStatementInvalid("Invalid expression"); return make_shared(identifierToken->getLexme(), valueType, expression); } +shared_ptr Parser::matchStatementFunction() { + if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false)) + return nullptr; + + string name; + vector> arguments; + ValueType returnType = ValueType::NONE; + shared_ptr statementBlock; + + // name + name = tokens.at(currentIndex++)->getLexme(); + currentIndex++; // skip fun + + // arguments + if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { + do { + tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line + if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) + return matchStatementInvalid("Expected function argument"); + shared_ptr identifierToken = 
tokens.at(currentIndex++); + shared_ptr typeToken = tokens.at(currentIndex++); + optional argumentType = valueTypeForToken(typeToken); + if (!argumentType) + return matchStatementInvalid("Invalid argument type"); + + arguments.push_back(pair(identifierToken->getLexme(), *argumentType)); + } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); + } + + // return type + if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { + tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line + + shared_ptr typeToken = tokens.at(currentIndex); + optional type = valueTypeForToken(typeToken); + if (!type) + return matchStatementInvalid("Expected return type"); + returnType = *type; + + currentIndex++; // type + } + + // consume new line + if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) + return matchStatementInvalid("Expected new line after function declaration"); + + // block + statementBlock = matchStatementBlock({TokenKind::SEMICOLON}); + if (statementBlock == nullptr || !statementBlock->isValid()) + return statementBlock ?: matchStatementInvalid(); + + if(!tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true)) + return matchStatementInvalid("Expected a \";\" after a function declaration"); + + return make_shared(name, arguments, returnType, dynamic_pointer_cast(statementBlock)); +} + +shared_ptr Parser::matchStatementBlock(vector terminalTokenKinds) { + vector> statements; + + while (!tryMatchingTokenKinds(terminalTokenKinds, false, false)) { + shared_ptr statement = nextInBlockStatement(); + if (statement == nullptr || !statement->isValid()) + return statement ?: matchStatementInvalid("Expected statement"); + statements.push_back(statement); + + if (tryMatchingTokenKinds(terminalTokenKinds, false, false)) + break; + + // except new line + if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) + return matchStatementInvalid("Expected new line"); + } + + return make_shared(statements); +} + shared_ptr 
Parser::matchStatementAssignment() { if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::LEFT_ARROW}, true, false)) return nullptr; - shared_ptr identifierToken = tokens.at(currentIndex); - currentIndex++; // identifier + shared_ptr identifierToken = tokens.at(currentIndex++); currentIndex++; // arrow shared_ptr expression = nextExpression(); if (expression == nullptr || !expression->isValid()) return matchStatementInvalid("Expected expression"); - // Expect new line - if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, false, true)) - return matchStatementInvalid("Expected a new line after variable declaration"); - return make_shared(identifierToken->getLexme(), expression); } @@ -193,67 +262,73 @@ shared_ptr Parser::matchStatementReturn() { shared_ptr expression = nextExpression(); if (expression != nullptr && !expression->isValid()) - return matchStatementInvalid(); - - if (!tryMatchingTokenKinds({TokenKind::NEW_LINE, TokenKind::SEMICOLON}, false, false)) - return matchStatementInvalid(); - - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); + return matchStatementInvalid("Expected expression"); return make_shared(expression); } -shared_ptr Parser::matchStatementLoop() { +shared_ptr Parser::matchStatementRepeat() { if (!tryMatchingTokenKinds({TokenKind::REPEAT}, true, true)) return nullptr; shared_ptr initStatement; shared_ptr preConditionExpression; shared_ptr postConditionExpression; + shared_ptr bodyBlockStatement; + + bool isMultiLine; // initial - initStatement = matchStatementVariable(); + initStatement = matchStatementVariable() ?: matchStatementAssignment(); if (initStatement != nullptr && !initStatement->isValid()) initStatement = nullptr; - if (tokens.at(currentIndex-1)->getKind() != TokenKind::NEW_LINE) { + if (!tryMatchingTokenKinds({TokenKind::COLON}, false, true)) { // got initial, expect comma if (initStatement != nullptr && !tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) return matchStatementInvalid("Expected comma after 
initial statement"); + // optional new line + tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); + // pre condition preConditionExpression = nextExpression(); if (preConditionExpression != nullptr && !preConditionExpression->isValid()) return matchStatementInvalid("Expected pre-condition expression"); - if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) { + if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { // got pre-condition, expect comma if (!tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) return matchStatementInvalid("Expected comma after pre-condition statement"); + // optional new line + tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); + // post condition postConditionExpression = nextExpression(); if (postConditionExpression == nullptr || !postConditionExpression->isValid()) return matchStatementInvalid("Expected post-condition expression"); - // epxect new line - if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) - return matchStatementInvalid("Expected new line"); + // expect colon + if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true)) + return matchStatementInvalid("Expected \":\""); } } - // body - shared_ptr bodyBlockStatement = matchStatementBlock({TokenKind::SEMICOLON}, true); - if (bodyBlockStatement == nullptr) - return matchStatementInvalid("Expected block statement"); - else if (!bodyBlockStatement->isValid()) - return bodyBlockStatement; - - // epxect new line - if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) - return matchStatementInvalid("Expected new line"); + isMultiLine = tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); - return make_shared(initStatement, preConditionExpression, postConditionExpression, dynamic_pointer_cast(bodyBlockStatement)); + // body + if (isMultiLine) + bodyBlockStatement = matchStatementBlock({TokenKind::SEMICOLON}); + else + bodyBlockStatement = matchStatementBlock({TokenKind::NEW_LINE}); + + if (bodyBlockStatement == nullptr || 
!bodyBlockStatement->isValid()) + return bodyBlockStatement ?: matchStatementInvalid("Expected block statement"); + + tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true); + + return make_shared(initStatement, preConditionExpression, postConditionExpression, dynamic_pointer_cast(bodyBlockStatement)); } shared_ptr Parser::matchStatementExpression() { @@ -270,76 +345,6 @@ shared_ptr Parser::matchStatementExpression() { return make_shared(expression); } -shared_ptr Parser::matchStatementMetaExternFunction() { - if (!tryMatchingTokenKinds({TokenKind::M_EXTERN, TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false)) - return nullptr; - - currentIndex++; // skip meta - shared_ptr identifierToken = tokens.at(currentIndex); - currentIndex++; - currentIndex++; // skip fun - - // Get arguments - vector> arguments; - if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { - do { - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) - return matchStatementInvalid("Expected function argument"); - shared_ptr identifierToken = tokens.at(currentIndex); - currentIndex++; // identifier - shared_ptr typeToken = tokens.at(currentIndex); - currentIndex++; // type - optional argumentType = valueTypeForToken(typeToken); - if (!argumentType) - return matchStatementInvalid("Invalid argument type"); - - arguments.push_back(pair(identifierToken->getLexme(), *argumentType)); - } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); - } - - // consume optional new line - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); - - // Return type - ValueType returnType = ValueType::NONE; - if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { - shared_ptr typeToken = tokens.at(currentIndex); - optional type = valueTypeForToken(typeToken); - if (!type) - return matchStatementInvalid("Expected return type"); - returnType = *type; - - currentIndex++; // 
type - - // consume new line - if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) - return matchStatementInvalid("Expected new line after function declaration"); - } - - return make_shared(identifierToken->getLexme(), arguments, returnType); -} - -shared_ptr Parser::matchStatementBlock(vector terminalTokenKinds, bool shouldConsumeTerminal) { - vector> statements; - - bool hasNewLineTerminal = find(terminalTokenKinds.begin(), terminalTokenKinds.end(), TokenKind::NEW_LINE) != terminalTokenKinds.end(); - while (!tryMatchingTokenKinds(terminalTokenKinds, false, shouldConsumeTerminal)) { - shared_ptr statement = nextStatement(); - if (statement == nullptr) - return matchStatementInvalid(); - else if (!statement->isValid()) - return statement; - else - statements.push_back(statement); - - if (hasNewLineTerminal && tokens.at(currentIndex-1)->getKind() == TokenKind::NEW_LINE) - currentIndex--; - } - - return make_shared(statements); -} - shared_ptr Parser::matchStatementInvalid(string message) { return make_shared(tokens.at(currentIndex), message); } @@ -436,14 +441,12 @@ shared_ptr Parser::matchExpressionGrouping() { if (tryMatchingTokenKinds({TokenKind::LEFT_PAREN}, true, true)) { shared_ptr expression = matchTerm(); // has grouped expression failed? 
- if (expression == nullptr) { - return matchExpressionInvalid(); - } else if(!expression->isValid()) { - return expression; + if (expression == nullptr || !expression->isValid()) { + return expression ?: matchExpressionInvalid("Expected expression"); } else if (tryMatchingTokenKinds({TokenKind::RIGHT_PAREN}, true, true)) { return make_shared(expression); } else { - return matchExpressionInvalid(); + return matchExpressionInvalid("Unexpected token"); } } @@ -488,52 +491,56 @@ shared_ptr Parser::matchExpressionCall() { tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // optional new line if (!tryMatchingTokenKinds({TokenKind::RIGHT_PAREN}, true, true)) - return matchExpressionInvalid(); + return matchExpressionInvalid("Expected \")\""); return make_shared(identifierToken->getLexme(), argumentExpressions); } shared_ptr Parser::matchExpressionIfElse() { - // Try maching '?' - shared_ptr token = tokens.at(currentIndex); - - if (!tryMatchingTokenKinds({TokenKind::QUESTION}, true, true)) + if (!tryMatchingTokenKinds({TokenKind::IF}, true, true)) return nullptr; - // Then get condition - shared_ptr condition = nextExpression(); - if (condition == nullptr) - return matchExpressionInvalid(); - else if (!condition->isValid()) - return condition; - - // Consume optional ':' - tryMatchingTokenKinds({TokenKind::COLON}, true, true); - // Consume optional new line - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); - - // Match then block - shared_ptr thenBlock = matchExpressionBlock({TokenKind::COLON, TokenKind::SEMICOLON}, false); - if (thenBlock == nullptr) - return matchExpressionInvalid(); - else if (!thenBlock->isValid()) - return thenBlock; - - // Match else block. 
Then and else block are separated by ':' + shared_ptr condition; + shared_ptr thenBlock; shared_ptr elseBlock; - if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { - bool isSingleLine = !tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); - vector terminalTokens = {TokenKind::SEMICOLON, TokenKind::COMMA, TokenKind::RIGHT_PAREN}; - if (isSingleLine) - terminalTokens.push_back(TokenKind::NEW_LINE); - elseBlock = matchExpressionBlock(terminalTokens, false); - if (elseBlock == nullptr) - return matchExpressionInvalid(); - else if (!elseBlock->isValid()) - return elseBlock; + // condition expression + condition = nextExpression(); + if (condition == nullptr || !condition->isValid()) + return condition ?: matchExpressionInvalid("Expected condition expression"); + + if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true)) + return matchExpressionInvalid("Expected \":\""); + + // then + bool isMultiLine = false; + + if (tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) + isMultiLine = true; + + // then block + if (isMultiLine) + thenBlock = matchExpressionBlock({TokenKind::ELSE, TokenKind::SEMICOLON}); + else + thenBlock = matchExpressionBlock({TokenKind::ELSE, TokenKind::NEW_LINE}); + if (thenBlock == nullptr || !thenBlock->isValid()) + return thenBlock ?: matchExpressionInvalid("Expected then block"); + + // else + if (tryMatchingTokenKinds({TokenKind::ELSE}, true, true)) { + if (tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) + isMultiLine = true; + + // else block + if (isMultiLine) + elseBlock = matchExpressionBlock({TokenKind::SEMICOLON}); + else + elseBlock = matchExpressionBlock({TokenKind::NEW_LINE}); + + if (elseBlock == nullptr || !elseBlock->isValid()) + return elseBlock ?: matchExpressionInvalid("Expected else block"); } - tryMatchingTokenKinds({TokenKind::SEMICOLON}, true, true); + tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true); return make_shared(condition, dynamic_pointer_cast(thenBlock), 
dynamic_pointer_cast(elseBlock)); } @@ -553,7 +560,7 @@ shared_ptr Parser::matchExpressionBinary(shared_ptr left } if (right == nullptr) { - return matchExpressionInvalid(); + return matchExpressionInvalid("Expected right-side expression"); } else if (!right->isValid()) { return right; } else { @@ -563,25 +570,20 @@ shared_ptr Parser::matchExpressionBinary(shared_ptr left return nullptr; } -shared_ptr Parser::matchExpressionBlock(vector terminalTokenKinds, bool shouldConsumeTerminal) { +shared_ptr Parser::matchExpressionBlock(vector terminalTokenKinds) { vector> statements; - bool hasNewLineTerminal = find(terminalTokenKinds.begin(), terminalTokenKinds.end(), TokenKind::NEW_LINE) != terminalTokenKinds.end(); - while (!tryMatchingTokenKinds(terminalTokenKinds, false, shouldConsumeTerminal)) { - shared_ptr statement = nextStatement(); + while (!tryMatchingTokenKinds(terminalTokenKinds, false, false)) { + shared_ptr statement = nextInBlockStatement(); if (statement == nullptr || !statement->isValid()) - return matchExpressionInvalid(); - else - statements.push_back(statement); - - if (hasNewLineTerminal && tokens.at(currentIndex-1)->getKind() == TokenKind::NEW_LINE) - currentIndex--; + return matchExpressionInvalid("Expected statement"); + statements.push_back(statement); } return make_shared(statements); } -shared_ptr Parser::matchExpressionInvalid() { +shared_ptr Parser::matchExpressionInvalid(string message) { return make_shared(tokens.at(currentIndex)); } diff --git a/src/Parser/Parser.h b/src/Parser/Parser.h index 15efaac..6ff976f 100644 --- a/src/Parser/Parser.h +++ b/src/Parser/Parser.h @@ -19,14 +19,17 @@ private: int currentIndex = 0; shared_ptr nextStatement(); - shared_ptr matchStatementFunction(); + shared_ptr nextInBlockStatement(); + + shared_ptr matchStatementMetaExternFunction(); shared_ptr matchStatementVariable(); + shared_ptr matchStatementFunction(); + + shared_ptr matchStatementBlock(vector terminalTokenKinds); shared_ptr 
matchStatementAssignment(); shared_ptr matchStatementReturn(); - shared_ptr matchStatementLoop(); + shared_ptr matchStatementRepeat(); shared_ptr matchStatementExpression(); - shared_ptr matchStatementMetaExternFunction(); - shared_ptr matchStatementBlock(vector terminalTokenKinds, bool shouldConsumeTerminal); shared_ptr matchStatementInvalid(string message = ""); shared_ptr nextExpression(); @@ -42,8 +45,8 @@ private: shared_ptr matchExpressionCall(); shared_ptr matchExpressionIfElse(); shared_ptr matchExpressionBinary(shared_ptr left); - shared_ptr matchExpressionBlock(vector terminalTokenKinds, bool shouldConsumeTerminal); - shared_ptr matchExpressionInvalid(); + shared_ptr matchExpressionBlock(vector terminalTokenKinds); + shared_ptr matchExpressionInvalid(string message); bool tryMatchingTokenKinds(vector kinds, bool shouldMatchAll, bool shouldAdvance); optional valueTypeForToken(shared_ptr token); diff --git a/src/Parser/Statement/Statement.h b/src/Parser/Statement/Statement.h index a0b5aa6..4c6289d 100644 --- a/src/Parser/Statement/Statement.h +++ b/src/Parser/Statement/Statement.h @@ -14,7 +14,7 @@ enum class StatementKind { FUNCTION, VARIABLE, ASSIGNMENT, - LOOP, + REPEAT, META_EXTERN_FUNCTION, INVALID }; diff --git a/src/Parser/Statement/StatementLoop.cpp b/src/Parser/Statement/StatementLoop.cpp deleted file mode 100644 index e29eca2..0000000 --- a/src/Parser/Statement/StatementLoop.cpp +++ /dev/null @@ -1,39 +0,0 @@ -#include "StatementLoop.h" - -#include "Parser/Expression/Expression.h" -#include "Parser/Statement/StatementBlock.h" - -StatementLoop::StatementLoop(shared_ptr initStatement, shared_ptr preConditionExpression, shared_ptr postConditionExpression, shared_ptr bodyBlockStatement): -Statement(StatementKind::LOOP), initStatement(initStatement), preConditionExpression(preConditionExpression), postConditionExpression(postConditionExpression), bodyBlockStatement(bodyBlockStatement) { } - -shared_ptr StatementLoop::getInitStatement() { - return 
initStatement; -} - -shared_ptr StatementLoop::getPreConditionExpression() { - return preConditionExpression; -} - -shared_ptr StatementLoop::getPostConditionExpression() { - return postConditionExpression; -} - -shared_ptr StatementLoop::getBodyBlockStatement() { - return bodyBlockStatement; -} - -string StatementLoop::toString(int indent) { - string value; - for (int ind=0; indtoString(0), ", "; - if (preConditionExpression != nullptr) - value += preConditionExpression->toString(0) + ", "; - if (postConditionExpression != nullptr) - value += postConditionExpression->toString(0); - value += "):\n"; - value += bodyBlockStatement->toString(indent+1); - return value; -} \ No newline at end of file diff --git a/src/Parser/Statement/StatementRepeat.cpp b/src/Parser/Statement/StatementRepeat.cpp new file mode 100644 index 0000000..6f52ed9 --- /dev/null +++ b/src/Parser/Statement/StatementRepeat.cpp @@ -0,0 +1,39 @@ +#include "StatementRepeat.h" + +#include "Parser/Expression/Expression.h" +#include "Parser/Statement/StatementBlock.h" + +StatementRepeat::StatementRepeat(shared_ptr initStatement, shared_ptr preConditionExpression, shared_ptr postConditionExpression, shared_ptr bodyBlockStatement): +Statement(StatementKind::REPEAT), initStatement(initStatement), preConditionExpression(preConditionExpression), postConditionExpression(postConditionExpression), bodyBlockStatement(bodyBlockStatement) { } + +shared_ptr StatementRepeat::getInitStatement() { + return initStatement; +} + +shared_ptr StatementRepeat::getPreConditionExpression() { + return preConditionExpression; +} + +shared_ptr StatementRepeat::getPostConditionExpression() { + return postConditionExpression; +} + +shared_ptr StatementRepeat::getBodyBlockStatement() { + return bodyBlockStatement; +} + +string StatementRepeat::toString(int indent) { + string value; + for (int ind=0; indtoString(0), ", "; + if (preConditionExpression != nullptr) + value += preConditionExpression->toString(0) + ", "; + if 
(postConditionExpression != nullptr) + value += postConditionExpression->toString(0); + value += "):\n"; + value += bodyBlockStatement->toString(indent+1); + return value; +} \ No newline at end of file diff --git a/src/Parser/Statement/StatementLoop.h b/src/Parser/Statement/StatementRepeat.h similarity index 68% rename from src/Parser/Statement/StatementLoop.h rename to src/Parser/Statement/StatementRepeat.h index 00cd47d..399a8c8 100644 --- a/src/Parser/Statement/StatementLoop.h +++ b/src/Parser/Statement/StatementRepeat.h @@ -3,7 +3,7 @@ class Expression; class StatementBlock; -class StatementLoop: public Statement { +class StatementRepeat: public Statement { private: shared_ptr initStatement; shared_ptr preConditionExpression; @@ -11,7 +11,7 @@ private: shared_ptr bodyBlockStatement; public: - StatementLoop(shared_ptr initStatement, shared_ptr preConditionExpression, shared_ptr postConditionExpression, shared_ptr bodyBlockStatement); + StatementRepeat(shared_ptr initStatement, shared_ptr preConditionExpression, shared_ptr postConditionExpression, shared_ptr bodyBlockStatement); shared_ptr getInitStatement(); shared_ptr getPreConditionExpression(); shared_ptr getPostConditionExpression(); diff --git a/test.brc b/test.brc index 9d31689..7ae6cd8 100644 --- a/test.brc +++ b/test.brc @@ -1,17 +1,7 @@ -@extern putchar fun: character sint32 -> sint32 - -stuff fun: num1 sint32, num2 sint32 - ? num1 > num2 - putchar(0x54) - - : - putchar(0x4e) - +//dummy sint32 <- 55 +stuff fun: num1 sint32, num2 sint32 -> sint32 + rep: + i <- i + 1 ; - putchar(0x0a) -; - -main fun -> sint32 - stuff(8, 108) ret 42 ; \ No newline at end of file