diff --git a/Grammar.md b/Grammar.md new file mode 100644 index 0000000..e787985 --- /dev/null +++ b/Grammar.md @@ -0,0 +1,122 @@ +`?` 0 or 1 instances + +`*` 0 or more instance + +`+` 1 or more instances + +`` New line + +`` Identifier + +### Statement Function +` fun (: ? (, )*)? (-> ? )? ;` +``` +stuff fun +; + +stuff fun -> sint32 + ret 42 +; + +stuff fun: num1 sint32, +num2 sint32 -> +sint32 + ret num1 + num2 +; +``` + +### Statement Assignment +` <- ` +``` +num1 <- 5 + +``` + +### StatementBlock +( )* + + +### StatementExpression + + +StatementFunction: + fun (: (, )*)? (-> )? +; + +### StatementLoop +`loop [ | ] (, ? (, ? )?)? ;` +``` +loop i sint32 <- 0, true, i < 10 + doStuff(i) +; + +loop i sint32 <- 0, +true, i < 10 + doStuff(i) +; + +loop i sint32 <- 0, +true, +i < 10 + doStuff(i) +; +``` + +`loop ( (, ? )?)? ;` + + +StatementMetaExternFunction: +@extern fun (: (, )*)? (-> )? + + +### StatementReturn +`ret ` + +### Statemnet Variable +` <- ` + +### ExpressionVariable: +`` + +### Expression If Else: +`if : ` +``` +if num1 > 10: putchar('T') + +``` + +`if : ;` +``` +if num1 > 10: + num1 <- 500 + putchar('S') +; +``` + +`if : else ` +``` +if num1 > 10: putchar('T') else putchar('N') + +``` + +`if : else ;` +``` +if num1 > 10: + putchar('T') +else + putchar('N') +; + +``` + +`if : else ;` +``` +if num1 > 10: putchar('T') else + num1 <- 500 + putchar('F') +; + +``` + +### Expression Block +`( )* ( ?)? !` \ No newline at end of file diff --git a/src/Lexer/Lexer.cpp b/src/Lexer/Lexer.cpp index ddd9aa9..d200fe1 100644 --- a/src/Lexer/Lexer.cpp +++ b/src/Lexer/Lexer.cpp @@ -4,8 +4,12 @@ Lexer::Lexer(string source): source(source) { } vector> Lexer::getTokens() { - shared_ptr token = nullptr; - tokens.clear(); + currentIndex = 0; + currentLine = 0; + currentColumn = 0; + + vector> tokens; + shared_ptr token; do { token = nextToken(); // Got a nullptr, shouldn't have happened @@ -123,14 +127,6 @@ shared_ptr Lexer::nextToken() { token = match(TokenKind::SEMICOLON, ";", false); if (token != nullptr) return token; - - token = match(TokenKind::QUESTION_QUESTION, "??", false); - if (token != nullptr) - return token; - - token = match(TokenKind::QUESTION, "?", false); - if (token != nullptr) - return token; token = match(TokenKind::LEFT_ARROW, "<-", false); if (token != nullptr) @@ -187,6 +183,14 @@ shared_ptr Lexer::nextToken() { return token; // keywords + token = match(TokenKind::IF, "if", true); + if (token != nullptr) + return token; + + token = match(TokenKind::ELSE, "else", true); + if (token != nullptr) + return token; + token = match(TokenKind::FUNCTION, "fun", true); if (token != nullptr) return token; diff --git a/src/Lexer/Lexer.h b/src/Lexer/Lexer.h index f0fe2bb..c117b08 100644 --- a/src/Lexer/Lexer.h +++ b/src/Lexer/Lexer.h @@ -10,11 +10,9 @@ using namespace std; class Lexer { private: string source; - int currentIndex = 0; - int currentLine = 0; - int currentColumn = 0; - - vector> tokens; + int currentIndex; + int currentLine; + int currentColumn; shared_ptr nextToken(); shared_ptr match(TokenKind kind, string lexme, bool needsSeparator); diff --git a/src/Lexer/Token.cpp b/src/Lexer/Token.cpp index b912fb3..aa08a24 100644 --- a/src/Lexer/Token.cpp +++ b/src/Lexer/Token.cpp @@ -112,10 +112,6 @@ string Token::toString() { return ":"; case TokenKind::SEMICOLON: return ";"; - case TokenKind::QUESTION_QUESTION: - return "??"; - case TokenKind::QUESTION: - return "?"; case TokenKind::LEFT_ARROW: return "←"; case TokenKind::RIGHT_ARROW: @@ -136,6 +132,10 @@ string Token::toString() { case TokenKind::TYPE: return "TYPE(" + lexme + ")"; + case TokenKind::IF: + return "IF"; + case TokenKind::ELSE: + return "ELSE"; case TokenKind::FUNCTION: return "FUNCTION"; case TokenKind::RETURN: diff --git a/src/Lexer/Token.h b/src/Lexer/Token.h index dd2cb9d..1e3adbe 100644 --- a/src/Lexer/Token.h +++ b/src/Lexer/Token.h @@ -26,14 +26,14 @@ enum class TokenKind { COMMA, COLON, SEMICOLON, - QUESTION, - QUESTION_QUESTION, LEFT_ARROW, RIGHT_ARROW, FUNCTION, RETURN, REPEAT, + IF, + ELSE, BOOL, INTEGER_DEC, diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 57eb0e2..7ed697d 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -436,14 +436,12 @@ shared_ptr Parser::matchExpressionGrouping() { if (tryMatchingTokenKinds({TokenKind::LEFT_PAREN}, true, true)) { shared_ptr expression = matchTerm(); // has grouped expression failed? - if (expression == nullptr) { - return matchExpressionInvalid(); - } else if(!expression->isValid()) { - return expression; + if (expression == nullptr || !expression->isValid()) { + return expression ?: matchExpressionInvalid("Expected expression"); } else if (tryMatchingTokenKinds({TokenKind::RIGHT_PAREN}, true, true)) { return make_shared(expression); } else { - return matchExpressionInvalid(); + return matchExpressionInvalid("Unexpected token"); } } @@ -488,52 +486,56 @@ shared_ptr Parser::matchExpressionCall() { tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // optional new line if (!tryMatchingTokenKinds({TokenKind::RIGHT_PAREN}, true, true)) - return matchExpressionInvalid(); + return matchExpressionInvalid("Expected \")\""); return make_shared(identifierToken->getLexme(), argumentExpressions); } shared_ptr Parser::matchExpressionIfElse() { - // Try maching '?' - shared_ptr token = tokens.at(currentIndex); - - if (!tryMatchingTokenKinds({TokenKind::QUESTION}, true, true)) + if (!tryMatchingTokenKinds({TokenKind::IF}, true, true)) return nullptr; - // Then get condition - shared_ptr condition = nextExpression(); - if (condition == nullptr) - return matchExpressionInvalid(); - else if (!condition->isValid()) - return condition; - - // Consume optional ':' - tryMatchingTokenKinds({TokenKind::COLON}, true, true); - // Consume optional new line - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); - - // Match then block - shared_ptr thenBlock = matchExpressionBlock({TokenKind::COLON, TokenKind::SEMICOLON}, false); - if (thenBlock == nullptr) - return matchExpressionInvalid(); - else if (!thenBlock->isValid()) - return thenBlock; - - // Match else block. Then and else block are separated by ':' + shared_ptr condition; + shared_ptr thenBlock; shared_ptr elseBlock; - if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { - bool isSingleLine = !tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); - vector terminalTokens = {TokenKind::SEMICOLON, TokenKind::COMMA, TokenKind::RIGHT_PAREN}; - if (isSingleLine) - terminalTokens.push_back(TokenKind::NEW_LINE); - elseBlock = matchExpressionBlock(terminalTokens, false); - if (elseBlock == nullptr) - return matchExpressionInvalid(); - else if (!elseBlock->isValid()) - return elseBlock; + // condition expression + condition = nextExpression(); + if (condition == nullptr || !condition->isValid()) + return condition ?: matchExpressionInvalid("Expected condition expression"); + + if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true)) + return matchExpressionInvalid("Expected \":\""); + + // then + bool isMultiLine = false; + + if (tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) + isMultiLine = true; + + // then block + if (isMultiLine) + thenBlock = matchExpressionBlock({TokenKind::ELSE, TokenKind::SEMICOLON}); + else + thenBlock = matchExpressionBlock({TokenKind::ELSE, TokenKind::NEW_LINE}); + if (thenBlock == nullptr || !thenBlock->isValid()) + return thenBlock ?: matchExpressionInvalid("Expected then block"); + + // else + if (tryMatchingTokenKinds({TokenKind::ELSE}, true, true)) { + if (tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) + isMultiLine = true; + + // else block + if (isMultiLine) + elseBlock = matchExpressionBlock({TokenKind::SEMICOLON}); + else + elseBlock = matchExpressionBlock({TokenKind::NEW_LINE}); + + if (elseBlock == nullptr || !elseBlock->isValid()) + return elseBlock ?: matchExpressionInvalid("Expected else block"); } - tryMatchingTokenKinds({TokenKind::SEMICOLON}, true, true); + tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true); return make_shared(condition, dynamic_pointer_cast(thenBlock), dynamic_pointer_cast(elseBlock)); } @@ -553,7 +555,7 @@ shared_ptr Parser::matchExpressionBinary(shared_ptr left } if (right == nullptr) { - return matchExpressionInvalid(); + return matchExpressionInvalid("Expected right-side expression"); } else if (!right->isValid()) { return right; } else { @@ -563,25 +565,21 @@ shared_ptr Parser::matchExpressionBinary(shared_ptr left return nullptr; } -shared_ptr Parser::matchExpressionBlock(vector terminalTokenKinds, bool shouldConsumeTerminal) { +shared_ptr Parser::matchExpressionBlock(vector terminalTokenKinds) { vector> statements; - bool hasNewLineTerminal = find(terminalTokenKinds.begin(), terminalTokenKinds.end(), TokenKind::NEW_LINE) != terminalTokenKinds.end(); - while (!tryMatchingTokenKinds(terminalTokenKinds, false, shouldConsumeTerminal)) { + while (!tryMatchingTokenKinds(terminalTokenKinds, false, false)) { shared_ptr statement = nextStatement(); if (statement == nullptr || !statement->isValid()) - return matchExpressionInvalid(); + return matchExpressionInvalid("Expected statement"); else statements.push_back(statement); - - if (hasNewLineTerminal && tokens.at(currentIndex-1)->getKind() == TokenKind::NEW_LINE) - currentIndex--; } return make_shared(statements); } -shared_ptr Parser::matchExpressionInvalid() { +shared_ptr Parser::matchExpressionInvalid(string message) { return make_shared(tokens.at(currentIndex)); } diff --git a/src/Parser/Parser.h b/src/Parser/Parser.h index 15efaac..7c3fdd6 100644 --- a/src/Parser/Parser.h +++ b/src/Parser/Parser.h @@ -42,8 +42,8 @@ private: shared_ptr matchExpressionCall(); shared_ptr matchExpressionIfElse(); shared_ptr matchExpressionBinary(shared_ptr left); - shared_ptr matchExpressionBlock(vector terminalTokenKinds, bool shouldConsumeTerminal); - shared_ptr matchExpressionInvalid(); + shared_ptr matchExpressionBlock(vector terminalTokenKinds); + shared_ptr matchExpressionInvalid(string message); bool tryMatchingTokenKinds(vector kinds, bool shouldMatchAll, bool shouldAdvance); optional valueTypeForToken(shared_ptr token); diff --git a/src/main.cpp b/src/main.cpp index df6119c..4fb8794 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -74,14 +74,14 @@ int main(int argc, char **argv) { cout << endl << endl; } - ModuleBuilder moduleBuilder(moduleName, inputFilePath, statements); + /*ModuleBuilder moduleBuilder(moduleName, inputFilePath, statements); shared_ptr module = moduleBuilder.getModule(); if (isVerbose) { module->print(llvm::outs(), nullptr); } CodeGenerator codeGenerator(module); - codeGenerator.generateObjectFile(outputKind); + codeGenerator.generateObjectFile(outputKind);*/ return 0; } \ No newline at end of file diff --git a/test.brc b/test.brc index 9d31689..c6ce4fc 100644 --- a/test.brc +++ b/test.brc @@ -1,17 +1,7 @@ -@extern putchar fun: character sint32 -> sint32 - -stuff fun: num1 sint32, num2 sint32 - ? num1 > num2 - putchar(0x54) - - : - putchar(0x4e) - +stuff fun: num1 sint32, num2 sint32 -> sint32 + if num1 > num2: + ret 32 * num1 + else + ret 45 * num2 ; - putchar(0x0a) -; - -main fun -> sint32 - stuff(8, 108) - ret 42 ; \ No newline at end of file