diff --git a/samples/test.brc b/samples/test.brc index 64e0a6e..21321d7 100644 --- a/samples/test.brc +++ b/samples/test.brc @@ -24,15 +24,20 @@ i u32 <- 0, rep text[i] != 0: add $1, $0 ;*/ -normAdd fun: num1 sint32, num2 sint32 -> sint32 +/*normAdd fun: num1 sint32, num2 sint32 -> sint32 ret num1 + num2 -; +;*/ rawAdd raw<"=r,r,r">: num1 sint32, num2 sint32 -> sint32 add $1, $2 mov $0, $1 ; +/*rawAdd raw: num1 sint32, num2 sint32 -> sint32 + add $1, $2 + mov $0, $1 +;*/ + /*printChar raw .global REGISTER .text @@ -44,10 +49,12 @@ rawAdd raw<"=r,r,r">: num1 sint32, num2 sint32 -> sint32 ;*/ main fun -> sint32 - //printChar() + /*a sint32 <- 4 + b sint32 <- 5 - res1 sint32 <- normAdd(4, 5) - res2 sint32 <- rawAdd(4, 5) + res sint32 <- rawAdd(4, 5)*/ + putchar('@') + putchar('\n') ret 0 ; \ No newline at end of file diff --git a/src/Logger.cpp b/src/Logger.cpp index b8f85e0..98dd04e 100644 --- a/src/Logger.cpp +++ b/src/Logger.cpp @@ -482,7 +482,7 @@ void Logger::print(shared_ptr error) { switch (error->getKind()) { case ErrorKind::LEXER_ERROR: { string lexme = error->getLexme() ? *(error->getLexme()) : ""; - message = format("Unexpected token \"{}\" at line: {}, column: {}", lexme, error->getLine() + 1, error->getColumn() + 1); + message = format("At line {}, column {}: Unexpected token \"{}\"", error->getLine() + 1, error->getColumn() + 1, lexme); break; } case ErrorKind::PARSER_ERROR: { @@ -492,13 +492,13 @@ void Logger::print(shared_ptr error) { if (expectedTokenKind) { message = format( - "Expected token {} but instead found {} at line: {}, column: {}", - toString(*expectedTokenKind), toString(token), token->getLine() + 1, token->getColumn() + 1 + "At line {}, column {}: Expected token {} but found {} instead", + token->getLine() + 1, token->getColumn() + 1, toString(*expectedTokenKind), toString(token) ); } else { message = format( - "Unexpected token \"{}\" found at line: {}, column: {}", - toString(token), token->getLine() + 1, token->getColumn() + 1 + "At line {}, column {}: Unexpected token \"{}\" found", + token->getLine() + 1, token->getColumn() + 1, toString(token) ); } if (errorMessage) @@ -507,7 +507,7 @@ void Logger::print(shared_ptr error) { } case ErrorKind::BUILDER_ERROR: { string errorMessage = error->getMessage() ? *(error->getMessage()) : ""; - message = format("Error at line {}, column {}: {}", error->getLine(), error->getColumn(), errorMessage); + message = format("At line {}, column {}: {}", error->getLine(), error->getColumn(), errorMessage); break; } } diff --git a/src/Parser/Parsee/Parsee.cpp b/src/Parser/Parsee/Parsee.cpp new file mode 100644 index 0000000..3ec80d9 --- /dev/null +++ b/src/Parser/Parsee/Parsee.cpp @@ -0,0 +1,44 @@ +#include "Parsee.h" + +Parsee Parsee::tokenParsee(TokenKind tokenKind, bool isRequired, bool shouldReturn) { + Parsee parsee; + parsee.kind = ParseeKind::TOKEN; + parsee.tokenKind = tokenKind; + parsee.isRequired = isRequired; + parsee.shouldReturn = shouldReturn; + return parsee; +} + +Parsee Parsee::valueTypeParsee(bool isRequired) { + Parsee parsee; + parsee.kind = ParseeKind::VALUE_TYPE; + parsee.isRequired = isRequired; + parsee.shouldReturn = true; + return parsee; +} + +Parsee Parsee::expressionParsee(bool isRequired) { + Parsee parsee; + parsee.kind = ParseeKind::EXPRESSION; + parsee.isRequired = isRequired; + parsee.shouldReturn = true; + return parsee; +} + +Parsee::Parsee() { } + +ParseeKind Parsee::getKind() { + return kind; +} + +TokenKind Parsee::getTokenKind() { + return tokenKind; +} + +bool Parsee::getIsRequired() { + return isRequired; +} + +bool Parsee::getShouldReturn() { + return shouldReturn; +} \ No newline at end of file diff --git a/src/Parser/Parsee/Parsee.h b/src/Parser/Parsee/Parsee.h new file mode 100644 index 0000000..6dfdf0e --- /dev/null +++ b/src/Parser/Parsee/Parsee.h @@ -0,0 +1,33 @@ +#ifndef PARSEE_H +#define PARSEE_H + +#include + +enum class TokenKind; + +enum class ParseeKind { + TOKEN, + VALUE_TYPE, + EXPRESSION +}; + +class Parsee { +private: + ParseeKind kind; + TokenKind tokenKind; + bool isRequired; + bool shouldReturn; + Parsee(); + +public: + static Parsee tokenParsee(TokenKind tokenKind, bool isRequired, bool shouldReturn); + static Parsee valueTypeParsee(bool isRequired); + static Parsee expressionParsee(bool isRequired); + + ParseeKind getKind(); + TokenKind getTokenKind(); + bool getIsRequired(); + bool getShouldReturn(); +}; + +#endif \ No newline at end of file diff --git a/src/Parser/Parsee/ParseeGroup.cpp b/src/Parser/Parsee/ParseeGroup.cpp new file mode 100644 index 0000000..ead5352 --- /dev/null +++ b/src/Parser/Parsee/ParseeGroup.cpp @@ -0,0 +1,20 @@ +#include "ParseeGroup.h" + +#include "Parsee.h" + +ParseeGroup::ParseeGroup(vector parsees, optional repeatedGroup): +parsees(parsees) { + if (repeatedGroup) { + this->repeatedGroup = *repeatedGroup; + } else { + this->repeatedGroup = {}; + } + } + +vector ParseeGroup::getParsees() { + return parsees; +} + +optional> ParseeGroup::getRepeatedGroup() { + return repeatedGroup; +} \ No newline at end of file diff --git a/src/Parser/Parsee/ParseeGroup.h b/src/Parser/Parsee/ParseeGroup.h new file mode 100644 index 0000000..4e4cc82 --- /dev/null +++ b/src/Parser/Parsee/ParseeGroup.h @@ -0,0 +1,22 @@ +#ifndef PARSEE_GROUP_H +#define PARSEE_GROUP_H + +#include +#include + +class Parsee; + +using namespace std; + +class ParseeGroup { +private: + vector parsees; + optional> repeatedGroup; + +public: + ParseeGroup(vector parsees, optional repeatedGroup); + vector getParsees(); + optional> getRepeatedGroup(); +}; + +#endif \ No newline at end of file diff --git a/src/Parser/Parsee/ParseeResult.cpp b/src/Parser/Parsee/ParseeResult.cpp new file mode 100644 index 0000000..5e09309 --- /dev/null +++ b/src/Parser/Parsee/ParseeResult.cpp @@ -0,0 +1,51 @@ +#include "ParseeResult.h" + +#include "Lexer/Token.h" +#include "Parser/ValueType.h" + +ParseeResult ParseeResult::tokenResult(shared_ptr token) { + ParseeResult parseeResult; + parseeResult.kind = ParseeResultKind::TOKEN; + parseeResult.token = token; + parseeResult.tokensCount = 1; + return parseeResult; +} + +ParseeResult ParseeResult::valueTypeResult(shared_ptr valueType, int tokensCount) { + ParseeResult parseeResult; + parseeResult.kind = ParseeResultKind::VALUE_TYPE; + parseeResult.valueType = valueType; + parseeResult.tokensCount = tokensCount; + return parseeResult; +} + +ParseeResult ParseeResult::expressionResult(shared_ptr expression, int tokensCount) { + ParseeResult result; + result.kind = ParseeResultKind::EXPRESSION; + result.expression = expression; + result.tokensCount = tokensCount; + return result; +} + +ParseeResult::ParseeResult() { } + + +ParseeResultKind ParseeResult::getKind() { + return kind; +} + +shared_ptr ParseeResult::getToken() { + return token; +} + +shared_ptr ParseeResult::getValueType() { + return valueType; +} + +shared_ptr ParseeResult::getExpression() { + return expression; +} + +int ParseeResult::getTokensCount() { + return tokensCount; +} \ No newline at end of file diff --git a/src/Parser/Parsee/ParseeResult.h b/src/Parser/Parsee/ParseeResult.h new file mode 100644 index 0000000..f59114e --- /dev/null +++ b/src/Parser/Parsee/ParseeResult.h @@ -0,0 +1,39 @@ +#ifndef PARSEE_RESULT_H +#define PARSEE_RESULT_H + +#include + +class Token; +class ValueType; +class Expression; + +using namespace std; + +enum class ParseeResultKind { + TOKEN, + VALUE_TYPE, + EXPRESSION +}; + +class ParseeResult { +private: + ParseeResultKind kind; + shared_ptr token; + shared_ptr valueType; + shared_ptr expression; + int tokensCount; + ParseeResult(); + +public: + static ParseeResult tokenResult(shared_ptr token); + static ParseeResult valueTypeResult(shared_ptr valueType, int tokensCount); + static ParseeResult expressionResult(shared_ptr expression, int tokensCount); + + ParseeResultKind getKind(); + shared_ptr getToken(); + shared_ptr getValueType(); + shared_ptr getExpression(); + int getTokensCount(); +}; + +#endif \ No newline at end of file diff --git a/src/Parser/Parsee/ParseeResultsGroup.cpp b/src/Parser/Parsee/ParseeResultsGroup.cpp new file mode 100644 index 0000000..ca3cc9c --- /dev/null +++ b/src/Parser/Parsee/ParseeResultsGroup.cpp @@ -0,0 +1,30 @@ +#include "ParseeResultsGroup.h" + +#include "ParseeResult.h" + +ParseeResultsGroup ParseeResultsGroup::success(vector results) { + ParseeResultsGroup resultsGroup; + resultsGroup.kind = ParseeResultsGroupKind::SUCCESS; + resultsGroup.results = results; + return resultsGroup; +} + +ParseeResultsGroup ParseeResultsGroup::noMatch() { + ParseeResultsGroup resultsGroup; + resultsGroup.kind = ParseeResultsGroupKind::NO_MATCH; + return resultsGroup; +} + +ParseeResultsGroup ParseeResultsGroup::failure() { + ParseeResultsGroup resultsGroup; + resultsGroup.kind = ParseeResultsGroupKind::FAILURE; + return resultsGroup; +} + +ParseeResultsGroupKind ParseeResultsGroup::getKind() { + return kind; +} + +vector ParseeResultsGroup::getResults() { + return results; +} \ No newline at end of file diff --git a/src/Parser/Parsee/ParseeResultsGroup.h b/src/Parser/Parsee/ParseeResultsGroup.h new file mode 100644 index 0000000..3ba23b4 --- /dev/null +++ b/src/Parser/Parsee/ParseeResultsGroup.h @@ -0,0 +1,30 @@ +#ifndef PARSEE_RESULTS_GROUP_H +#define PARSEE_RESULTS_GROUP_H + +#include + +class ParseeResult; + +using namespace std; + +enum class ParseeResultsGroupKind { + SUCCESS, + NO_MATCH, + FAILURE +}; + +class ParseeResultsGroup { +private: + ParseeResultsGroupKind kind; + vector results; + +public: + static ParseeResultsGroup success(vector results); + static ParseeResultsGroup noMatch(); + static ParseeResultsGroup failure(); + + ParseeResultsGroupKind getKind(); + vector getResults(); +}; + +#endif \ No newline at end of file diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 9852fa2..0db1a3d 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -25,6 +25,11 @@ #include "Parser/Statement/StatementBlock.h" #include "Parser/Statement/StatementRepeat.h" +#include "Parsee/Parsee.h" +#include "Parsee/ParseeGroup.h" +#include "Parsee/ParseeResult.h" +#include "Parsee/ParseeResultsGroup.h" + Parser::Parser(vector> tokens) : tokens(tokens) { } @@ -107,112 +112,207 @@ shared_ptr Parser::nextInBlockStatement() { } shared_ptr Parser::matchStatementMetaExternFunction() { - if (!tryMatchingTokenKinds({TokenKind::M_EXTERN, TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false)) - return nullptr; + ParseeResultsGroup resultsGroup; - string name; + string identifier; vector>> arguments; shared_ptr returnType = ValueType::NONE; - currentIndex++; // skip meta - shared_ptr identifierToken = tokens.at(currentIndex++); - currentIndex++; // skip fun + // identifier + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::M_EXTERN, true, false), + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::tokenParsee(TokenKind::FUNCTION, true, false) + }, + {} + ) + ); + + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + identifier = resultsGroup.getResults().at(0).getToken()->getLexme(); + break; + case ParseeResultsGroupKind::NO_MATCH: + case ParseeResultsGroupKind::FAILURE: + return nullptr; + } // arguments - if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { - do { - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) { - markError({}, "Expected function argument"); - return nullptr; + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::COLON, true, false), + Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::valueTypeParsee(true) + }, + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::COMMA, true, false), + Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::valueTypeParsee(true) + }, + {} + ) + ) + ); + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + for (int i=0; i> arg; + arg.first = resultsGroup.getResults().at(i).getToken()->getLexme(); + arg.second = resultsGroup.getResults().at(i+1).getValueType(); + arguments.push_back(arg); } - shared_ptr identifierToken = tokens.at(currentIndex++); - //shared_ptr argumentTypeToken = tokens.at(currentIndex++); - shared_ptr argumentType = matchValueType(); - if (argumentType == nullptr) { - markError(TokenKind::TYPE, {}); - return nullptr; - } - - arguments.push_back(pair>(identifierToken->getLexme(), argumentType)); - } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); - } - - // Return type - if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - - //shared_ptr returnTypeToken = tokens.at(currentIndex); - returnType = matchValueType(); - if (returnType == nullptr) { - markError(TokenKind::TYPE, {}); + break; + case ParseeResultsGroupKind::NO_MATCH: + break; + case ParseeResultsGroupKind::FAILURE: return nullptr; - } } - return make_shared(identifierToken->getLexme(), arguments, returnType); + // return type + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::RIGHT_ARROW, true, false), + Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), + Parsee::valueTypeParsee(true) + }, + {} + ) + ); + + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + returnType = resultsGroup.getResults().at(0).getValueType(); + break; + case ParseeResultsGroupKind::NO_MATCH: + break; + case ParseeResultsGroupKind::FAILURE: + return nullptr; + } + + return make_shared(identifier, arguments, returnType); } shared_ptr Parser::matchStatementVariable() { - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) + ParseeResultsGroup resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::valueTypeParsee(true), + Parsee::tokenParsee(TokenKind::LEFT_ARROW, true, false), + Parsee::expressionParsee(true) + }, + {} + ) + ); + + if (resultsGroup.getKind() != ParseeResultsGroupKind::SUCCESS) return nullptr; - shared_ptr identifierToken = tokens.at(currentIndex++); - shared_ptr valueType = matchValueType(); + string identifier = resultsGroup.getResults().at(0).getToken()->getLexme(); + shared_ptr valueType = resultsGroup.getResults().at(1).getValueType(); + shared_ptr expression = resultsGroup.getResults().at(2).getExpression(); - // Expect left arrow - if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true)) { - markError(TokenKind::LEFT_ARROW, {}); - return nullptr; - } - - shared_ptr expression = nextExpression(); - if (expression == nullptr) - return nullptr; - - return make_shared(identifierToken->getLexme(), valueType, expression); + return make_shared(identifier, valueType, expression); } shared_ptr Parser::matchStatementFunction() { - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false)) - return nullptr; + bool hasError = false; + ParseeResultsGroup resultsGroup; string name; vector>> arguments; shared_ptr returnType = ValueType::NONE; shared_ptr statementBlock; - // name - name = tokens.at(currentIndex++)->getLexme(); - currentIndex++; // skip fun + // identifier + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::tokenParsee(TokenKind::FUNCTION, true, false) + }, + {} + ) + ); - // arguments - if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { - do { - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) { - markError({}, "Expected function argument"); - return nullptr; - } - shared_ptr identifierToken = tokens.at(currentIndex++); - shared_ptr argumentType = matchValueType(); - if (argumentType == nullptr) { - markError(TokenKind::TYPE, {}); - return nullptr; - } - - arguments.push_back(pair>(identifierToken->getLexme(), argumentType)); - } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + name = resultsGroup.getResults().at(0).getToken()->getLexme(); + break; + case ParseeResultsGroupKind::NO_MATCH: + return nullptr; + case ParseeResultsGroupKind::FAILURE: + hasError = true; + break; } - // return type - if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line + // arguments + if (!hasError) { + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::COLON, true, false), + Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::valueTypeParsee(true) + }, + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::COMMA, true, false), + Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::valueTypeParsee(true) + }, + {} + ) + ) + ); + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + for (int i=0; i> arg; + arg.first = resultsGroup.getResults().at(i).getToken()->getLexme(); + arg.second = resultsGroup.getResults().at(i+1).getValueType(); + arguments.push_back(arg); + } + break; + case ParseeResultsGroupKind::NO_MATCH: + break; + case ParseeResultsGroupKind::FAILURE: + hasError = true; + break; + } + } - returnType = matchValueType(); - if (returnType == nullptr) { - markError(TokenKind::TYPE, {}); - return nullptr; + // return type + if (!hasError) { + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::RIGHT_ARROW, true, false), + Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), + Parsee::valueTypeParsee(true) + }, + {} + ) + ); + + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + returnType = resultsGroup.getResults().at(0).getValueType(); + break; + case ParseeResultsGroupKind::NO_MATCH: + break; + case ParseeResultsGroupKind::FAILURE: + hasError = true; + break; } } @@ -223,10 +323,11 @@ shared_ptr Parser::matchStatementFunction() { } // block - statementBlock = matchStatementBlock({TokenKind::SEMICOLON}); + statementBlock = matchStatementBlock({TokenKind::SEMICOLON, TokenKind::END}); if (statementBlock == nullptr) return nullptr; + // closing semicolon if(!tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true)) { markError(TokenKind::SEMICOLON, {}); return nullptr; @@ -236,8 +337,8 @@ shared_ptr Parser::matchStatementFunction() { } shared_ptr Parser::matchStatementRawFunction() { - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::RAW_FUNCTION}, true, false)) - return nullptr; + bool hasError = false; + ParseeResultsGroup resultsGroup; string name; string constraints; @@ -245,50 +346,118 @@ shared_ptr Parser::matchStatementRawFunction() { shared_ptr returnType = ValueType::NONE; string rawSource; - // name - name = tokens.at(currentIndex++)->getLexme(); - currentIndex++; // skip raw + // identifier + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::tokenParsee(TokenKind::RAW_FUNCTION, true, false) + }, + {} + ) + ); + + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + name = resultsGroup.getResults().at(0).getToken()->getLexme(); + break; + case ParseeResultsGroupKind::NO_MATCH: + return nullptr; + break; + case ParseeResultsGroupKind::FAILURE: + hasError = true; + break; + } // constraints + if (!hasError) { + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::LESS, true, false), + Parsee::tokenParsee(TokenKind::STRING, true, true), + Parsee::tokenParsee(TokenKind::GREATER, true, false) + }, + {} + ) + ); - if (tryMatchingTokenKinds({TokenKind::LESS}, true, true)) { - if (tokens.at(currentIndex)->isOfKind({TokenKind::STRING})) { - constraints = tokens.at(currentIndex++)->getLexme(); - // remove enclosing quotes - if (constraints.length() >= 2) - constraints = constraints.substr(1, constraints.length() - 2); + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + constraints = resultsGroup.getResults().at(0).getToken()->getLexme(); + // remove enclosing quotes + if (constraints.length() >= 2) + constraints = constraints.substr(1, constraints.length() - 2); + break; + case ParseeResultsGroupKind::NO_MATCH: + return nullptr; + break; + case ParseeResultsGroupKind::FAILURE: + hasError = true; + break; } - if (!tryMatchingTokenKinds({TokenKind::GREATER}, true, true)) - markError({TokenKind::GREATER}, {}); } // arguments - if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { - do { - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) { - markError({}, "Expected function argument"); - return nullptr; - } - shared_ptr identifierToken = tokens.at(currentIndex++); - shared_ptr argumentType = matchValueType(); - if (argumentType == nullptr) { - markError(TokenKind::TYPE, {}); - return nullptr; - } - - arguments.push_back(pair>(identifierToken->getLexme(), argumentType)); - } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); + if (!hasError) { + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::COLON, true, false), + Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::valueTypeParsee(true) + }, + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::COMMA, true, false), + Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::valueTypeParsee(true) + }, + {} + ) + ) + ); + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + for (int i=0; i> arg; + arg.first = resultsGroup.getResults().at(i).getToken()->getLexme(); + arg.second = resultsGroup.getResults().at(i+1).getValueType(); + arguments.push_back(arg); + } + break; + case ParseeResultsGroupKind::NO_MATCH: + break; + case ParseeResultsGroupKind::FAILURE: + hasError = true; + break; + } } - // return type - if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line + // return type + if (!hasError) { + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::RIGHT_ARROW, true, false), + Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), + Parsee::valueTypeParsee(true) + }, + {} + ) + ); - returnType = matchValueType(); - if (returnType == nullptr) { - markError(TokenKind::TYPE, {}); - return nullptr; + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + returnType = resultsGroup.getResults().at(0).getValueType(); + break; + case ParseeResultsGroupKind::NO_MATCH: + break; + case ParseeResultsGroupKind::FAILURE: + hasError = true; + break; } } @@ -337,44 +506,90 @@ shared_ptr Parser::matchStatementBlock(vector terminalToke shared_ptr Parser::matchStatementAssignment() { int startIndex = currentIndex; + ParseeResultsGroup resultsGroup; - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER}, true, false)) - return nullptr; - shared_ptr identifierToken = tokens.at(currentIndex++); + string identifier; shared_ptr indexExpression; + shared_ptr expression; - if (tryMatchingTokenKinds({TokenKind::LEFT_SQUARE_BRACKET}, true, true)) { - indexExpression = nextExpression(); - if (indexExpression == nullptr) - return nullptr; + // identifier + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + }, + {} + ) + ); - if (!tryMatchingTokenKinds({TokenKind::RIGHT_SQUARE_BRACKET}, true, true)) { - markError(TokenKind::RIGHT_SQUARE_BRACKET, {}); - return nullptr; - } - } - - // assignment requires left arrow, otherwise abort - if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true)) { - currentIndex = startIndex; - return nullptr; - } - - shared_ptr expression = nextExpression(); - if (expression == nullptr) + if (resultsGroup.getKind() != ParseeResultsGroupKind::SUCCESS) return nullptr; - return make_shared(identifierToken->getLexme(), indexExpression, expression); + identifier = resultsGroup.getResults().at(0).getToken()->getLexme(); + + // index expression + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::LEFT_SQUARE_BRACKET, true, false), + Parsee::expressionParsee(true), + Parsee::tokenParsee(TokenKind::RIGHT_SQUARE_BRACKET, true, false), + }, + {} + ) + ); + + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + indexExpression = resultsGroup.getResults().at(0).getExpression(); + break; + case ParseeResultsGroupKind::NO_MATCH: + break; + case ParseeResultsGroupKind::FAILURE: + return nullptr; + } + + // expression + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::LEFT_ARROW, true, false), + Parsee::expressionParsee(true) + }, + {} + ) + ); + + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + expression = resultsGroup.getResults().at(0).getExpression(); + break; + case ParseeResultsGroupKind::NO_MATCH: + currentIndex = startIndex; + return nullptr; + case ParseeResultsGroupKind::FAILURE: + return nullptr; + } + + return make_shared(identifier, indexExpression, expression); } shared_ptr Parser::matchStatementReturn() { - if (!tryMatchingTokenKinds({TokenKind::RETURN}, true, true)) + ParseeResultsGroup resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::RETURN, true, false), + Parsee::expressionParsee(false) + }, + {} + ) + ); + + if (resultsGroup.getKind() != ParseeResultsGroupKind::SUCCESS) return nullptr; - shared_ptr expression = nextExpression(); - if (expression == nullptr) - return nullptr; - + shared_ptr expression = !resultsGroup.getResults().empty() ? resultsGroup.getResults().at(0).getExpression() : nullptr; + return make_shared(expression); } @@ -471,7 +686,6 @@ shared_ptr Parser::nextExpression() { if (expression != nullptr || errors.size() > errorsCount) return expression; - markError({}, {}); return nullptr; } @@ -707,6 +921,7 @@ shared_ptr Parser::matchExpressionBinary(shared_ptr left } if (right == nullptr) { + markError({}, "Expected expression"); return nullptr; } else { return make_shared(token, left, right); @@ -737,44 +952,130 @@ shared_ptr Parser::matchExpressionBlock(vector terminalTo return make_shared(statements); } -shared_ptr Parser::matchValueType() { - if (!tryMatchingTokenKinds({TokenKind::TYPE}, true, false)) - return nullptr; - shared_ptr typeToken = tokens.at(currentIndex++); - shared_ptr subType; - int valueArg = 0; +ParseeResultsGroup Parser::parseeResultsGroupForParseeGroup(ParseeGroup group) { + int errorsCount = errors.size(); + int startIndex = currentIndex; + vector results; + bool mustFulfill = false; - if (tryMatchingTokenKinds({TokenKind::LESS}, true, true)) { - if (!tryMatchingTokenKinds({TokenKind::TYPE}, true, false)) { - markError(TokenKind::TYPE, {}); - return nullptr; - } - subType = matchValueType(); - if (subType == nullptr) - return subType; - - if (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) { - if (!tryMatchingTokenKinds({TokenKind::INTEGER_DEC, TokenKind::INTEGER_HEX, TokenKind::INTEGER_BIN, TokenKind::INTEGER_CHAR}, false, false)) { - markError({}, "Expected integer literal"); - return nullptr; - } - shared_ptr expressionValue = matchExpressionLiteral(); - if (expressionValue == nullptr) { - markError({}, "Expected integer literal"); - return nullptr; - } - - valueArg = dynamic_pointer_cast(expressionValue)->getSint32Value(); + for (Parsee &parsee : group.getParsees()) { + optional result; + switch (parsee.getKind()) { + case ParseeKind::TOKEN: + result = tokenParseeResult(currentIndex, parsee.getTokenKind()); + break; + case ParseeKind::VALUE_TYPE: + result = valueTypeParseeResult(currentIndex); + break; + case ParseeKind::EXPRESSION: + result = expressionParseeResult(currentIndex); + break; } + // generated an error? + if (errors.size() > errorsCount) + return ParseeResultsGroup::failure(); - if (!tryMatchingTokenKinds({TokenKind::GREATER}, true, true)) { - markError(TokenKind::GREATER, {}); - return nullptr; + // if doesn't match on optional group + if (!result && parsee.getIsRequired() && !mustFulfill) { + currentIndex = startIndex; + //return vector(); + return ParseeResultsGroup::noMatch(); } + + // return matching token? + if (result && parsee.getShouldReturn()) + results.push_back(*result); + + // decide if we're decoding the expected sequence + if (!parsee.getIsRequired() && currentIndex > startIndex) + mustFulfill = true; + + // invalid sequence detected? + if (!result && parsee.getIsRequired() && mustFulfill) { + markError(parsee.getTokenKind(), {}); + //return {}; + return ParseeResultsGroup::failure(); + } + + // got to the next token if we got a match + if (result) + currentIndex += (*result).getTokensCount(); } - return ValueType::valueTypeForToken(typeToken, subType, valueArg); + if (group.getRepeatedGroup()) { + ParseeResultsGroup subResultsGroup; + do { + subResultsGroup = parseeResultsGroupForParseeGroup(*group.getRepeatedGroup()); + if (subResultsGroup.getKind() == ParseeResultsGroupKind::FAILURE) + return ParseeResultsGroup::failure(); + + for (ParseeResult &subResult : subResultsGroup.getResults()) + results.push_back(subResult); + } while (subResultsGroup.getKind() == ParseeResultsGroupKind::SUCCESS); + } + + return ParseeResultsGroup::success(results); +} + +optional Parser::tokenParseeResult(int index, TokenKind tokenKind) { + shared_ptr token = tokens.at(index); + if (token->isOfKind({tokenKind})) + return ParseeResult::tokenResult(token); + return {}; +} + +optional Parser::valueTypeParseeResult(int index) { + int startIndex = index; + + if (!tokens.at(index)->isOfKind({TokenKind::TYPE})) + return {}; + + shared_ptr typeToken = tokens.at(index++); + shared_ptr subType; + int typeArg = 0; + + if (tokens.at(index)->isOfKind({TokenKind::LESS})) { + index++; + optional subResult = valueTypeParseeResult(index); + if (!subResult) + return {}; + subType = (*subResult).getValueType(); + index += (*subResult).getTokensCount(); + + if (tokens.at(index)->isOfKind({TokenKind::COMMA})) { + index++; + + if (!tokens.at(index)->isOfKind({TokenKind::INTEGER_DEC, TokenKind::INTEGER_HEX, TokenKind::INTEGER_BIN, TokenKind::INTEGER_CHAR})) + return {}; + + int storedIndex = currentIndex; + currentIndex = index; + shared_ptr expressionValue = matchExpressionLiteral(); + typeArg = dynamic_pointer_cast(expressionValue)->getSint32Value(); + currentIndex = storedIndex; + index++; + } + + if (!tokens.at(index)->isOfKind({TokenKind::GREATER})) + return {}; + index++; + } + + shared_ptr valueType = ValueType::valueTypeForToken(typeToken, subType, typeArg); + return ParseeResult::valueTypeResult(valueType, index - startIndex); +} + +optional Parser::expressionParseeResult(int index) { + int startIndex = currentIndex; + int errorsCount = errors.size(); + shared_ptr expression = nextExpression(); + if (errors.size() > errorsCount) + return {}; + + int tokensCount = currentIndex - startIndex; + currentIndex = startIndex; + return ParseeResult::expressionResult(expression, tokensCount); } bool Parser::tryMatchingTokenKinds(vector kinds, bool shouldMatchAll, bool shouldAdvance) { diff --git a/src/Parser/Parser.h b/src/Parser/Parser.h index fdfe4a2..8e5a8d5 100644 --- a/src/Parser/Parser.h +++ b/src/Parser/Parser.h @@ -12,6 +12,10 @@ class ValueType; class Expression; class Statement; +class ParseeGroup; +class ParseeResult; +class ParseeResultsGroup; + using namespace std; class Parser { @@ -50,8 +54,10 @@ private: shared_ptr matchExpressionBinary(shared_ptr left); shared_ptr matchExpressionBlock(vector terminalTokenKinds); - shared_ptr matchValueType(); - + ParseeResultsGroup parseeResultsGroupForParseeGroup(ParseeGroup group); + optional tokenParseeResult(int index, TokenKind tokenKind); + optional valueTypeParseeResult(int index); + optional expressionParseeResult(int index); bool tryMatchingTokenKinds(vector kinds, bool shouldMatchAll, bool shouldAdvance); void markError(optional expectedTokenKind, optional message);