#include "Parser.h"

#include "Parser/Expression/ExpressionGrouping.h"
#include "Parser/Expression/ExpressionLiteral.h"
#include "Parser/Expression/ExpressionVariable.h"
#include "Parser/Expression/ExpressionCall.h"
#include "Parser/Expression/ExpressionIfElse.h"
#include "Parser/Expression/ExpressionBinary.h"
#include "Parser/Expression/ExpressionBlock.h"
#include "Parser/Expression/ExpressionInvalid.h"

#include "Parser/Statement/StatementFunction.h"
#include "Parser/Statement/StatementVariable.h"
#include "Parser/Statement/StatementReturn.h"
#include "Parser/Statement/StatementExpression.h"
#include "Parser/Statement/StatementMetaExternFunction.h"
#include "Parser/Statement/StatementBlock.h"
#include "Parser/Statement/StatementInvalid.h"

// NOTE(review): the template arguments used throughout this file (Token, Statement,
// Expression, TokenKind, ValueType and the Statement*/Expression* node classes) are
// assumed to match the declarations in Parser.h and the headers included above - confirm.

// Creates a parser over an already tokenized input.
Parser::Parser(vector<shared_ptr<Token>> tokens): tokens(tokens) {
}

// Parses top level statements until the END token is the current token.
// Prints the first invalid statement to stderr and terminates the process with exit code 1.
vector<shared_ptr<Statement>> Parser::getStatements() {
    vector<shared_ptr<Statement>> statements;

    while (!tryMatchingTokenKinds({TokenKind::END}, true, false)) {
        shared_ptr<Statement> statement = nextStatement();
        // Abort parsing if we got an error
        if (!statement->isValid()) {
            cerr << statement->toString(0);
            exit(1);
        }
        statements.push_back(statement);
    }

    return statements;
}

//
// Statement
//

// Tries each statement matcher in turn and returns the first result that is not null.
// Never returns null: if nothing matches, an invalid statement ("Unexpected token") is returned.
shared_ptr<Statement> Parser::nextStatement() {
    shared_ptr<Statement> statement;

    statement = matchStatementFunction();
    if (statement != nullptr)
        return statement;

    statement = matchStatementVariable();
    if (statement != nullptr)
        return statement;

    statement = matchStatementReturn();
    if (statement != nullptr)
        return statement;

    statement = matchStatementExpression();
    if (statement != nullptr)
        return statement;

    statement = matchStatementMetaExternFunction();
    if (statement != nullptr)
        return statement;

    return matchStatementInvalid("Unexpected token");
}

// Matches a function declaration:
//   IDENTIFIER FUNCTION [COLON argument {COMMA argument}] [NEW_LINE]
//   [RIGHT_ARROW TYPE NEW_LINE] statements SEMICOLON NEW_LINE
// where argument is IDENTIFIER TYPE, optionally preceded by a NEW_LINE.
// Returns null (without consuming anything) if the first two tokens don't match,
// an invalid statement on a syntax error, and a StatementFunction otherwise.
shared_ptr<Statement> Parser::matchStatementFunction() {
    if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false))
        return nullptr;

    shared_ptr<Token> identifierToken = tokens.at(currentIndex);
    currentIndex++; // identifier
    currentIndex++; // skip fun

    // Get arguments
    vector<pair<string, ValueType>> arguments;
    if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) {
        do {
            tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line
            if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false))
                return matchStatementInvalid("Expected function argument");

            shared_ptr<Token> argumentNameToken = tokens.at(currentIndex);
            currentIndex++; // identifier
            shared_ptr<Token> argumentTypeToken = tokens.at(currentIndex);
            currentIndex++; // type

            optional<ValueType> argumentType = valueTypeForToken(argumentTypeToken);
            if (!argumentType)
                return matchStatementInvalid("Invalid argument type");

            arguments.push_back(pair<string, ValueType>(argumentNameToken->getLexme(), *argumentType));
        } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true));
    }

    // consume optional new line
    tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);

    // Return type
    ValueType returnType = ValueType::NONE;
    if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) {
        shared_ptr<Token> typeToken = tokens.at(currentIndex);
        optional<ValueType> type = valueTypeForToken(typeToken);
        if (!type)
            return matchStatementInvalid("Expected return type");
        returnType = *type;
        currentIndex++; // type

        // consume new line
        if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
            return matchStatementInvalid("Expected new line after function declaration");
    }

    // Function body, the terminating ';' is consumed by the block
    shared_ptr<Statement> statementBlock = matchStatementBlock({TokenKind::SEMICOLON}, true);
    if (statementBlock == nullptr)
        return matchStatementInvalid();
    else if (!statementBlock->isValid())
        return statementBlock;

    if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, false, true))
        return matchStatementInvalid("Expected a new line after a function declaration");

    return make_shared<StatementFunction>(identifierToken->getLexme(), arguments, returnType, dynamic_pointer_cast<StatementBlock>(statementBlock));
}

// Matches a variable declaration: IDENTIFIER TYPE LEFT_ARROW expression NEW_LINE.
// Returns null (without consuming anything) if the first two tokens don't match,
// an invalid statement on a syntax error, and a StatementVariable otherwise.
shared_ptr<Statement> Parser::matchStatementVariable() {
    if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false))
        return nullptr;

    shared_ptr<Token> identifierToken = tokens.at(currentIndex);
    currentIndex++; // identifier
    shared_ptr<Token> valueTypeToken = tokens.at(currentIndex);

    // Shares the type name lookup with function arguments and return types
    optional<ValueType> valueType = valueTypeForToken(valueTypeToken);
    if (!valueType)
        return matchStatementInvalid("Invalid type");
    currentIndex++; // type

    // Expect left arrow
    if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true))
        return matchStatementInvalid("Expected left arrow");

    shared_ptr<Expression> expression = nextExpression();
    if (expression == nullptr || !expression->isValid())
        return matchStatementInvalid();

    // Expect new line
    if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, false, true))
        return matchStatementInvalid("Expected a new line after variable declaration");

    return make_shared<StatementVariable>(identifierToken->getLexme(), *valueType, expression);
}

// Matches RETURN followed by an optional expression.
// The statement has to be followed by a NEW_LINE or a SEMICOLON; only the NEW_LINE is consumed.
shared_ptr<Statement> Parser::matchStatementReturn() {
    if (!tryMatchingTokenKinds({TokenKind::RETURN}, true, true))
        return nullptr;

    // A missing expression (null) is allowed, an invalid one is not
    shared_ptr<Expression> expression = nextExpression();
    if (expression != nullptr && !expression->isValid())
        return matchStatementInvalid();

    if (!tryMatchingTokenKinds({TokenKind::NEW_LINE, TokenKind::SEMICOLON}, false, false))
        return matchStatementInvalid();

    tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);

    return make_shared<StatementReturn>(expression);
}

// Wraps an expression into a statement and consumes an optional trailing NEW_LINE.
// Returns null if no expression starts at the current token and an invalid statement
// (carrying the expression's description) if the expression is invalid.
shared_ptr<Statement> Parser::matchStatementExpression() {
    shared_ptr<Expression> expression = nextExpression();

    if (expression == nullptr)
        return nullptr;
    else if (!expression->isValid())
        return make_shared<StatementInvalid>(tokens.at(currentIndex), expression->toString(0));

    // Consume new line
    tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);

    return make_shared<StatementExpression>(expression);
}

// Matches an extern function declaration:
//   M_EXTERN IDENTIFIER FUNCTION [COLON argument {COMMA argument}] [NEW_LINE]
//   [RIGHT_ARROW TYPE NEW_LINE]
// Same header syntax as matchStatementFunction(), but without a body.
shared_ptr<Statement> Parser::matchStatementMetaExternFunction() {
    if (!tryMatchingTokenKinds({TokenKind::M_EXTERN, TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false))
        return nullptr;

    currentIndex++; // skip meta
    shared_ptr<Token> identifierToken = tokens.at(currentIndex);
    currentIndex++; // identifier
    currentIndex++; // skip fun

    // Get arguments
    vector<pair<string, ValueType>> arguments;
    if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) {
        do {
            tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line
            if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false))
                return matchStatementInvalid("Expected function argument");

            shared_ptr<Token> argumentNameToken = tokens.at(currentIndex);
            currentIndex++; // identifier
            shared_ptr<Token> argumentTypeToken = tokens.at(currentIndex);
            currentIndex++; // type

            optional<ValueType> argumentType = valueTypeForToken(argumentTypeToken);
            if (!argumentType)
                return matchStatementInvalid("Invalid argument type");

            arguments.push_back(pair<string, ValueType>(argumentNameToken->getLexme(), *argumentType));
        } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true));
    }

    // consume optional new line
    tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);

    // Return type
    ValueType returnType = ValueType::NONE;
    if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) {
        shared_ptr<Token> typeToken = tokens.at(currentIndex);
        optional<ValueType> type = valueTypeForToken(typeToken);
        if (!type)
            return matchStatementInvalid("Expected return type");
        returnType = *type;
        currentIndex++; // type

        // consume new line
        if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
            return matchStatementInvalid("Expected new line after function declaration");
    }

    return make_shared<StatementMetaExternFunction>(identifierToken->getLexme(), arguments, returnType);
}

// Collects statements until the current token is one of terminalTokenKinds.
// The terminal token is consumed only if shouldConsumeTerminal is set.
// Returns the first invalid statement as is, so that its message is preserved.
shared_ptr<Statement> Parser::matchStatementBlock(vector<TokenKind> terminalTokenKinds, bool shouldConsumeTerminal) {
    vector<shared_ptr<Statement>> statements;

    bool hasNewLineTerminal = find(terminalTokenKinds.begin(), terminalTokenKinds.end(), TokenKind::NEW_LINE) != terminalTokenKinds.end();

    while (!tryMatchingTokenKinds(terminalTokenKinds, false, shouldConsumeTerminal)) {
        shared_ptr<Statement> statement = nextStatement();
        if (statement == nullptr)
            return matchStatementInvalid();
        else if (!statement->isValid())
            return statement;
        else
            statements.push_back(statement);

        // The statement may have consumed the new line which terminates this block,
        // step back so that the loop condition gets to see it
        if (hasNewLineTerminal && tokens.at(currentIndex-1)->getKind() == TokenKind::NEW_LINE)
            currentIndex--;
    }

    return make_shared<StatementBlock>(statements);
}

// Creates an invalid statement for the current token with the given message.
shared_ptr<Statement> Parser::matchStatementInvalid(string message) {
    return make_shared<StatementInvalid>(tokens.at(currentIndex), message);
}

//
// Expression
//

// Tries to match an expression at the current token. Returns null if there is none.
// Binary expressions are matched through the chain
// matchEquality() -> matchComparison() -> matchTerm() -> matchFactor() -> matchPrimary().
shared_ptr<Expression> Parser::nextExpression() {
    shared_ptr<Expression> expression;

    expression = matchEquality();
    if (expression != nullptr)
        return expression;

    expression = matchExpressionIfElse();
    if (expression != nullptr)
        return expression;

    // matchPrimary() has already tried a variable at this point, kept as the last resort
    expression = matchExpressionVariable();
    if (expression != nullptr)
        return expression;

    return nullptr;
}

// Matches a comparison followed by any number of "equality-operator comparison" pairs.
shared_ptr<Expression> Parser::matchEquality() {
    shared_ptr<Expression> expression = matchComparison();
    if (expression == nullptr || !expression->isValid())
        return expression;

    while (tryMatchingTokenKinds(Token::tokensEquality, false, false))
        expression = matchExpressionBinary(expression);

    return expression;
}

// Matches a term followed by any number of "comparison-operator term" pairs.
shared_ptr<Expression> Parser::matchComparison() {
    shared_ptr<Expression> expression = matchTerm();
    if (expression == nullptr || !expression->isValid())
        return expression;

    while (tryMatchingTokenKinds(Token::tokensComparison, false, false))
        expression = matchExpressionBinary(expression);

    return expression;
}

// Matches a factor followed by any number of "term-operator factor" pairs.
shared_ptr<Expression> Parser::matchTerm() {
    shared_ptr<Expression> expression = matchFactor();
    if (expression == nullptr || !expression->isValid())
        return expression;

    while (tryMatchingTokenKinds(Token::tokensTerm, false, false))
        expression = matchExpressionBinary(expression);

    return expression;
}

// Matches a primary followed by any number of "factor-operator primary" pairs.
shared_ptr<Expression> Parser::matchFactor() {
    shared_ptr<Expression> expression = matchPrimary();
    if (expression == nullptr || !expression->isValid())
        return expression;

    // Bounds checked lookahead, same as on the other precedence levels
    while (tryMatchingTokenKinds(Token::tokensFactor, false, false))
        expression = matchExpressionBinary(expression);

    return expression;
}

// Matches the operands of binary expressions: grouping, literal, call or variable.
shared_ptr<Expression> Parser::matchPrimary() {
    shared_ptr<Expression> expression;

    expression = matchExpressionGrouping();
    if (expression != nullptr)
        return expression;

    expression = matchExpressionLiteral();
    if (expression != nullptr)
        return expression;

    // A call has to be tried before a variable: both start with an identifier and
    // the variable matcher accepts (and consumes) any identifier
    expression = matchExpressionCall();
    if (expression != nullptr)
        return expression;

    expression = matchExpressionVariable();
    if (expression != nullptr)
        return expression;

    return nullptr;
}

// Matches LEFT_PAREN expression RIGHT_PAREN.
// Returns null if the current token is not a left parenthesis.
shared_ptr<Expression> Parser::matchExpressionGrouping() {
    if (!tryMatchingTokenKinds({TokenKind::LEFT_PAREN}, true, true))
        return nullptr;

    // NOTE(review): only term level expressions are parsed inside of the parentheses,
    // so equality and comparison operators are rejected here - confirm that it's intended
    shared_ptr<Expression> expression = matchTerm();

    // has grouped expression failed?
    if (expression == nullptr)
        return matchExpressionInvalid();
    else if (!expression->isValid())
        return expression;

    if (!tryMatchingTokenKinds({TokenKind::RIGHT_PAREN}, true, true))
        return matchExpressionInvalid();

    return make_shared<ExpressionGrouping>(expression);
}

// Matches a single token of one of the Token::tokensLiteral kinds.
shared_ptr<Expression> Parser::matchExpressionLiteral() {
    shared_ptr<Token> token = tokens.at(currentIndex);

    if (tryMatchingTokenKinds(Token::tokensLiteral, false, true))
        return make_shared<ExpressionLiteral>(token);

    return nullptr;
}

// Matches a single IDENTIFIER token as a variable reference.
shared_ptr<Expression> Parser::matchExpressionVariable() {
    shared_ptr<Token> token = tokens.at(currentIndex);

    if (tryMatchingTokenKinds({TokenKind::IDENTIFIER}, true, true))
        return make_shared<ExpressionVariable>(token->getLexme());

    return nullptr;
}

// Matches a call: IDENTIFIER LEFT_PAREN [expression {COMMA expression}] RIGHT_PAREN.
// New lines are allowed after the opening parenthesis, before each argument and
// before the closing parenthesis.
// Returns null (without consuming anything) if the first two tokens don't match.
// Once they are consumed, every failure is reported as an invalid expression.
shared_ptr<Expression> Parser::matchExpressionCall() {
    if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::LEFT_PAREN}, true, false))
        return nullptr;

    shared_ptr<Token> identifierToken = tokens.at(currentIndex);
    currentIndex++; // identifier
    currentIndex++; // left parenthesis

    vector<shared_ptr<Expression>> argumentExpressions;

    tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // optional new line

    // An immediately closed parenthesis means a call without arguments
    if (!tryMatchingTokenKinds({TokenKind::RIGHT_PAREN}, true, false)) {
        do {
            tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // optional new line

            shared_ptr<Expression> argumentExpression = nextExpression();
            // Tokens have already been consumed, so null can't be passed up as "no match"
            if (argumentExpression == nullptr)
                return matchExpressionInvalid();
            else if (!argumentExpression->isValid())
                return argumentExpression;

            argumentExpressions.push_back(argumentExpression);
        } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true));
    }

    tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // optional new line
    if (!tryMatchingTokenKinds({TokenKind::RIGHT_PAREN}, true, true))
        return matchExpressionInvalid();

    return make_shared<ExpressionCall>(identifierToken->getLexme(), argumentExpressions);
}

// Matches a conditional expression:
//   QUESTION condition [COLON] [NEW_LINE] then-block [COLON [NEW_LINE] else-block] [SEMICOLON]
// The else block is optional, in which case a null else block is passed to ExpressionIfElse.
shared_ptr<Expression> Parser::matchExpressionIfElse() {
    // Try matching '?'
    if (!tryMatchingTokenKinds({TokenKind::QUESTION}, true, true))
        return nullptr;

    // Then get condition
    shared_ptr<Expression> condition = nextExpression();
    if (condition == nullptr)
        return matchExpressionInvalid();
    else if (!condition->isValid())
        return condition;

    // Consume optional ':'
    tryMatchingTokenKinds({TokenKind::COLON}, true, true);
    // Consume optional new line
    tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);

    // Match then block, its terminal is left for the code below
    shared_ptr<Expression> thenBlock = matchExpressionBlock({TokenKind::COLON, TokenKind::SEMICOLON}, false);
    if (thenBlock == nullptr)
        return matchExpressionInvalid();
    else if (!thenBlock->isValid())
        return thenBlock;

    // Match else block. Then and else block are separated by ':'
    shared_ptr<Expression> elseBlock;
    if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) {
        // No new line after ':' means that the else block ends with the current line
        bool isSingleLine = !tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);
        vector<TokenKind> terminalTokens = {TokenKind::SEMICOLON, TokenKind::COMMA, TokenKind::RIGHT_PAREN};
        if (isSingleLine)
            terminalTokens.push_back(TokenKind::NEW_LINE);

        elseBlock = matchExpressionBlock(terminalTokens, false);
        if (elseBlock == nullptr)
            return matchExpressionInvalid();
        else if (!elseBlock->isValid())
            return elseBlock;
    }

    // Consume optional ';'
    tryMatchingTokenKinds({TokenKind::SEMICOLON}, true, true);

    return make_shared<ExpressionIfElse>(condition, dynamic_pointer_cast<ExpressionBlock>(thenBlock), dynamic_pointer_cast<ExpressionBlock>(elseBlock));
}

// Consumes the operator at the current token and parses the right hand side operand
// on the next level of the precedence chain. Returns an invalid expression if the
// current token is not a binary operator or if no right hand side operand follows.
shared_ptr<Expression> Parser::matchExpressionBinary(shared_ptr<Expression> left) {
    shared_ptr<Token> token = tokens.at(currentIndex);
    shared_ptr<Expression> right;

    // What level of binary expression are we having?
    if (tryMatchingTokenKinds(Token::tokensEquality, false, true))
        right = matchComparison();
    else if (tryMatchingTokenKinds(Token::tokensComparison, false, true))
        right = matchTerm();
    else if (tryMatchingTokenKinds(Token::tokensTerm, false, true))
        right = matchFactor();
    else if (tryMatchingTokenKinds(Token::tokensFactor, false, true))
        right = matchPrimary();

    if (right == nullptr)
        return matchExpressionInvalid();
    else if (!right->isValid())
        return right;

    return make_shared<ExpressionBinary>(token, left, right);
}

// Expression counterpart of matchStatementBlock(): collects statements until the
// current token is one of terminalTokenKinds. Any statement failure is reported
// as an invalid expression at the current token.
shared_ptr<Expression> Parser::matchExpressionBlock(vector<TokenKind> terminalTokenKinds, bool shouldConsumeTerminal) {
    vector<shared_ptr<Statement>> statements;

    bool hasNewLineTerminal = find(terminalTokenKinds.begin(), terminalTokenKinds.end(), TokenKind::NEW_LINE) != terminalTokenKinds.end();

    while (!tryMatchingTokenKinds(terminalTokenKinds, false, shouldConsumeTerminal)) {
        shared_ptr<Statement> statement = nextStatement();
        if (statement == nullptr || !statement->isValid())
            return matchExpressionInvalid();
        else
            statements.push_back(statement);

        // The statement may have consumed the new line which terminates this block,
        // step back so that the loop condition gets to see it
        if (hasNewLineTerminal && tokens.at(currentIndex-1)->getKind() == TokenKind::NEW_LINE)
            currentIndex--;
    }

    return make_shared<ExpressionBlock>(statements);
}

// Creates an invalid expression for the current token.
shared_ptr<Expression> Parser::matchExpressionInvalid() {
    return make_shared<ExpressionInvalid>(tokens.at(currentIndex));
}

// Checks the upcoming tokens against the given kinds.
//   shouldMatchAll = true:  the next kinds.size() tokens have to match kinds in order.
//   shouldMatchAll = false: the current token has to be of any of the kinds.
// On a match the matched tokens (all of them or the single one) are consumed if
// shouldAdvance is set. Returns false, without consuming anything, if there is no
// match or if there are not enough tokens left.
bool Parser::tryMatchingTokenKinds(vector<TokenKind> kinds, bool shouldMatchAll, bool shouldAdvance) {
    size_t requiredCount = shouldMatchAll ? kinds.size() : 1;
    if (currentIndex + requiredCount > tokens.size())
        return false;

    if (shouldMatchAll) {
        for (size_t i = 0; i < kinds.size(); i++) {
            if (kinds.at(i) != tokens.at(currentIndex + i)->getKind())
                return false;
        }

        if (shouldAdvance)
            currentIndex += kinds.size();

        return true;
    }

    // The bounds check above guarantees that the current token exists
    if (find(kinds.begin(), kinds.end(), tokens.at(currentIndex)->getKind()) == kinds.end())
        return false;

    if (shouldAdvance)
        currentIndex++;

    return true;
}

// Maps a TYPE token to its value type based on the lexeme ("bool", "sint32", "real32").
// Returns an empty optional for tokens of other kinds and for unknown type names.
optional<ValueType> Parser::valueTypeForToken(shared_ptr<Token> token) {
    if (token->getKind() != TokenKind::TYPE)
        return {};

    if (token->getLexme().compare("bool") == 0)
        return ValueType::BOOL;
    else if (token->getLexme().compare("sint32") == 0)
        return ValueType::SINT32;
    else if (token->getLexme().compare("real32") == 0)
        return ValueType::REAL32;

    return {};
}