Merge pull request #12 from rafalgrodzinski/10-sanitize-tokens-before-parsing

10 sanitize tokens before parsing
This commit is contained in:
Rafał
2025-07-01 17:07:25 +09:00
committed by GitHub
14 changed files with 443 additions and 284 deletions

158
Syntax.md Normal file
View File

@@ -0,0 +1,158 @@
# Detailed Syntax
This document specifies the allowed syntax for statements and expressions.
### Symbols used
`?` 0 or 1 instances
`*` 0 or more instances
`+` 1 or more instances
`<NL>` New line
`<ID>` Identifier
`<TYPE>` Type
### Overall structure
```
<Statement Meta Module>
|
+ <Statement Meta Extern Function>
+ <Statement Variable>
+ <Statement Function>
|
+ <Statement Block>
|
+ <Statement Variable>
+ <Statement Assignment>
+ <Statement Return>
+ <Statement Repeat>
|
+ <Statement Block>...
+ <Statement Expression>
|
+ <Expression>
+ <Expression If-Else>
|
+ <Expression Block>...
```
### Statement Meta Extern Function
`@extern <ID> fun (: <NL>? <ID> <TYPE> (, <NL>? <ID> <TYPE>)*)? (-> <NL>? <TYPE>)?`
```
@extern sum fun:
num1 sint32,
num2 sint32 ->
sint32
```
### Statement Variable
`<ID> <TYPE> <- <Expression>`
### Statement Function
`<ID> fun (: <NL>? <ID> <TYPE> (, <NL>? <ID> <TYPE>)*)? (-> <NL>? <TYPE>)? <NL> <Statement Block> ;`
```
stuff fun
;
stuff fun -> sint32
ret 42
;
stuff fun: num1 sint32,
num2 sint32 ->
sint32
ret num1 + num2
;
```
### Statement Assignment
`<ID> <- <Expression>`
```
num1 <- 5
```
### Statement Block
`(<Statement> <NL>)*`
### Statement Repeat
`rep [<Statement Variable> | <Statement Assignment>]? : <Statement Block>`
`rep [<Statement Variable> | <Statement Assignment>]? : <NL> <Statement Block> ;`
`rep [<Statement Variable> | <Statement Assignment>] (, <NL>? <Expression> (, <NL>? <Expression>)? )? : <Statement Block>`
`rep [<Statement Variable> | <Statement Assignment>] (, <NL>? <Expression> (, <NL>? <Expression>)? )? : <NL> <Statement Block> ;`
`rep (<Expression> (, <NL>? <Expression>)? )? : <Statement Block>`
`rep (<Expression> (, <NL>? <Expression>)? )? : <NL> <Statement Block> ;`
```
rep i sint32 <- 0, true, i < 10:
doStuff(i)
;
rep i sint32 <- 0,
true, i < 10:
doStuff(i)
;
rep i sint32 <- 0,
true,
i < 10:
doStuff(i)
;
rep: infiniteCall()
```
### Statement Return
`ret <Expression>?`
### Expression Variable
`<ID>`
### Expression If-Else
`if <Expression> : <Expression Block>`
```
if num1 > 10: putchar('T')
```
`if <Expression> : <NL> <Expression Block> ;`
```
if num1 > 10:
num1 <- 500
putchar('S')
;
```
`if <Expression> : <Expression Block> else <Expression Block>`
```
if num1 > 10: putchar('T') else putchar('N')
```
`if <Expression> : <NL> <Expression Block> else <NL> <Expression Block> ;`
```
if num1 > 10:
putchar('T')
else
putchar('N')
;
```
`if <Expression> : <Expression Block> else <NL> <Expression Block> <NL> ;`
```
if num1 > 10: putchar('T') else
num1 <- 500
putchar('F')
;
```
### Expression Block
`(<Statement> <NL>)* <Statement Expression>?`

View File

@@ -13,7 +13,7 @@
#include "Parser/Statement/StatementAssignment.h"
#include "Parser/Statement/StatementReturn.h"
#include "Parser/Statement/StatementExpression.h"
#include "Parser/Statement/StatementLoop.h"
#include "Parser/Statement/StatementRepeat.h"
#include "Parser/Statement/StatementMetaExternFunction.h"
#include "Parser/Statement/StatementBlock.h"
@@ -54,8 +54,8 @@ void ModuleBuilder::buildStatement(shared_ptr<Statement> statement) {
case StatementKind::RETURN:
buildReturn(dynamic_pointer_cast<StatementReturn>(statement));
break;
case StatementKind::LOOP:
buildLoop(dynamic_pointer_cast<StatementLoop>(statement));
case StatementKind::REPEAT:
buildLoop(dynamic_pointer_cast<StatementRepeat>(statement));
break;
case StatementKind::META_EXTERN_FUNCTION:
buildMetaExternFunction(dynamic_pointer_cast<StatementMetaExternFunction>(statement));
@@ -138,7 +138,7 @@ void ModuleBuilder::buildReturn(shared_ptr<StatementReturn> statement) {
}
}
void ModuleBuilder::buildLoop(shared_ptr<StatementLoop> statement) {
void ModuleBuilder::buildLoop(shared_ptr<StatementRepeat> statement) {
shared_ptr<Statement> initStatement = statement->getInitStatement();
shared_ptr<StatementBlock> bodyStatement= statement->getBodyBlockStatement();
shared_ptr<Expression> preExpression = statement->getPreConditionExpression();

View File

@@ -27,7 +27,7 @@ class StatementVariable;
class StatementAssignment;
class StatementReturn;
class StatementExpression;
class StatementLoop;
class StatementRepeat;
class StatementMetaExternFunction;
class StatementBlock;
@@ -57,7 +57,7 @@ private:
void buildAssignment(shared_ptr<StatementAssignment> statement);
void buildBlock(shared_ptr<StatementBlock> statement);
void buildReturn(shared_ptr<StatementReturn> statement);
void buildLoop(shared_ptr<StatementLoop> statement);
void buildLoop(shared_ptr<StatementRepeat> statement);
void buildMetaExternFunction(shared_ptr<StatementMetaExternFunction> statement);
void buildExpression(shared_ptr<StatementExpression> statement);

View File

@@ -4,8 +4,12 @@ Lexer::Lexer(string source): source(source) {
}
vector<shared_ptr<Token>> Lexer::getTokens() {
shared_ptr<Token> token = nullptr;
tokens.clear();
currentIndex = 0;
currentLine = 0;
currentColumn = 0;
vector<shared_ptr<Token>> tokens;
shared_ptr<Token> token;
do {
token = nextToken();
// Got a nullptr, shouldn't have happened
@@ -18,7 +22,11 @@ vector<shared_ptr<Token>> Lexer::getTokens() {
if (!token->isValid()) {
cerr << "Unexpected character '" << token->getLexme() << "' at " << token->getLine() << ":" << token->getColumn() << endl;
exit(1);
}
}
// Don't add new line as the first token
if (tokens.empty() && token->isOfKind({TokenKind::NEW_LINE}))
continue;
// Insert an additional new line just before end
if (token->getKind() == TokenKind::END && tokens.back()->getKind() != TokenKind::NEW_LINE)
@@ -123,14 +131,6 @@ shared_ptr<Token> Lexer::nextToken() {
token = match(TokenKind::SEMICOLON, ";", false);
if (token != nullptr)
return token;
token = match(TokenKind::QUESTION_QUESTION, "??", false);
if (token != nullptr)
return token;
token = match(TokenKind::QUESTION, "?", false);
if (token != nullptr)
return token;
token = match(TokenKind::LEFT_ARROW, "<-", false);
if (token != nullptr)
@@ -187,6 +187,14 @@ shared_ptr<Token> Lexer::nextToken() {
return token;
// keywords
token = match(TokenKind::IF, "if", true);
if (token != nullptr)
return token;
token = match(TokenKind::ELSE, "else", true);
if (token != nullptr)
return token;
token = match(TokenKind::FUNCTION, "fun", true);
if (token != nullptr)
return token;

View File

@@ -10,11 +10,9 @@ using namespace std;
class Lexer {
private:
string source;
int currentIndex = 0;
int currentLine = 0;
int currentColumn = 0;
vector<shared_ptr<Token>> tokens;
int currentIndex;
int currentLine;
int currentColumn;
shared_ptr<Token> nextToken();
shared_ptr<Token> match(TokenKind kind, string lexme, bool needsSeparator);

View File

@@ -112,10 +112,6 @@ string Token::toString() {
return ":";
case TokenKind::SEMICOLON:
return ";";
case TokenKind::QUESTION_QUESTION:
return "??";
case TokenKind::QUESTION:
return "?";
case TokenKind::LEFT_ARROW:
return "";
case TokenKind::RIGHT_ARROW:
@@ -136,6 +132,10 @@ string Token::toString() {
case TokenKind::TYPE:
return "TYPE(" + lexme + ")";
case TokenKind::IF:
return "IF";
case TokenKind::ELSE:
return "ELSE";
case TokenKind::FUNCTION:
return "FUNCTION";
case TokenKind::RETURN:

View File

@@ -26,14 +26,14 @@ enum class TokenKind {
COMMA,
COLON,
SEMICOLON,
QUESTION,
QUESTION_QUESTION,
LEFT_ARROW,
RIGHT_ARROW,
FUNCTION,
RETURN,
REPEAT,
IF,
ELSE,
BOOL,
INTEGER_DEC,

View File

@@ -16,7 +16,7 @@
#include "Parser/Statement/StatementExpression.h"
#include "Parser/Statement/StatementMetaExternFunction.h"
#include "Parser/Statement/StatementBlock.h"
#include "Parser/Statement/StatementLoop.h"
#include "Parser/Statement/StatementRepeat.h"
#include "Parser/Statement/StatementInvalid.h"
Parser::Parser(vector<shared_ptr<Token>> tokens): tokens(tokens) {
@@ -33,6 +33,12 @@ vector<shared_ptr<Statement>> Parser::getStatements() {
exit(1);
}
statements.push_back(statement);
// Expect new line after statement
if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) {
cerr << "Expected new line" << endl;
exit(1);
}
}
return statements;
@@ -48,6 +54,20 @@ shared_ptr<Statement> Parser::nextStatement() {
if (statement != nullptr)
return statement;
statement = matchStatementVariable();
if (statement != nullptr)
return statement;
statement = matchStatementMetaExternFunction();
if (statement != nullptr)
return statement;
return matchStatementInvalid("Unexpected token");
}
shared_ptr<Statement> Parser::nextInBlockStatement() {
shared_ptr<Statement> statement;
statement = matchStatementVariable();
if (statement != nullptr)
return statement;
@@ -60,7 +80,7 @@ shared_ptr<Statement> Parser::nextStatement() {
if (statement != nullptr)
return statement;
statement = matchStatementLoop();
statement = matchStatementRepeat();
if (statement != nullptr)
return statement;
@@ -68,32 +88,29 @@ shared_ptr<Statement> Parser::nextStatement() {
if (statement != nullptr)
return statement;
statement = matchStatementMetaExternFunction();
if (statement != nullptr)
return statement;
return matchStatementInvalid("Unexpected token");
}
shared_ptr<Statement> Parser::matchStatementFunction() {
if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false))
shared_ptr<Statement> Parser::matchStatementMetaExternFunction() {
if (!tryMatchingTokenKinds({TokenKind::M_EXTERN, TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false))
return nullptr;
shared_ptr<Token> identifierToken = tokens.at(currentIndex);
currentIndex++;
string name;
vector<pair<string, ValueType>> arguments;
ValueType returnType = ValueType::NONE;
currentIndex++; // skip meta
shared_ptr<Token> identifierToken = tokens.at(currentIndex++);
currentIndex++; // skip fun
// Get arguments
vector<pair<string, ValueType>> arguments;
// arguments
if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) {
do {
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line
if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false))
return matchStatementInvalid("Expected function argument");
shared_ptr<Token> identifierToken = tokens.at(currentIndex);
currentIndex++; // identifier
shared_ptr<Token> typeToken = tokens.at(currentIndex);
currentIndex++; // type
shared_ptr<Token> identifierToken = tokens.at(currentIndex++);
shared_ptr<Token> typeToken = tokens.at(currentIndex++);
optional<ValueType> argumentType = valueTypeForToken(typeToken);
if (!argumentType)
return matchStatementInvalid("Invalid argument type");
@@ -102,12 +119,10 @@ shared_ptr<Statement> Parser::matchStatementFunction() {
} while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true));
}
// consume optional new line
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);
// Return type
ValueType returnType = ValueType::NONE;
if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) {
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line
shared_ptr<Token> typeToken = tokens.at(currentIndex);
optional<ValueType> type = valueTypeForToken(typeToken);
if (!type)
@@ -115,30 +130,16 @@ shared_ptr<Statement> Parser::matchStatementFunction() {
returnType = *type;
currentIndex++; // type
// consume new line
if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
return matchStatementInvalid("Expected new line after function declaration");
}
shared_ptr<Statement> statementBlock = matchStatementBlock({TokenKind::SEMICOLON}, true);
if (statementBlock == nullptr)
return matchStatementInvalid();
else if (!statementBlock->isValid())
return statementBlock;
if(!tryMatchingTokenKinds({TokenKind::NEW_LINE}, false, true))
return matchStatementInvalid("Expected a new line after a function declaration");
return make_shared<StatementFunction>(identifierToken->getLexme(), arguments, returnType, dynamic_pointer_cast<StatementBlock>(statementBlock));
return make_shared<StatementMetaExternFunction>(identifierToken->getLexme(), arguments, returnType);
}
shared_ptr<Statement> Parser::matchStatementVariable() {
if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false))
return nullptr;
shared_ptr<Token> identifierToken = tokens.at(currentIndex);
currentIndex++; // identifier
shared_ptr<Token> identifierToken = tokens.at(currentIndex++);
shared_ptr<Token> valueTypeToken = tokens.at(currentIndex);
ValueType valueType;
@@ -159,31 +160,99 @@ shared_ptr<Statement> Parser::matchStatementVariable() {
shared_ptr<Expression> expression = nextExpression();
if (expression == nullptr || !expression->isValid())
return matchStatementInvalid();
// Expect comma or new line
if (!tryMatchingTokenKinds({TokenKind::COMMA}, true, false) && !tryMatchingTokenKinds({TokenKind::NEW_LINE}, false, true))
return matchStatementInvalid("Expected a new line after variable declaration");
return matchStatementInvalid("Invalid expression");
return make_shared<StatementVariable>(identifierToken->getLexme(), valueType, expression);
}
// Parses a function definition of the form
//   <ID> fun (: <NL>? <ID> <TYPE> (, <NL>? <ID> <TYPE>)*)? (-> <NL>? <TYPE>)? <NL> <Statement Block> ;
//
// Returns:
//   - nullptr when the upcoming tokens are not `<ID> fun` (the initial check only peeks,
//     it is called with shouldAdvance = false),
//   - an invalid statement describing the first problem encountered, or
//   - a StatementFunction holding the name, arguments, return type and body block.
shared_ptr<Statement> Parser::matchStatementFunction() {
    if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false))
        return nullptr;

    string name;
    vector<pair<string, ValueType>> arguments;
    ValueType returnType = ValueType::NONE; // stays NONE when no "-> <TYPE>" is present
    shared_ptr<Statement> statementBlock;

    // name
    name = tokens.at(currentIndex++)->getLexme();
    currentIndex++; // skip fun

    // arguments
    if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) {
        do {
            tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line
            if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false))
                return matchStatementInvalid("Expected function argument");
            shared_ptr<Token> identifierToken = tokens.at(currentIndex++);
            shared_ptr<Token> typeToken = tokens.at(currentIndex++);
            optional<ValueType> argumentType = valueTypeForToken(typeToken);
            if (!argumentType)
                return matchStatementInvalid("Invalid argument type");

            arguments.push_back(pair<string, ValueType>(identifierToken->getLexme(), *argumentType));
        } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true));
    }

    // return type
    if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) {
        tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line

        shared_ptr<Token> typeToken = tokens.at(currentIndex);
        optional<ValueType> type = valueTypeForToken(typeToken);
        if (!type)
            return matchStatementInvalid("Expected return type");
        returnType = *type;

        currentIndex++; // type
    }

    // consume new line
    if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
        return matchStatementInvalid("Expected new line after function declaration");

    // block
    statementBlock = matchStatementBlock({TokenKind::SEMICOLON});
    // Spelled out instead of the GNU-only `a ?: b` so the file builds with any conforming compiler:
    // a missing block becomes a fresh invalid statement, an invalid block is propagated as-is.
    if (statementBlock == nullptr)
        return matchStatementInvalid();
    if (!statementBlock->isValid())
        return statementBlock;

    if (!tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true))
        return matchStatementInvalid("Expected a \";\" after a function declaration");

    return make_shared<StatementFunction>(name, arguments, returnType, dynamic_pointer_cast<StatementBlock>(statementBlock));
}
// Parses in-block statements until one of `terminalTokenKinds` is the next token.
// The terminal token itself is only peeked at (shouldAdvance = false), so the caller
// is responsible for consuming it. Between two statements a new line is required.
//
// Returns a StatementBlock with the collected statements, or the first invalid
// statement encountered (a fresh invalid statement when nothing could be matched).
shared_ptr<Statement> Parser::matchStatementBlock(vector<TokenKind> terminalTokenKinds) {
    vector<shared_ptr<Statement>> statements;

    while (!tryMatchingTokenKinds(terminalTokenKinds, false, false)) {
        shared_ptr<Statement> statement = nextInBlockStatement();
        // Spelled out instead of the GNU-only `a ?: b` so the file builds with any conforming compiler
        if (statement == nullptr)
            return matchStatementInvalid("Expected statement");
        if (!statement->isValid())
            return statement;
        statements.push_back(statement);

        // A terminal may directly follow the last statement (single-line blocks)
        if (tryMatchingTokenKinds(terminalTokenKinds, false, false))
            break;

        // expect new line
        if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
            return matchStatementInvalid("Expected new line");
    }

    return make_shared<StatementBlock>(statements);
}
shared_ptr<Statement> Parser::matchStatementAssignment() {
if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::LEFT_ARROW}, true, false))
return nullptr;
shared_ptr<Token> identifierToken = tokens.at(currentIndex);
currentIndex++; // identifier
shared_ptr<Token> identifierToken = tokens.at(currentIndex++);
currentIndex++; // arrow
shared_ptr<Expression> expression = nextExpression();
if (expression == nullptr || !expression->isValid())
return matchStatementInvalid("Expected expression");
// Expect new line
if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, false, true))
return matchStatementInvalid("Expected a new line after variable declaration");
return make_shared<StatementAssignment>(identifierToken->getLexme(), expression);
}
@@ -193,67 +262,73 @@ shared_ptr<Statement> Parser::matchStatementReturn() {
shared_ptr<Expression> expression = nextExpression();
if (expression != nullptr && !expression->isValid())
return matchStatementInvalid();
if (!tryMatchingTokenKinds({TokenKind::NEW_LINE, TokenKind::SEMICOLON}, false, false))
return matchStatementInvalid();
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);
return matchStatementInvalid("Expected expression");
return make_shared<StatementReturn>(expression);
}
shared_ptr<Statement> Parser::matchStatementLoop() {
shared_ptr<Statement> Parser::matchStatementRepeat() {
if (!tryMatchingTokenKinds({TokenKind::REPEAT}, true, true))
return nullptr;
shared_ptr<Statement> initStatement;
shared_ptr<Expression> preConditionExpression;
shared_ptr<Expression> postConditionExpression;
shared_ptr<Statement> bodyBlockStatement;
bool isMultiLine;
// initial
initStatement = matchStatementVariable();
initStatement = matchStatementVariable() ?: matchStatementAssignment();
if (initStatement != nullptr && !initStatement->isValid())
initStatement = nullptr;
if (tokens.at(currentIndex-1)->getKind() != TokenKind::NEW_LINE) {
if (!tryMatchingTokenKinds({TokenKind::COLON}, false, true)) {
// got initial, expect comma
if (initStatement != nullptr && !tryMatchingTokenKinds({TokenKind::COMMA}, true, true))
return matchStatementInvalid("Expected comma after initial statement");
// optional new line
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);
// pre condition
preConditionExpression = nextExpression();
if (preConditionExpression != nullptr && !preConditionExpression->isValid())
return matchStatementInvalid("Expected pre-condition expression");
if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) {
if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true)) {
// got pre-condition, expect comma
if (!tryMatchingTokenKinds({TokenKind::COMMA}, true, true))
return matchStatementInvalid("Expected comma after pre-condition statement");
// optional new line
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);
// post condition
postConditionExpression = nextExpression();
if (postConditionExpression == nullptr || !postConditionExpression->isValid())
return matchStatementInvalid("Expected post-condition expression");
// epxect new line
if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
return matchStatementInvalid("Expected new line");
// expect colon
if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true))
return matchStatementInvalid("Expected \":\"");
}
}
// body
shared_ptr<Statement> bodyBlockStatement = matchStatementBlock({TokenKind::SEMICOLON}, true);
if (bodyBlockStatement == nullptr)
return matchStatementInvalid("Expected block statement");
else if (!bodyBlockStatement->isValid())
return bodyBlockStatement;
// epxect new line
if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
return matchStatementInvalid("Expected new line");
isMultiLine = tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);
return make_shared<StatementLoop>(initStatement, preConditionExpression, postConditionExpression, dynamic_pointer_cast<StatementBlock>(bodyBlockStatement));
// body
if (isMultiLine)
bodyBlockStatement = matchStatementBlock({TokenKind::SEMICOLON});
else
bodyBlockStatement = matchStatementBlock({TokenKind::NEW_LINE});
if (bodyBlockStatement == nullptr || !bodyBlockStatement->isValid())
return bodyBlockStatement ?: matchStatementInvalid("Expected block statement");
tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true);
return make_shared<StatementRepeat>(initStatement, preConditionExpression, postConditionExpression, dynamic_pointer_cast<StatementBlock>(bodyBlockStatement));
}
shared_ptr<Statement> Parser::matchStatementExpression() {
@@ -270,76 +345,6 @@ shared_ptr<Statement> Parser::matchStatementExpression() {
return make_shared<StatementExpression>(expression);
}
shared_ptr<Statement> Parser::matchStatementMetaExternFunction() {
if (!tryMatchingTokenKinds({TokenKind::M_EXTERN, TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false))
return nullptr;
currentIndex++; // skip meta
shared_ptr<Token> identifierToken = tokens.at(currentIndex);
currentIndex++;
currentIndex++; // skip fun
// Get arguments
vector<pair<string, ValueType>> arguments;
if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) {
do {
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line
if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false))
return matchStatementInvalid("Expected function argument");
shared_ptr<Token> identifierToken = tokens.at(currentIndex);
currentIndex++; // identifier
shared_ptr<Token> typeToken = tokens.at(currentIndex);
currentIndex++; // type
optional<ValueType> argumentType = valueTypeForToken(typeToken);
if (!argumentType)
return matchStatementInvalid("Invalid argument type");
arguments.push_back(pair<string, ValueType>(identifierToken->getLexme(), *argumentType));
} while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true));
}
// consume optional new line
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);
// Return type
ValueType returnType = ValueType::NONE;
if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) {
shared_ptr<Token> typeToken = tokens.at(currentIndex);
optional<ValueType> type = valueTypeForToken(typeToken);
if (!type)
return matchStatementInvalid("Expected return type");
returnType = *type;
currentIndex++; // type
// consume new line
if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
return matchStatementInvalid("Expected new line after function declaration");
}
return make_shared<StatementMetaExternFunction>(identifierToken->getLexme(), arguments, returnType);
}
shared_ptr<Statement> Parser::matchStatementBlock(vector<TokenKind> terminalTokenKinds, bool shouldConsumeTerminal) {
vector<shared_ptr<Statement>> statements;
bool hasNewLineTerminal = find(terminalTokenKinds.begin(), terminalTokenKinds.end(), TokenKind::NEW_LINE) != terminalTokenKinds.end();
while (!tryMatchingTokenKinds(terminalTokenKinds, false, shouldConsumeTerminal)) {
shared_ptr<Statement> statement = nextStatement();
if (statement == nullptr)
return matchStatementInvalid();
else if (!statement->isValid())
return statement;
else
statements.push_back(statement);
if (hasNewLineTerminal && tokens.at(currentIndex-1)->getKind() == TokenKind::NEW_LINE)
currentIndex--;
}
return make_shared<StatementBlock>(statements);
}
// Creates an invalid-statement marker for the token at the current parser position,
// carrying `message` as the description of what went wrong.
shared_ptr<StatementInvalid> Parser::matchStatementInvalid(string message) {
    shared_ptr<Token> offendingToken = tokens.at(currentIndex);
    return make_shared<StatementInvalid>(offendingToken, message);
}
@@ -436,14 +441,12 @@ shared_ptr<Expression> Parser::matchExpressionGrouping() {
if (tryMatchingTokenKinds({TokenKind::LEFT_PAREN}, true, true)) {
shared_ptr<Expression> expression = matchTerm();
// has grouped expression failed?
if (expression == nullptr) {
return matchExpressionInvalid();
} else if(!expression->isValid()) {
return expression;
if (expression == nullptr || !expression->isValid()) {
return expression ?: matchExpressionInvalid("Expected expression");
} else if (tryMatchingTokenKinds({TokenKind::RIGHT_PAREN}, true, true)) {
return make_shared<ExpressionGrouping>(expression);
} else {
return matchExpressionInvalid();
return matchExpressionInvalid("Unexpected token");
}
}
@@ -488,52 +491,56 @@ shared_ptr<Expression> Parser::matchExpressionCall() {
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // optional new line
if (!tryMatchingTokenKinds({TokenKind::RIGHT_PAREN}, true, true))
return matchExpressionInvalid();
return matchExpressionInvalid("Expected \")\"");
return make_shared<ExpressionCall>(identifierToken->getLexme(), argumentExpressions);
}
shared_ptr<Expression> Parser::matchExpressionIfElse() {
// Try maching '?'
shared_ptr<Token> token = tokens.at(currentIndex);
if (!tryMatchingTokenKinds({TokenKind::QUESTION}, true, true))
if (!tryMatchingTokenKinds({TokenKind::IF}, true, true))
return nullptr;
// Then get condition
shared_ptr<Expression> condition = nextExpression();
if (condition == nullptr)
return matchExpressionInvalid();
else if (!condition->isValid())
return condition;
// Consume optional ':'
tryMatchingTokenKinds({TokenKind::COLON}, true, true);
// Consume optional new line
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);
// Match then block
shared_ptr<Expression> thenBlock = matchExpressionBlock({TokenKind::COLON, TokenKind::SEMICOLON}, false);
if (thenBlock == nullptr)
return matchExpressionInvalid();
else if (!thenBlock->isValid())
return thenBlock;
// Match else block. Then and else block are separated by ':'
shared_ptr<Expression> condition;
shared_ptr<Expression> thenBlock;
shared_ptr<Expression> elseBlock;
if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) {
bool isSingleLine = !tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);
vector<TokenKind> terminalTokens = {TokenKind::SEMICOLON, TokenKind::COMMA, TokenKind::RIGHT_PAREN};
if (isSingleLine)
terminalTokens.push_back(TokenKind::NEW_LINE);
elseBlock = matchExpressionBlock(terminalTokens, false);
if (elseBlock == nullptr)
return matchExpressionInvalid();
else if (!elseBlock->isValid())
return elseBlock;
// condition expression
condition = nextExpression();
if (condition == nullptr || !condition->isValid())
return condition ?: matchExpressionInvalid("Expected condition expression");
if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true))
return matchExpressionInvalid("Expected \":\"");
// then
bool isMultiLine = false;
if (tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
isMultiLine = true;
// then block
if (isMultiLine)
thenBlock = matchExpressionBlock({TokenKind::ELSE, TokenKind::SEMICOLON});
else
thenBlock = matchExpressionBlock({TokenKind::ELSE, TokenKind::NEW_LINE});
if (thenBlock == nullptr || !thenBlock->isValid())
return thenBlock ?: matchExpressionInvalid("Expected then block");
// else
if (tryMatchingTokenKinds({TokenKind::ELSE}, true, true)) {
if (tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
isMultiLine = true;
// else block
if (isMultiLine)
elseBlock = matchExpressionBlock({TokenKind::SEMICOLON});
else
elseBlock = matchExpressionBlock({TokenKind::NEW_LINE});
if (elseBlock == nullptr || !elseBlock->isValid())
return elseBlock ?: matchExpressionInvalid("Expected else block");
}
tryMatchingTokenKinds({TokenKind::SEMICOLON}, true, true);
tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true);
return make_shared<ExpressionIfElse>(condition, dynamic_pointer_cast<ExpressionBlock>(thenBlock), dynamic_pointer_cast<ExpressionBlock>(elseBlock));
}
@@ -553,7 +560,7 @@ shared_ptr<Expression> Parser::matchExpressionBinary(shared_ptr<Expression> left
}
if (right == nullptr) {
return matchExpressionInvalid();
return matchExpressionInvalid("Expected right-side expression");
} else if (!right->isValid()) {
return right;
} else {
@@ -563,25 +570,20 @@ shared_ptr<Expression> Parser::matchExpressionBinary(shared_ptr<Expression> left
return nullptr;
}
shared_ptr<Expression> Parser::matchExpressionBlock(vector<TokenKind> terminalTokenKinds, bool shouldConsumeTerminal) {
shared_ptr<Expression> Parser::matchExpressionBlock(vector<TokenKind> terminalTokenKinds) {
vector<shared_ptr<Statement>> statements;
bool hasNewLineTerminal = find(terminalTokenKinds.begin(), terminalTokenKinds.end(), TokenKind::NEW_LINE) != terminalTokenKinds.end();
while (!tryMatchingTokenKinds(terminalTokenKinds, false, shouldConsumeTerminal)) {
shared_ptr<Statement> statement = nextStatement();
while (!tryMatchingTokenKinds(terminalTokenKinds, false, false)) {
shared_ptr<Statement> statement = nextInBlockStatement();
if (statement == nullptr || !statement->isValid())
return matchExpressionInvalid();
else
statements.push_back(statement);
if (hasNewLineTerminal && tokens.at(currentIndex-1)->getKind() == TokenKind::NEW_LINE)
currentIndex--;
return matchExpressionInvalid("Expected statement");
statements.push_back(statement);
}
return make_shared<ExpressionBlock>(statements);
}
shared_ptr<ExpressionInvalid> Parser::matchExpressionInvalid() {
shared_ptr<ExpressionInvalid> Parser::matchExpressionInvalid(string message) {
return make_shared<ExpressionInvalid>(tokens.at(currentIndex));
}

View File

@@ -19,14 +19,17 @@ private:
int currentIndex = 0;
shared_ptr<Statement> nextStatement();
shared_ptr<Statement> matchStatementFunction();
shared_ptr<Statement> nextInBlockStatement();
shared_ptr<Statement> matchStatementMetaExternFunction();
shared_ptr<Statement> matchStatementVariable();
shared_ptr<Statement> matchStatementFunction();
shared_ptr<Statement> matchStatementBlock(vector<TokenKind> terminalTokenKinds);
shared_ptr<Statement> matchStatementAssignment();
shared_ptr<Statement> matchStatementReturn();
shared_ptr<Statement> matchStatementLoop();
shared_ptr<Statement> matchStatementRepeat();
shared_ptr<Statement> matchStatementExpression();
shared_ptr<Statement> matchStatementMetaExternFunction();
shared_ptr<Statement> matchStatementBlock(vector<TokenKind> terminalTokenKinds, bool shouldConsumeTerminal);
shared_ptr<StatementInvalid> matchStatementInvalid(string message = "");
shared_ptr<Expression> nextExpression();
@@ -42,8 +45,8 @@ private:
shared_ptr<Expression> matchExpressionCall();
shared_ptr<Expression> matchExpressionIfElse();
shared_ptr<Expression> matchExpressionBinary(shared_ptr<Expression> left);
shared_ptr<Expression> matchExpressionBlock(vector<TokenKind> terminalTokenKinds, bool shouldConsumeTerminal);
shared_ptr<ExpressionInvalid> matchExpressionInvalid();
shared_ptr<Expression> matchExpressionBlock(vector<TokenKind> terminalTokenKinds);
shared_ptr<ExpressionInvalid> matchExpressionInvalid(string message);
bool tryMatchingTokenKinds(vector<TokenKind> kinds, bool shouldMatchAll, bool shouldAdvance);
optional<ValueType> valueTypeForToken(shared_ptr<Token> token);

View File

@@ -14,7 +14,7 @@ enum class StatementKind {
FUNCTION,
VARIABLE,
ASSIGNMENT,
LOOP,
REPEAT,
META_EXTERN_FUNCTION,
INVALID
};

View File

@@ -1,39 +0,0 @@
#include "StatementLoop.h"
#include "Parser/Expression/Expression.h"
#include "Parser/Statement/StatementBlock.h"
StatementLoop::StatementLoop(shared_ptr<Statement> initStatement, shared_ptr<Expression> preConditionExpression, shared_ptr<Expression> postConditionExpression, shared_ptr<StatementBlock> bodyBlockStatement):
Statement(StatementKind::LOOP), initStatement(initStatement), preConditionExpression(preConditionExpression), postConditionExpression(postConditionExpression), bodyBlockStatement(bodyBlockStatement) { }
shared_ptr<Statement> StatementLoop::getInitStatement() {
return initStatement;
}
shared_ptr<Expression> StatementLoop::getPreConditionExpression() {
return preConditionExpression;
}
shared_ptr<Expression> StatementLoop::getPostConditionExpression() {
return postConditionExpression;
}
shared_ptr<StatementBlock> StatementLoop::getBodyBlockStatement() {
return bodyBlockStatement;
}
// Renders the loop as "REP(<init>, <pre-condition>, <post-condition>):" followed by
// the body block printed one indentation level deeper. Parts that are null are omitted.
string StatementLoop::toString(int indent) {
    string value;
    for (int ind = 0; ind < indent; ind++)
        value += " ";
    value += "REP(";
    if (initStatement != nullptr)
        // Must be '+': with `a, ", "` the comma operator evaluated and discarded the separator
        value += initStatement->toString(0) + ", ";
    if (preConditionExpression != nullptr)
        value += preConditionExpression->toString(0) + ", ";
    if (postConditionExpression != nullptr)
        value += postConditionExpression->toString(0);
    value += "):\n";
    value += bodyBlockStatement->toString(indent+1);
    return value;
}

View File

@@ -0,0 +1,39 @@
#include "StatementRepeat.h"
#include "Parser/Expression/Expression.h"
#include "Parser/Statement/StatementBlock.h"
// Creates a REPEAT statement from its init statement, pre- and post-condition
// expressions and the body block; the values are stored as given.
StatementRepeat::StatementRepeat(
    shared_ptr<Statement> initStatement,
    shared_ptr<Expression> preConditionExpression,
    shared_ptr<Expression> postConditionExpression,
    shared_ptr<StatementBlock> bodyBlockStatement
):
    Statement(StatementKind::REPEAT),
    initStatement(initStatement),
    preConditionExpression(preConditionExpression),
    postConditionExpression(postConditionExpression),
    bodyBlockStatement(bodyBlockStatement)
{
}

// Statement stored as the loop's init part (as passed to the constructor).
shared_ptr<Statement> StatementRepeat::getInitStatement() {
    return this->initStatement;
}

// Expression stored as the loop's pre-condition (as passed to the constructor).
shared_ptr<Expression> StatementRepeat::getPreConditionExpression() {
    return this->preConditionExpression;
}

// Expression stored as the loop's post-condition (as passed to the constructor).
shared_ptr<Expression> StatementRepeat::getPostConditionExpression() {
    return this->postConditionExpression;
}

// Block of statements forming the loop body.
shared_ptr<StatementBlock> StatementRepeat::getBodyBlockStatement() {
    return this->bodyBlockStatement;
}
// Renders the repeat statement as "REP(<init>, <pre-condition>, <post-condition>):"
// followed by the body block printed one indentation level deeper.
// Parts that are null are omitted.
string StatementRepeat::toString(int indent) {
    string value;
    for (int ind = 0; ind < indent; ind++)
        value += " ";
    value += "REP(";
    if (initStatement != nullptr)
        // Must be '+': with `a, ", "` the comma operator evaluated and discarded the separator,
        // so the init statement ran straight into the pre-condition in the output
        value += initStatement->toString(0) + ", ";
    if (preConditionExpression != nullptr)
        value += preConditionExpression->toString(0) + ", ";
    if (postConditionExpression != nullptr)
        value += postConditionExpression->toString(0);
    value += "):\n";
    value += bodyBlockStatement->toString(indent+1);
    return value;
}

View File

@@ -3,7 +3,7 @@
class Expression;
class StatementBlock;
class StatementLoop: public Statement {
class StatementRepeat: public Statement {
private:
shared_ptr<Statement> initStatement;
shared_ptr<Expression> preConditionExpression;
@@ -11,7 +11,7 @@ private:
shared_ptr<StatementBlock> bodyBlockStatement;
public:
StatementLoop(shared_ptr<Statement> initStatement, shared_ptr<Expression> preConditionExpression, shared_ptr<Expression> postConditionExpression, shared_ptr<StatementBlock> bodyBlockStatement);
StatementRepeat(shared_ptr<Statement> initStatement, shared_ptr<Expression> preConditionExpression, shared_ptr<Expression> postConditionExpression, shared_ptr<StatementBlock> bodyBlockStatement);
shared_ptr<Statement> getInitStatement();
shared_ptr<Expression> getPreConditionExpression();
shared_ptr<Expression> getPostConditionExpression();

View File

@@ -1,17 +1,7 @@
@extern putchar fun: character sint32 -> sint32
stuff fun: num1 sint32, num2 sint32
? num1 > num2
putchar(0x54)
:
putchar(0x4e)
//dummy sint32 <- 55
stuff fun: num1 sint32, num2 sint32 -> sint32
rep:
i <- i + 1
;
putchar(0x0a)
;
main fun -> sint32
stuff(8, 108)
ret 42
;