Parser updates

This commit is contained in:
Rafał Grodziński
2025-07-01 10:17:57 +09:00
parent cac252a0de
commit 2e5e19b30d
3 changed files with 97 additions and 78 deletions

View File

@@ -1,15 +1,30 @@
# Detailed Syntax
This document specifies the allowed syntax for statements and expressions.
### Symbols used
`?` 0 or 1 instances
`*` 0 or more instances
`+` 1 or more instances
`<TER>` Terminal token, usually a new line `<NL>`, but it can sometimes also be `,`, `else`, or `;`
`<NL>` New line
`<IDENT>` Identifier
`<ID>` Identifier
`<EXPR_BLOCK>` Expression block
`<STMT_BLOCK>` Statements block
`<EXPR>` Expression
`<STMT>` Statement
### Statement Function
`<IDENT> fun (: <NL>? <IDENT> <TYPE> (, <NL?> <IDENT> <TYPE>)*)? (-> <NL>? <TYPE>)? <NL> ;`
`<ID> fun (: <NL>? <ID> <TYPE> (, <NL>? <ID> <TYPE>)*)? (-> <NL>? <TYPE>)? <NL> <STMT_BLOCK> ; <NL>`
```
stuff fun
;
@@ -25,24 +40,37 @@ sint32
;
```
### Statement Meta Extern Function:
`@extern <ID> fun (: <NL>? <ID> <TYPE> (, <NL>? <ID> <TYPE>)*)? (-> <NL>? <TYPE>)? <NL>`
```
@extern sum fun:
num1 sint32,
num2 sint32 ->
sint32
```
### Statement Variable
`<ID> <TYPE> <- <EXPR> <TER>`
```
num sint32 <- 42
```
### Statement Assignment
`<IDENT> <- <Expression> <NL>`
`<ID> <- <EXPR> <TER>`
```
num1 <- 5
```
### StatementBlock
### Statement Block
(<Statement> <NL>)*
### StatementExpression
### Statement Expression
<Expression> <NL>
StatementFunction:
<IDENT> fun (: <NLO> <IDENT> <TYPE> (, <NLO> <IDENT> <TYPE>)*)? (-> <NLO> <TYPE>)? <NL>
;
### StatementLoop
`loop [<StatementVariable> | <StatementAssignment>] (, <NL>? <Expression> (, <NL>? <Expression>)?)? <NL> <StatementBlock> ;`
```
@@ -64,17 +92,9 @@ i < 10
`loop (<ExpressionLogical> (, <NL>? <ExpressionLogical>)?)? <NL> <StatementBlock> ;`
StatementMetaExternFunction:
@extern <IDENT> fun (: <NLO> <IDENT> <TYPE> (, <NLO> <IDENT> <TYPE>)*)? (-> <NLO> <TYPE>)? <NL>
### StatementReturn
`ret <Expression>`
### Statement Variable
`<IDENT> <TYPE> <- <Expression>`
### ExpressionVariable:
`<IDENT>`

View File

@@ -33,6 +33,12 @@ vector<shared_ptr<Statement>> Parser::getStatements() {
exit(1);
}
statements.push_back(statement);
// Expect new line after statement
if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) {
cerr << "Expected new line" << endl;
exit(1);
}
}
return statements;
@@ -115,12 +121,12 @@ shared_ptr<Statement> Parser::matchStatementFunction() {
returnType = *type;
currentIndex++; // type
// consume new line
if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
return matchStatementInvalid("Expected new line after function declaration");
}
// consume new line
if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
return matchStatementInvalid("Expected new line after function declaration");
// block
statementBlock = matchStatementBlock({TokenKind::SEMICOLON});
if (statementBlock == nullptr || !statementBlock->isValid())
@@ -136,7 +142,7 @@ shared_ptr<Statement> Parser::matchStatementVariable() {
if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false))
return nullptr;
shared_ptr<Token> identifierToken = tokens.at(currentIndex);
shared_ptr<Token> identifierToken = tokens.at(currentIndex++);
currentIndex++; // identifier
shared_ptr<Token> valueTypeToken = tokens.at(currentIndex);
@@ -158,7 +164,7 @@ shared_ptr<Statement> Parser::matchStatementVariable() {
shared_ptr<Expression> expression = nextExpression();
if (expression == nullptr || !expression->isValid())
return matchStatementInvalid();
return matchStatementInvalid("Invalid expression");
// Expect comma or new line
if (!tryMatchingTokenKinds({TokenKind::COMMA}, true, false) && !tryMatchingTokenKinds({TokenKind::NEW_LINE}, false, true))
@@ -167,6 +173,54 @@ shared_ptr<Statement> Parser::matchStatementVariable() {
return make_shared<StatementVariable>(identifierToken->getLexme(), valueType, expression);
}
// Matches an extern function declaration of the form
//   @extern <ID> fun (: <ID> <TYPE> (, <ID> <TYPE>)*)? (-> <TYPE>)?
//
// Returns nullptr when the upcoming tokens are not M_EXTERN, IDENTIFIER,
// FUNCTION. Returns the result of matchStatementInvalid(...) when the header
// matched but an argument, the return type, or the trailing new line is
// malformed. Otherwise returns a StatementMetaExternFunction built from the
// function name, the collected (name, type) arguments and the return type
// (ValueType::NONE when no `->` clause is present).
shared_ptr<Statement> Parser::matchStatementMetaExternFunction() {
    if (!tryMatchingTokenKinds({TokenKind::M_EXTERN, TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false))
        return nullptr;

    vector<pair<string, ValueType>> arguments;
    ValueType returnType = ValueType::NONE;

    currentIndex++; // skip meta
    shared_ptr<Token> identifierToken = tokens.at(currentIndex++); // function name
    currentIndex++; // skip fun

    // arguments
    if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) {
        do {
            tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line
            if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false))
                return matchStatementInvalid("Expected function argument");

            // Named distinctly from identifierToken so the function name is not shadowed.
            shared_ptr<Token> argumentToken = tokens.at(currentIndex++);
            shared_ptr<Token> typeToken = tokens.at(currentIndex++);
            optional<ValueType> argumentType = valueTypeForToken(typeToken);
            if (!argumentType)
                return matchStatementInvalid("Invalid argument type");

            arguments.push_back(pair<string, ValueType>(argumentToken->getLexme(), *argumentType));
        } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true));
    }

    // Return type
    if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) {
        tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line

        shared_ptr<Token> typeToken = tokens.at(currentIndex);
        optional<ValueType> type = valueTypeForToken(typeToken);
        if (!type)
            return matchStatementInvalid("Expected return type");

        returnType = *type;
        currentIndex++; // type

        // consume new line
        // NOTE(review): the trailing new line is only required/consumed when a
        // return type is present, whereas matchStatementFunction checks it
        // unconditionally after the return-type block - confirm this is intended.
        if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
            return matchStatementInvalid("Expected new line after function declaration");
    }

    return make_shared<StatementMetaExternFunction>(identifierToken->getLexme(), arguments, returnType);
}
shared_ptr<Statement> Parser::matchStatementAssignment() {
if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::LEFT_ARROW}, true, false))
return nullptr;
@@ -269,56 +323,6 @@ shared_ptr<Statement> Parser::matchStatementExpression() {
return make_shared<StatementExpression>(expression);
}
// Matches `@extern <IDENT> fun` followed by an optional argument list and an
// optional `-> <TYPE>` clause.
//
// Yields nullptr when the M_EXTERN, IDENTIFIER, FUNCTION check fails, the
// result of matchStatementInvalid(...) on a malformed argument / return type /
// missing trailing new line, and a StatementMetaExternFunction otherwise.
shared_ptr<Statement> Parser::matchStatementMetaExternFunction() {
    const bool headerMatches = tryMatchingTokenKinds({TokenKind::M_EXTERN, TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false);
    if (!headerMatches)
        return nullptr;

    // Token layout here: [meta] [identifier] [fun]
    ++currentIndex; // step over the meta token
    shared_ptr<Token> nameToken = tokens.at(currentIndex);
    currentIndex += 2; // step over the identifier and `fun`

    // Collect the (name, type) pairs, if a colon introduces an argument list
    vector<pair<string, ValueType>> parameters;
    if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) {
        bool expectAnother = true;
        while (expectAnother) {
            // an argument may start on a fresh line
            tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);

            if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false))
                return matchStatementInvalid("Expected function argument");

            shared_ptr<Token> parameterNameToken = tokens.at(currentIndex++);
            shared_ptr<Token> parameterTypeToken = tokens.at(currentIndex++);

            optional<ValueType> parameterType = valueTypeForToken(parameterTypeToken);
            if (!parameterType)
                return matchStatementInvalid("Invalid argument type");

            parameters.emplace_back(parameterNameToken->getLexme(), *parameterType);

            // a comma means one more argument follows
            expectAnother = tryMatchingTokenKinds({TokenKind::COMMA}, true, true);
        }
    }

    // an optional new line may precede the return-type arrow
    tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);

    ValueType resultType = ValueType::NONE;

    // No arrow: the declaration has no return type
    if (!tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true))
        return make_shared<StatementMetaExternFunction>(nameToken->getLexme(), parameters, resultType);

    shared_ptr<Token> resultTypeToken = tokens.at(currentIndex);
    optional<ValueType> declaredType = valueTypeForToken(resultTypeToken);
    if (!declaredType)
        return matchStatementInvalid("Expected return type");

    resultType = *declaredType;
    ++currentIndex; // step over the type token

    // a new line must close the declaration when a return type was given
    if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
        return matchStatementInvalid("Expected new line after function declaration");

    return make_shared<StatementMetaExternFunction>(nameToken->getLexme(), parameters, resultType);
}
shared_ptr<Statement> Parser::matchStatementBlock(vector<TokenKind> terminalTokenKinds) {
vector<shared_ptr<Statement>> statements;

View File

@@ -1,7 +1,2 @@
stuff fun: num1 sint32, num2 sint32 -> sint32
if num1 > num2:
ret 32 * num1
else
ret 45 * num2
;
;