Changed if-else symbol, cleaned up parsing a bit

This commit is contained in:
Rafał Grodziński
2025-06-30 23:40:03 +09:00
parent b8e66de3e3
commit f4cde21a0a
9 changed files with 202 additions and 90 deletions

122
Grammar.md Normal file
View File

@@ -0,0 +1,122 @@
`?` 0 or 1 instances
`*` 0 or more instances
`+` 1 or more instances
`<NL>` New line
`<IDENT>` Identifier
### Statement Function
`<IDENT> fun (: <NL>? <IDENT> <TYPE> (, <NL>? <IDENT> <TYPE>)*)? (-> <NL>? <TYPE>)? <NL> ;`
```
stuff fun
;
stuff fun -> sint32
ret 42
;
stuff fun: num1 sint32,
num2 sint32 ->
sint32
ret num1 + num2
;
```
### Statement Assignment
`<IDENT> <- <Expression> <NL>`
```
num1 <- 5
```
### StatementBlock
`(<Statement> <NL>)*`
### StatementExpression
`<Expression> <NL>`
StatementFunction:
<IDENT> fun (: <NLO> <IDENT> <TYPE> (, <NLO> <IDENT> <TYPE>)*)? (-> <NLO> <TYPE>)? <NL>
;
### StatementLoop
`loop [<StatementVariable> | <StatementAssignment>] (, <NL>? <Expression> (, <NL>? <Expression>)?)? <NL> <StatementBlock> ;`
```
loop i sint32 <- 0, true, i < 10
doStuff(i)
;
loop i sint32 <- 0,
true, i < 10
doStuff(i)
;
loop i sint32 <- 0,
true,
i < 10
doStuff(i)
;
```
`loop (<ExpressionLogical> (, <NL>? <ExpressionLogical>)?)? <NL> <StatementBlock> ;`
### StatementMetaExternFunction
`@extern <IDENT> fun (: <NL>? <IDENT> <TYPE> (, <NL>? <IDENT> <TYPE>)*)? (-> <NL>? <TYPE>)? <NL>`
### StatementReturn
`ret <Expression>`
### Statement Variable
`<IDENT> <TYPE> <- <Expression>`
### ExpressionVariable:
`<IDENT>`
### Expression If Else:
`if <Expression> : <ExpressionBlock>`
```
if num1 > 10: putchar('T')
```
`if <Expression> : <NL> <ExpressionBlock> ;`
```
if num1 > 10:
num1 <- 500
putchar('S')
;
```
`if <Expression> : <ExpressionBlock> else <ExpressionBlock>`
```
if num1 > 10: putchar('T') else putchar('N')
```
`if <Expression> : <NL> <ExpressionBlock> else <NL> <ExpressionBlock> ;`
```
if num1 > 10:
putchar('T')
else
putchar('N')
;
```
`if <Expression> : <ExpressionBlock> else <NL> <ExpressionBlock> <NL> ;`
```
if num1 > 10: putchar('T') else
num1 <- 500
putchar('F')
;
```
### Expression Block
`(<Statement> <NL>)* (<StatementExpression> <NL>?)? <TER>!`

View File

@@ -4,8 +4,12 @@ Lexer::Lexer(string source): source(source) {
}
vector<shared_ptr<Token>> Lexer::getTokens() {
shared_ptr<Token> token = nullptr;
tokens.clear();
currentIndex = 0;
currentLine = 0;
currentColumn = 0;
vector<shared_ptr<Token>> tokens;
shared_ptr<Token> token;
do {
token = nextToken();
// Got a nullptr, shouldn't have happened
@@ -123,14 +127,6 @@ shared_ptr<Token> Lexer::nextToken() {
token = match(TokenKind::SEMICOLON, ";", false);
if (token != nullptr)
return token;
token = match(TokenKind::QUESTION_QUESTION, "??", false);
if (token != nullptr)
return token;
token = match(TokenKind::QUESTION, "?", false);
if (token != nullptr)
return token;
token = match(TokenKind::LEFT_ARROW, "<-", false);
if (token != nullptr)
@@ -187,6 +183,14 @@ shared_ptr<Token> Lexer::nextToken() {
return token;
// keywords
token = match(TokenKind::IF, "if", true);
if (token != nullptr)
return token;
token = match(TokenKind::ELSE, "else", true);
if (token != nullptr)
return token;
token = match(TokenKind::FUNCTION, "fun", true);
if (token != nullptr)
return token;

View File

@@ -10,11 +10,9 @@ using namespace std;
class Lexer {
private:
string source;
int currentIndex = 0;
int currentLine = 0;
int currentColumn = 0;
vector<shared_ptr<Token>> tokens;
int currentIndex;
int currentLine;
int currentColumn;
shared_ptr<Token> nextToken();
shared_ptr<Token> match(TokenKind kind, string lexme, bool needsSeparator);

View File

@@ -112,10 +112,6 @@ string Token::toString() {
return ":";
case TokenKind::SEMICOLON:
return ";";
case TokenKind::QUESTION_QUESTION:
return "??";
case TokenKind::QUESTION:
return "?";
case TokenKind::LEFT_ARROW:
return "";
case TokenKind::RIGHT_ARROW:
@@ -136,6 +132,10 @@ string Token::toString() {
case TokenKind::TYPE:
return "TYPE(" + lexme + ")";
case TokenKind::IF:
return "IF";
case TokenKind::ELSE:
return "ELSE";
case TokenKind::FUNCTION:
return "FUNCTION";
case TokenKind::RETURN:

View File

@@ -26,14 +26,14 @@ enum class TokenKind {
COMMA,
COLON,
SEMICOLON,
QUESTION,
QUESTION_QUESTION,
LEFT_ARROW,
RIGHT_ARROW,
FUNCTION,
RETURN,
REPEAT,
IF,
ELSE,
BOOL,
INTEGER_DEC,

View File

@@ -436,14 +436,12 @@ shared_ptr<Expression> Parser::matchExpressionGrouping() {
if (tryMatchingTokenKinds({TokenKind::LEFT_PAREN}, true, true)) {
shared_ptr<Expression> expression = matchTerm();
// has grouped expression failed?
if (expression == nullptr) {
return matchExpressionInvalid();
} else if(!expression->isValid()) {
return expression;
if (expression == nullptr || !expression->isValid()) {
return expression ?: matchExpressionInvalid("Expected expression");
} else if (tryMatchingTokenKinds({TokenKind::RIGHT_PAREN}, true, true)) {
return make_shared<ExpressionGrouping>(expression);
} else {
return matchExpressionInvalid();
return matchExpressionInvalid("Unexpected token");
}
}
@@ -488,52 +486,56 @@ shared_ptr<Expression> Parser::matchExpressionCall() {
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // optional new line
if (!tryMatchingTokenKinds({TokenKind::RIGHT_PAREN}, true, true))
return matchExpressionInvalid();
return matchExpressionInvalid("Expected \")\"");
return make_shared<ExpressionCall>(identifierToken->getLexme(), argumentExpressions);
}
shared_ptr<Expression> Parser::matchExpressionIfElse() {
// Try maching '?'
shared_ptr<Token> token = tokens.at(currentIndex);
if (!tryMatchingTokenKinds({TokenKind::QUESTION}, true, true))
if (!tryMatchingTokenKinds({TokenKind::IF}, true, true))
return nullptr;
// Then get condition
shared_ptr<Expression> condition = nextExpression();
if (condition == nullptr)
return matchExpressionInvalid();
else if (!condition->isValid())
return condition;
// Consume optional ':'
tryMatchingTokenKinds({TokenKind::COLON}, true, true);
// Consume optional new line
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);
// Match then block
shared_ptr<Expression> thenBlock = matchExpressionBlock({TokenKind::COLON, TokenKind::SEMICOLON}, false);
if (thenBlock == nullptr)
return matchExpressionInvalid();
else if (!thenBlock->isValid())
return thenBlock;
// Match else block. Then and else block are separated by ':'
shared_ptr<Expression> condition;
shared_ptr<Expression> thenBlock;
shared_ptr<Expression> elseBlock;
if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) {
bool isSingleLine = !tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);
vector<TokenKind> terminalTokens = {TokenKind::SEMICOLON, TokenKind::COMMA, TokenKind::RIGHT_PAREN};
if (isSingleLine)
terminalTokens.push_back(TokenKind::NEW_LINE);
elseBlock = matchExpressionBlock(terminalTokens, false);
if (elseBlock == nullptr)
return matchExpressionInvalid();
else if (!elseBlock->isValid())
return elseBlock;
// condition expression
condition = nextExpression();
if (condition == nullptr || !condition->isValid())
return condition ?: matchExpressionInvalid("Expected condition expression");
if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true))
return matchExpressionInvalid("Expected \":\"");
// then
bool isMultiLine = false;
if (tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
isMultiLine = true;
// then block
if (isMultiLine)
thenBlock = matchExpressionBlock({TokenKind::ELSE, TokenKind::SEMICOLON});
else
thenBlock = matchExpressionBlock({TokenKind::ELSE, TokenKind::NEW_LINE});
if (thenBlock == nullptr || !thenBlock->isValid())
return thenBlock ?: matchExpressionInvalid("Expected then block");
// else
if (tryMatchingTokenKinds({TokenKind::ELSE}, true, true)) {
if (tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
isMultiLine = true;
// else block
if (isMultiLine)
elseBlock = matchExpressionBlock({TokenKind::SEMICOLON});
else
elseBlock = matchExpressionBlock({TokenKind::NEW_LINE});
if (elseBlock == nullptr || !elseBlock->isValid())
return elseBlock ?: matchExpressionInvalid("Expected else block");
}
tryMatchingTokenKinds({TokenKind::SEMICOLON}, true, true);
tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true);
return make_shared<ExpressionIfElse>(condition, dynamic_pointer_cast<ExpressionBlock>(thenBlock), dynamic_pointer_cast<ExpressionBlock>(elseBlock));
}
@@ -553,7 +555,7 @@ shared_ptr<Expression> Parser::matchExpressionBinary(shared_ptr<Expression> left
}
if (right == nullptr) {
return matchExpressionInvalid();
return matchExpressionInvalid("Expected right-side expression");
} else if (!right->isValid()) {
return right;
} else {
@@ -563,25 +565,21 @@ shared_ptr<Expression> Parser::matchExpressionBinary(shared_ptr<Expression> left
return nullptr;
}
shared_ptr<Expression> Parser::matchExpressionBlock(vector<TokenKind> terminalTokenKinds, bool shouldConsumeTerminal) {
shared_ptr<Expression> Parser::matchExpressionBlock(vector<TokenKind> terminalTokenKinds) {
vector<shared_ptr<Statement>> statements;
bool hasNewLineTerminal = find(terminalTokenKinds.begin(), terminalTokenKinds.end(), TokenKind::NEW_LINE) != terminalTokenKinds.end();
while (!tryMatchingTokenKinds(terminalTokenKinds, false, shouldConsumeTerminal)) {
while (!tryMatchingTokenKinds(terminalTokenKinds, false, false)) {
shared_ptr<Statement> statement = nextStatement();
if (statement == nullptr || !statement->isValid())
return matchExpressionInvalid();
return matchExpressionInvalid("Expected statement");
else
statements.push_back(statement);
if (hasNewLineTerminal && tokens.at(currentIndex-1)->getKind() == TokenKind::NEW_LINE)
currentIndex--;
}
return make_shared<ExpressionBlock>(statements);
}
shared_ptr<ExpressionInvalid> Parser::matchExpressionInvalid() {
shared_ptr<ExpressionInvalid> Parser::matchExpressionInvalid(string message) {
return make_shared<ExpressionInvalid>(tokens.at(currentIndex));
}

View File

@@ -42,8 +42,8 @@ private:
shared_ptr<Expression> matchExpressionCall();
shared_ptr<Expression> matchExpressionIfElse();
shared_ptr<Expression> matchExpressionBinary(shared_ptr<Expression> left);
shared_ptr<Expression> matchExpressionBlock(vector<TokenKind> terminalTokenKinds, bool shouldConsumeTerminal);
shared_ptr<ExpressionInvalid> matchExpressionInvalid();
shared_ptr<Expression> matchExpressionBlock(vector<TokenKind> terminalTokenKinds);
shared_ptr<ExpressionInvalid> matchExpressionInvalid(string message);
bool tryMatchingTokenKinds(vector<TokenKind> kinds, bool shouldMatchAll, bool shouldAdvance);
optional<ValueType> valueTypeForToken(shared_ptr<Token> token);

View File

@@ -74,14 +74,14 @@ int main(int argc, char **argv) {
cout << endl << endl;
}
ModuleBuilder moduleBuilder(moduleName, inputFilePath, statements);
/*ModuleBuilder moduleBuilder(moduleName, inputFilePath, statements);
shared_ptr<llvm::Module> module = moduleBuilder.getModule();
if (isVerbose) {
module->print(llvm::outs(), nullptr);
}
CodeGenerator codeGenerator(module);
codeGenerator.generateObjectFile(outputKind);
codeGenerator.generateObjectFile(outputKind);*/
return 0;
}

View File

@@ -1,17 +1,7 @@
@extern putchar fun: character sint32 -> sint32
stuff fun: num1 sint32, num2 sint32
? num1 > num2
putchar(0x54)
:
putchar(0x4e)
stuff fun: num1 sint32, num2 sint32 -> sint32
if num1 > num2:
ret 32 * num1
else
ret 45 * num2
;
putchar(0x0a)
;
main fun -> sint32
stuff(8, 108)
ret 42
;