Changed if-else symbol, cleaned up parsing a bit

This commit is contained in:
Rafał Grodziński
2025-06-30 23:40:03 +09:00
parent b8e66de3e3
commit f4cde21a0a
9 changed files with 202 additions and 90 deletions

122
Grammar.md Normal file
View File

@@ -0,0 +1,122 @@
`?` 0 or 1 instances
`*` 0 or more instances
`+` 1 or more instances
`<NL>` New line
`<IDENT>` Identifier
### Statement Function
`<IDENT> fun (: <NL>? <IDENT> <TYPE> (, <NL>? <IDENT> <TYPE>)*)? (-> <NL>? <TYPE>)? <NL> ;`
```
stuff fun
;
stuff fun -> sint32
ret 42
;
stuff fun: num1 sint32,
num2 sint32 ->
sint32
ret num1 + num2
;
```
### Statement Assignment
`<IDENT> <- <Expression> <NL>`
```
num1 <- 5
```
### StatementBlock
`(<Statement> <NL>)*`
### StatementExpression
`<Expression> <NL>`
StatementFunction:
`<IDENT> fun (: <NLO> <IDENT> <TYPE> (, <NLO> <IDENT> <TYPE>)*)? (-> <NLO> <TYPE>)? <NL>`
;
### StatementLoop
`loop [<StatementVariable> | <StatementAssignment>] (, <NL>? <Expression> (, <NL>? <Expression>)?)? <NL> <StatementBlock> ;`
```
loop i sint32 <- 0, true, i < 10
doStuff(i)
;
loop i sint32 <- 0,
true, i < 10
doStuff(i)
;
loop i sint32 <- 0,
true,
i < 10
doStuff(i)
;
```
`loop (<ExpressionLogical> (, <NL>? <ExpressionLogical>)?)? <NL> <StatementBlock> ;`
StatementMetaExternFunction:
`@extern <IDENT> fun (: <NLO> <IDENT> <TYPE> (, <NLO> <IDENT> <TYPE>)*)? (-> <NLO> <TYPE>)? <NL>`
### StatementReturn
`ret <Expression>`
### Statement Variable
`<IDENT> <TYPE> <- <Expression>`
### ExpressionVariable:
`<IDENT>`
### Expression If Else:
`if <Expression> : <ExpressionBlock>`
```
if num1 > 10: putchar('T')
```
`if <Expression> : <NL> <ExpressionBlock> ;`
```
if num1 > 10:
num1 <- 500
putchar('S')
;
```
`if <Expression> : <ExpressionBlock> else <ExpressionBlock>`
```
if num1 > 10: putchar('T') else putchar('N')
```
`if <Expression> : <NL> <ExpressionBlock> else <NL> <ExpressionBlock> ;`
```
if num1 > 10:
putchar('T')
else
putchar('N')
;
```
`if <Expression> : <ExpressionBlock> else <NL> <ExpressionBlock> <NL> ;`
```
if num1 > 10: putchar('T') else
num1 <- 500
putchar('F')
;
```
### Expression Block
`(<Statement> <NL>)* (<StatementExpression> <NL>?)? <TER>!`

View File

@@ -4,8 +4,12 @@ Lexer::Lexer(string source): source(source) {
} }
vector<shared_ptr<Token>> Lexer::getTokens() { vector<shared_ptr<Token>> Lexer::getTokens() {
shared_ptr<Token> token = nullptr; currentIndex = 0;
tokens.clear(); currentLine = 0;
currentColumn = 0;
vector<shared_ptr<Token>> tokens;
shared_ptr<Token> token;
do { do {
token = nextToken(); token = nextToken();
// Got a nullptr, shouldn't have happened // Got a nullptr, shouldn't have happened
@@ -123,14 +127,6 @@ shared_ptr<Token> Lexer::nextToken() {
token = match(TokenKind::SEMICOLON, ";", false); token = match(TokenKind::SEMICOLON, ";", false);
if (token != nullptr) if (token != nullptr)
return token; return token;
token = match(TokenKind::QUESTION_QUESTION, "??", false);
if (token != nullptr)
return token;
token = match(TokenKind::QUESTION, "?", false);
if (token != nullptr)
return token;
token = match(TokenKind::LEFT_ARROW, "<-", false); token = match(TokenKind::LEFT_ARROW, "<-", false);
if (token != nullptr) if (token != nullptr)
@@ -187,6 +183,14 @@ shared_ptr<Token> Lexer::nextToken() {
return token; return token;
// keywords // keywords
token = match(TokenKind::IF, "if", true);
if (token != nullptr)
return token;
token = match(TokenKind::ELSE, "else", true);
if (token != nullptr)
return token;
token = match(TokenKind::FUNCTION, "fun", true); token = match(TokenKind::FUNCTION, "fun", true);
if (token != nullptr) if (token != nullptr)
return token; return token;

View File

@@ -10,11 +10,9 @@ using namespace std;
class Lexer { class Lexer {
private: private:
string source; string source;
int currentIndex = 0; int currentIndex;
int currentLine = 0; int currentLine;
int currentColumn = 0; int currentColumn;
vector<shared_ptr<Token>> tokens;
shared_ptr<Token> nextToken(); shared_ptr<Token> nextToken();
shared_ptr<Token> match(TokenKind kind, string lexme, bool needsSeparator); shared_ptr<Token> match(TokenKind kind, string lexme, bool needsSeparator);

View File

@@ -112,10 +112,6 @@ string Token::toString() {
return ":"; return ":";
case TokenKind::SEMICOLON: case TokenKind::SEMICOLON:
return ";"; return ";";
case TokenKind::QUESTION_QUESTION:
return "??";
case TokenKind::QUESTION:
return "?";
case TokenKind::LEFT_ARROW: case TokenKind::LEFT_ARROW:
return ""; return "";
case TokenKind::RIGHT_ARROW: case TokenKind::RIGHT_ARROW:
@@ -136,6 +132,10 @@ string Token::toString() {
case TokenKind::TYPE: case TokenKind::TYPE:
return "TYPE(" + lexme + ")"; return "TYPE(" + lexme + ")";
case TokenKind::IF:
return "IF";
case TokenKind::ELSE:
return "ELSE";
case TokenKind::FUNCTION: case TokenKind::FUNCTION:
return "FUNCTION"; return "FUNCTION";
case TokenKind::RETURN: case TokenKind::RETURN:

View File

@@ -26,14 +26,14 @@ enum class TokenKind {
COMMA, COMMA,
COLON, COLON,
SEMICOLON, SEMICOLON,
QUESTION,
QUESTION_QUESTION,
LEFT_ARROW, LEFT_ARROW,
RIGHT_ARROW, RIGHT_ARROW,
FUNCTION, FUNCTION,
RETURN, RETURN,
REPEAT, REPEAT,
IF,
ELSE,
BOOL, BOOL,
INTEGER_DEC, INTEGER_DEC,

View File

@@ -436,14 +436,12 @@ shared_ptr<Expression> Parser::matchExpressionGrouping() {
if (tryMatchingTokenKinds({TokenKind::LEFT_PAREN}, true, true)) { if (tryMatchingTokenKinds({TokenKind::LEFT_PAREN}, true, true)) {
shared_ptr<Expression> expression = matchTerm(); shared_ptr<Expression> expression = matchTerm();
// has grouped expression failed? // has grouped expression failed?
if (expression == nullptr) { if (expression == nullptr || !expression->isValid()) {
return matchExpressionInvalid(); return expression ?: matchExpressionInvalid("Expected expression");
} else if(!expression->isValid()) {
return expression;
} else if (tryMatchingTokenKinds({TokenKind::RIGHT_PAREN}, true, true)) { } else if (tryMatchingTokenKinds({TokenKind::RIGHT_PAREN}, true, true)) {
return make_shared<ExpressionGrouping>(expression); return make_shared<ExpressionGrouping>(expression);
} else { } else {
return matchExpressionInvalid(); return matchExpressionInvalid("Unexpected token");
} }
} }
@@ -488,52 +486,56 @@ shared_ptr<Expression> Parser::matchExpressionCall() {
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // optional new line tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // optional new line
if (!tryMatchingTokenKinds({TokenKind::RIGHT_PAREN}, true, true)) if (!tryMatchingTokenKinds({TokenKind::RIGHT_PAREN}, true, true))
return matchExpressionInvalid(); return matchExpressionInvalid("Expected \")\"");
return make_shared<ExpressionCall>(identifierToken->getLexme(), argumentExpressions); return make_shared<ExpressionCall>(identifierToken->getLexme(), argumentExpressions);
} }
shared_ptr<Expression> Parser::matchExpressionIfElse() { shared_ptr<Expression> Parser::matchExpressionIfElse() {
// Try maching '?' if (!tryMatchingTokenKinds({TokenKind::IF}, true, true))
shared_ptr<Token> token = tokens.at(currentIndex);
if (!tryMatchingTokenKinds({TokenKind::QUESTION}, true, true))
return nullptr; return nullptr;
// Then get condition shared_ptr<Expression> condition;
shared_ptr<Expression> condition = nextExpression(); shared_ptr<Expression> thenBlock;
if (condition == nullptr)
return matchExpressionInvalid();
else if (!condition->isValid())
return condition;
// Consume optional ':'
tryMatchingTokenKinds({TokenKind::COLON}, true, true);
// Consume optional new line
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);
// Match then block
shared_ptr<Expression> thenBlock = matchExpressionBlock({TokenKind::COLON, TokenKind::SEMICOLON}, false);
if (thenBlock == nullptr)
return matchExpressionInvalid();
else if (!thenBlock->isValid())
return thenBlock;
// Match else block. Then and else block are separated by ':'
shared_ptr<Expression> elseBlock; shared_ptr<Expression> elseBlock;
if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) {
bool isSingleLine = !tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);
vector<TokenKind> terminalTokens = {TokenKind::SEMICOLON, TokenKind::COMMA, TokenKind::RIGHT_PAREN};
if (isSingleLine)
terminalTokens.push_back(TokenKind::NEW_LINE);
elseBlock = matchExpressionBlock(terminalTokens, false); // condition expression
if (elseBlock == nullptr) condition = nextExpression();
return matchExpressionInvalid(); if (condition == nullptr || !condition->isValid())
else if (!elseBlock->isValid()) return condition ?: matchExpressionInvalid("Expected condition expression");
return elseBlock;
if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true))
return matchExpressionInvalid("Expected \":\"");
// then
bool isMultiLine = false;
if (tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
isMultiLine = true;
// then block
if (isMultiLine)
thenBlock = matchExpressionBlock({TokenKind::ELSE, TokenKind::SEMICOLON});
else
thenBlock = matchExpressionBlock({TokenKind::ELSE, TokenKind::NEW_LINE});
if (thenBlock == nullptr || !thenBlock->isValid())
return thenBlock ?: matchExpressionInvalid("Expected then block");
// else
if (tryMatchingTokenKinds({TokenKind::ELSE}, true, true)) {
if (tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
isMultiLine = true;
// else block
if (isMultiLine)
elseBlock = matchExpressionBlock({TokenKind::SEMICOLON});
else
elseBlock = matchExpressionBlock({TokenKind::NEW_LINE});
if (elseBlock == nullptr || !elseBlock->isValid())
return elseBlock ?: matchExpressionInvalid("Expected else block");
} }
tryMatchingTokenKinds({TokenKind::SEMICOLON}, true, true); tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true);
return make_shared<ExpressionIfElse>(condition, dynamic_pointer_cast<ExpressionBlock>(thenBlock), dynamic_pointer_cast<ExpressionBlock>(elseBlock)); return make_shared<ExpressionIfElse>(condition, dynamic_pointer_cast<ExpressionBlock>(thenBlock), dynamic_pointer_cast<ExpressionBlock>(elseBlock));
} }
@@ -553,7 +555,7 @@ shared_ptr<Expression> Parser::matchExpressionBinary(shared_ptr<Expression> left
} }
if (right == nullptr) { if (right == nullptr) {
return matchExpressionInvalid(); return matchExpressionInvalid("Expected right-side expression");
} else if (!right->isValid()) { } else if (!right->isValid()) {
return right; return right;
} else { } else {
@@ -563,25 +565,21 @@ shared_ptr<Expression> Parser::matchExpressionBinary(shared_ptr<Expression> left
return nullptr; return nullptr;
} }
shared_ptr<Expression> Parser::matchExpressionBlock(vector<TokenKind> terminalTokenKinds, bool shouldConsumeTerminal) { shared_ptr<Expression> Parser::matchExpressionBlock(vector<TokenKind> terminalTokenKinds) {
vector<shared_ptr<Statement>> statements; vector<shared_ptr<Statement>> statements;
bool hasNewLineTerminal = find(terminalTokenKinds.begin(), terminalTokenKinds.end(), TokenKind::NEW_LINE) != terminalTokenKinds.end(); while (!tryMatchingTokenKinds(terminalTokenKinds, false, false)) {
while (!tryMatchingTokenKinds(terminalTokenKinds, false, shouldConsumeTerminal)) {
shared_ptr<Statement> statement = nextStatement(); shared_ptr<Statement> statement = nextStatement();
if (statement == nullptr || !statement->isValid()) if (statement == nullptr || !statement->isValid())
return matchExpressionInvalid(); return matchExpressionInvalid("Expected statement");
else else
statements.push_back(statement); statements.push_back(statement);
if (hasNewLineTerminal && tokens.at(currentIndex-1)->getKind() == TokenKind::NEW_LINE)
currentIndex--;
} }
return make_shared<ExpressionBlock>(statements); return make_shared<ExpressionBlock>(statements);
} }
shared_ptr<ExpressionInvalid> Parser::matchExpressionInvalid() { shared_ptr<ExpressionInvalid> Parser::matchExpressionInvalid(string message) {
return make_shared<ExpressionInvalid>(tokens.at(currentIndex)); return make_shared<ExpressionInvalid>(tokens.at(currentIndex));
} }

View File

@@ -42,8 +42,8 @@ private:
shared_ptr<Expression> matchExpressionCall(); shared_ptr<Expression> matchExpressionCall();
shared_ptr<Expression> matchExpressionIfElse(); shared_ptr<Expression> matchExpressionIfElse();
shared_ptr<Expression> matchExpressionBinary(shared_ptr<Expression> left); shared_ptr<Expression> matchExpressionBinary(shared_ptr<Expression> left);
shared_ptr<Expression> matchExpressionBlock(vector<TokenKind> terminalTokenKinds, bool shouldConsumeTerminal); shared_ptr<Expression> matchExpressionBlock(vector<TokenKind> terminalTokenKinds);
shared_ptr<ExpressionInvalid> matchExpressionInvalid(); shared_ptr<ExpressionInvalid> matchExpressionInvalid(string message);
bool tryMatchingTokenKinds(vector<TokenKind> kinds, bool shouldMatchAll, bool shouldAdvance); bool tryMatchingTokenKinds(vector<TokenKind> kinds, bool shouldMatchAll, bool shouldAdvance);
optional<ValueType> valueTypeForToken(shared_ptr<Token> token); optional<ValueType> valueTypeForToken(shared_ptr<Token> token);

View File

@@ -74,14 +74,14 @@ int main(int argc, char **argv) {
cout << endl << endl; cout << endl << endl;
} }
ModuleBuilder moduleBuilder(moduleName, inputFilePath, statements); /*ModuleBuilder moduleBuilder(moduleName, inputFilePath, statements);
shared_ptr<llvm::Module> module = moduleBuilder.getModule(); shared_ptr<llvm::Module> module = moduleBuilder.getModule();
if (isVerbose) { if (isVerbose) {
module->print(llvm::outs(), nullptr); module->print(llvm::outs(), nullptr);
} }
CodeGenerator codeGenerator(module); CodeGenerator codeGenerator(module);
codeGenerator.generateObjectFile(outputKind); codeGenerator.generateObjectFile(outputKind);*/
return 0; return 0;
} }

View File

@@ -1,17 +1,7 @@
@extern putchar fun: character sint32 -> sint32 stuff fun: num1 sint32, num2 sint32 -> sint32
if num1 > num2:
stuff fun: num1 sint32, num2 sint32 ret 32 * num1
? num1 > num2 else
putchar(0x54) ret 45 * num2
:
putchar(0x4e)
; ;
putchar(0x0a)
;
main fun -> sint32
stuff(8, 108)
ret 42
; ;