Changed if-else symbol, cleaned up parsing a bit

This commit is contained in:
Rafał Grodziński
2025-06-30 23:40:03 +09:00
parent b8e66de3e3
commit f4cde21a0a
9 changed files with 202 additions and 90 deletions

View File

@@ -4,8 +4,12 @@ Lexer::Lexer(string source): source(source) {
}
vector<shared_ptr<Token>> Lexer::getTokens() {
shared_ptr<Token> token = nullptr;
tokens.clear();
currentIndex = 0;
currentLine = 0;
currentColumn = 0;
vector<shared_ptr<Token>> tokens;
shared_ptr<Token> token;
do {
token = nextToken();
// Got a nullptr, shouldn't have happened
@@ -123,14 +127,6 @@ shared_ptr<Token> Lexer::nextToken() {
token = match(TokenKind::SEMICOLON, ";", false);
if (token != nullptr)
return token;
token = match(TokenKind::QUESTION_QUESTION, "??", false);
if (token != nullptr)
return token;
token = match(TokenKind::QUESTION, "?", false);
if (token != nullptr)
return token;
token = match(TokenKind::LEFT_ARROW, "<-", false);
if (token != nullptr)
@@ -187,6 +183,14 @@ shared_ptr<Token> Lexer::nextToken() {
return token;
// keywords
token = match(TokenKind::IF, "if", true);
if (token != nullptr)
return token;
token = match(TokenKind::ELSE, "else", true);
if (token != nullptr)
return token;
token = match(TokenKind::FUNCTION, "fun", true);
if (token != nullptr)
return token;

View File

@@ -10,11 +10,9 @@ using namespace std;
class Lexer {
private:
string source;
int currentIndex = 0;
int currentLine = 0;
int currentColumn = 0;
vector<shared_ptr<Token>> tokens;
int currentIndex;
int currentLine;
int currentColumn;
shared_ptr<Token> nextToken();
shared_ptr<Token> match(TokenKind kind, string lexme, bool needsSeparator);

View File

@@ -112,10 +112,6 @@ string Token::toString() {
return ":";
case TokenKind::SEMICOLON:
return ";";
case TokenKind::QUESTION_QUESTION:
return "??";
case TokenKind::QUESTION:
return "?";
case TokenKind::LEFT_ARROW:
return "";
case TokenKind::RIGHT_ARROW:
@@ -136,6 +132,10 @@ string Token::toString() {
case TokenKind::TYPE:
return "TYPE(" + lexme + ")";
case TokenKind::IF:
return "IF";
case TokenKind::ELSE:
return "ELSE";
case TokenKind::FUNCTION:
return "FUNCTION";
case TokenKind::RETURN:

View File

@@ -26,14 +26,14 @@ enum class TokenKind {
COMMA,
COLON,
SEMICOLON,
QUESTION,
QUESTION_QUESTION,
LEFT_ARROW,
RIGHT_ARROW,
FUNCTION,
RETURN,
REPEAT,
IF,
ELSE,
BOOL,
INTEGER_DEC,

View File

@@ -436,14 +436,12 @@ shared_ptr<Expression> Parser::matchExpressionGrouping() {
if (tryMatchingTokenKinds({TokenKind::LEFT_PAREN}, true, true)) {
shared_ptr<Expression> expression = matchTerm();
// has grouped expression failed?
if (expression == nullptr) {
return matchExpressionInvalid();
} else if(!expression->isValid()) {
return expression;
if (expression == nullptr || !expression->isValid()) {
return expression ?: matchExpressionInvalid("Expected expression");
} else if (tryMatchingTokenKinds({TokenKind::RIGHT_PAREN}, true, true)) {
return make_shared<ExpressionGrouping>(expression);
} else {
return matchExpressionInvalid();
return matchExpressionInvalid("Unexpected token");
}
}
@@ -488,52 +486,56 @@ shared_ptr<Expression> Parser::matchExpressionCall() {
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // optional new line
if (!tryMatchingTokenKinds({TokenKind::RIGHT_PAREN}, true, true))
return matchExpressionInvalid();
return matchExpressionInvalid("Expected \")\"");
return make_shared<ExpressionCall>(identifierToken->getLexme(), argumentExpressions);
}
shared_ptr<Expression> Parser::matchExpressionIfElse() {
// Try maching '?'
shared_ptr<Token> token = tokens.at(currentIndex);
if (!tryMatchingTokenKinds({TokenKind::QUESTION}, true, true))
if (!tryMatchingTokenKinds({TokenKind::IF}, true, true))
return nullptr;
// Then get condition
shared_ptr<Expression> condition = nextExpression();
if (condition == nullptr)
return matchExpressionInvalid();
else if (!condition->isValid())
return condition;
// Consume optional ':'
tryMatchingTokenKinds({TokenKind::COLON}, true, true);
// Consume optional new line
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);
// Match then block
shared_ptr<Expression> thenBlock = matchExpressionBlock({TokenKind::COLON, TokenKind::SEMICOLON}, false);
if (thenBlock == nullptr)
return matchExpressionInvalid();
else if (!thenBlock->isValid())
return thenBlock;
// Match else block. Then and else block are separated by ':'
shared_ptr<Expression> condition;
shared_ptr<Expression> thenBlock;
shared_ptr<Expression> elseBlock;
if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) {
bool isSingleLine = !tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);
vector<TokenKind> terminalTokens = {TokenKind::SEMICOLON, TokenKind::COMMA, TokenKind::RIGHT_PAREN};
if (isSingleLine)
terminalTokens.push_back(TokenKind::NEW_LINE);
elseBlock = matchExpressionBlock(terminalTokens, false);
if (elseBlock == nullptr)
return matchExpressionInvalid();
else if (!elseBlock->isValid())
return elseBlock;
// condition expression
condition = nextExpression();
if (condition == nullptr || !condition->isValid())
return condition ?: matchExpressionInvalid("Expected condition expression");
if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true))
return matchExpressionInvalid("Expected \":\"");
// then
bool isMultiLine = false;
if (tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
isMultiLine = true;
// then block
if (isMultiLine)
thenBlock = matchExpressionBlock({TokenKind::ELSE, TokenKind::SEMICOLON});
else
thenBlock = matchExpressionBlock({TokenKind::ELSE, TokenKind::NEW_LINE});
if (thenBlock == nullptr || !thenBlock->isValid())
return thenBlock ?: matchExpressionInvalid("Expected then block");
// else
if (tryMatchingTokenKinds({TokenKind::ELSE}, true, true)) {
if (tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true))
isMultiLine = true;
// else block
if (isMultiLine)
elseBlock = matchExpressionBlock({TokenKind::SEMICOLON});
else
elseBlock = matchExpressionBlock({TokenKind::NEW_LINE});
if (elseBlock == nullptr || !elseBlock->isValid())
return elseBlock ?: matchExpressionInvalid("Expected else block");
}
tryMatchingTokenKinds({TokenKind::SEMICOLON}, true, true);
tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true);
return make_shared<ExpressionIfElse>(condition, dynamic_pointer_cast<ExpressionBlock>(thenBlock), dynamic_pointer_cast<ExpressionBlock>(elseBlock));
}
@@ -553,7 +555,7 @@ shared_ptr<Expression> Parser::matchExpressionBinary(shared_ptr<Expression> left
}
if (right == nullptr) {
return matchExpressionInvalid();
return matchExpressionInvalid("Expected right-side expression");
} else if (!right->isValid()) {
return right;
} else {
@@ -563,25 +565,21 @@ shared_ptr<Expression> Parser::matchExpressionBinary(shared_ptr<Expression> left
return nullptr;
}
shared_ptr<Expression> Parser::matchExpressionBlock(vector<TokenKind> terminalTokenKinds, bool shouldConsumeTerminal) {
shared_ptr<Expression> Parser::matchExpressionBlock(vector<TokenKind> terminalTokenKinds) {
vector<shared_ptr<Statement>> statements;
bool hasNewLineTerminal = find(terminalTokenKinds.begin(), terminalTokenKinds.end(), TokenKind::NEW_LINE) != terminalTokenKinds.end();
while (!tryMatchingTokenKinds(terminalTokenKinds, false, shouldConsumeTerminal)) {
while (!tryMatchingTokenKinds(terminalTokenKinds, false, false)) {
shared_ptr<Statement> statement = nextStatement();
if (statement == nullptr || !statement->isValid())
return matchExpressionInvalid();
return matchExpressionInvalid("Expected statement");
else
statements.push_back(statement);
if (hasNewLineTerminal && tokens.at(currentIndex-1)->getKind() == TokenKind::NEW_LINE)
currentIndex--;
}
return make_shared<ExpressionBlock>(statements);
}
shared_ptr<ExpressionInvalid> Parser::matchExpressionInvalid() {
shared_ptr<ExpressionInvalid> Parser::matchExpressionInvalid(string message) {
return make_shared<ExpressionInvalid>(tokens.at(currentIndex));
}

View File

@@ -42,8 +42,8 @@ private:
shared_ptr<Expression> matchExpressionCall();
shared_ptr<Expression> matchExpressionIfElse();
shared_ptr<Expression> matchExpressionBinary(shared_ptr<Expression> left);
shared_ptr<Expression> matchExpressionBlock(vector<TokenKind> terminalTokenKinds, bool shouldConsumeTerminal);
shared_ptr<ExpressionInvalid> matchExpressionInvalid();
shared_ptr<Expression> matchExpressionBlock(vector<TokenKind> terminalTokenKinds);
shared_ptr<ExpressionInvalid> matchExpressionInvalid(string message);
bool tryMatchingTokenKinds(vector<TokenKind> kinds, bool shouldMatchAll, bool shouldAdvance);
optional<ValueType> valueTypeForToken(shared_ptr<Token> token);

View File

@@ -74,14 +74,14 @@ int main(int argc, char **argv) {
cout << endl << endl;
}
ModuleBuilder moduleBuilder(moduleName, inputFilePath, statements);
/*ModuleBuilder moduleBuilder(moduleName, inputFilePath, statements);
shared_ptr<llvm::Module> module = moduleBuilder.getModule();
if (isVerbose) {
module->print(llvm::outs(), nullptr);
}
CodeGenerator codeGenerator(module);
codeGenerator.generateObjectFile(outputKind);
codeGenerator.generateObjectFile(outputKind);*/
return 0;
}