More complex type parsing

This commit is contained in:
Rafał Grodziński
2025-07-08 17:31:56 +09:00
parent 18dd7d05d4
commit 9e7747dcbc
10 changed files with 128 additions and 62 deletions

View File

@@ -150,6 +150,10 @@ void ModuleBuilder::buildVarDeclaration(shared_ptr<StatementVariable> statement)
builder->CreateStore(ar, arAlloca);*/ builder->CreateStore(ar, arAlloca);*/
} }
// Stub: the body is empty, so calling this currently does nothing and
// `statement` is unused.
// NOTE(review): presumably meant to build array-typed variable declarations —
// confirm the intent and implement.
void ModuleBuilder::buildArrayDeclaration(shared_ptr<StatementVariable> statement) {
}
void ModuleBuilder::buildAssignment(shared_ptr<StatementAssignment> statement) { void ModuleBuilder::buildAssignment(shared_ptr<StatementAssignment> statement) {
llvm::AllocaInst *alloca = getAlloca(statement->getName()); llvm::AllocaInst *alloca = getAlloca(statement->getName());
if (alloca == nullptr) if (alloca == nullptr)
@@ -251,7 +255,7 @@ llvm::Value *ModuleBuilder::valueForExpression(shared_ptr<Expression> expression
case ExpressionKind::LITERAL: case ExpressionKind::LITERAL:
return valueForLiteral(dynamic_pointer_cast<ExpressionLiteral>(expression)); return valueForLiteral(dynamic_pointer_cast<ExpressionLiteral>(expression));
case ExpressionKind::ARRAY_LITERAL: case ExpressionKind::ARRAY_LITERAL:
return valueForArrayLiteral(dynamic_pointer_cast<ExpressionArrayLiteral>(expression)); return nullptr;// valuesForArrayLiteral(dynamic_pointer_cast<ExpressionArrayLiteral>(expression));
case ExpressionKind::GROUPING: case ExpressionKind::GROUPING:
return valueForExpression(dynamic_pointer_cast<ExpressionGrouping>(expression)->getExpression()); return valueForExpression(dynamic_pointer_cast<ExpressionGrouping>(expression)->getExpression());
case ExpressionKind::BINARY: case ExpressionKind::BINARY:
@@ -284,8 +288,12 @@ llvm::Value *ModuleBuilder::valueForLiteral(shared_ptr<ExpressionLiteral> expres
} }
} }
llvm::Value *ModuleBuilder::valueForArrayLiteral(shared_ptr<ExpressionArrayLiteral> expression) { vector<llvm::Value*> ModuleBuilder::valuesForArrayLiteral(shared_ptr<ExpressionArrayLiteral> expression) {
return nullptr; vector<llvm::Value*> values;
for (shared_ptr<Expression> &expression : expression->getExpressions()) {
values.push_back(valueForExpression(expression));
}
return values;
} }
llvm::Value *ModuleBuilder::valueForGrouping(shared_ptr<ExpressionGrouping> expression) { llvm::Value *ModuleBuilder::valueForGrouping(shared_ptr<ExpressionGrouping> expression) {

View File

@@ -62,6 +62,7 @@ private:
void buildStatement(shared_ptr<Statement> statement); void buildStatement(shared_ptr<Statement> statement);
void buildFunctionDeclaration(shared_ptr<StatementFunction> statement); void buildFunctionDeclaration(shared_ptr<StatementFunction> statement);
void buildVarDeclaration(shared_ptr<StatementVariable> statement); void buildVarDeclaration(shared_ptr<StatementVariable> statement);
void buildArrayDeclaration(shared_ptr<StatementVariable> statement);
void buildAssignment(shared_ptr<StatementAssignment> statement); void buildAssignment(shared_ptr<StatementAssignment> statement);
void buildBlock(shared_ptr<StatementBlock> statement); void buildBlock(shared_ptr<StatementBlock> statement);
void buildReturn(shared_ptr<StatementReturn> statement); void buildReturn(shared_ptr<StatementReturn> statement);
@@ -71,7 +72,7 @@ private:
llvm::Value *valueForExpression(shared_ptr<Expression> expression); llvm::Value *valueForExpression(shared_ptr<Expression> expression);
llvm::Value *valueForLiteral(shared_ptr<ExpressionLiteral> expression); llvm::Value *valueForLiteral(shared_ptr<ExpressionLiteral> expression);
llvm::Value *valueForArrayLiteral(shared_ptr<ExpressionArrayLiteral> expression); vector<llvm::Value*> valuesForArrayLiteral(shared_ptr<ExpressionArrayLiteral> expression);
llvm::Value *valueForGrouping(shared_ptr<ExpressionGrouping> expression); llvm::Value *valueForGrouping(shared_ptr<ExpressionGrouping> expression);
llvm::Value *valueForBinary(shared_ptr<ExpressionBinary> expression); llvm::Value *valueForBinary(shared_ptr<ExpressionBinary> expression);
llvm::Value *valueForBinaryBool(ExpressionBinaryOperation operation, llvm::Value *leftValue, llvm::Value *rightValue); llvm::Value *valueForBinaryBool(ExpressionBinaryOperation operation, llvm::Value *leftValue, llvm::Value *rightValue);

View File

@@ -13,9 +13,9 @@ vector<shared_ptr<Token>> Lexer::getTokens() {
currentLine = 0; currentLine = 0;
currentColumn = 0; currentColumn = 0;
tokens.clear();
errors.clear(); errors.clear();
vector<shared_ptr<Token>> tokens;
shared_ptr<Token> token; shared_ptr<Token> token;
do { do {
token = nextToken(); token = nextToken();
@@ -251,15 +251,7 @@ shared_ptr<Token> Lexer::nextToken() {
return token; return token;
// type // type
token = match(TokenKind::TYPE, "bool", true); token = matchType();
if (token != nullptr)
return token;
token = match(TokenKind::TYPE, "sint32", true);
if (token != nullptr)
return token;
token = match(TokenKind::TYPE, "real32", true);
if (token != nullptr) if (token != nullptr)
return token; return token;
@@ -428,6 +420,24 @@ shared_ptr<Token> Lexer::matchIdentifier() {
return token; return token;
} }
// Tries to produce a TYPE token starting at currentIndex.
//
// A type is only attempted when the last element of `tokens` is an IDENTIFIER,
// LESS or RIGHT_ARROW token. The candidate lexeme is the run of positions,
// beginning at currentIndex, for which isIdentifier() holds; the run must be
// non-empty and the position right after it must satisfy isSeparator().
//
// Returns the new TYPE token after passing it to advanceWithToken(), or
// nullptr (without calling advanceWithToken()) when any condition fails.
shared_ptr<Token> Lexer::matchType() {
    // Context check: no previous token means no type can follow.
    if (tokens.empty())
        return nullptr;
    if (!tokens.back()->isOfKind({TokenKind::IDENTIFIER, TokenKind::LESS, TokenKind::RIGHT_ARROW}))
        return nullptr;

    // Scan forward over the identifier-like run.
    int endIndex = currentIndex;
    for (; endIndex < source.length() && isIdentifier(endIndex); endIndex++) {
    }

    const bool isEmptyRun = (endIndex == currentIndex);
    if (isEmptyRun || !isSeparator(endIndex))
        return nullptr;

    string typeName = source.substr(currentIndex, endIndex - currentIndex);
    shared_ptr<Token> typeToken = make_shared<Token>(TokenKind::TYPE, typeName, currentLine, currentColumn);
    advanceWithToken(typeToken);
    return typeToken;
}
shared_ptr<Token> Lexer::matchEnd() { shared_ptr<Token> Lexer::matchEnd() {
if (currentIndex >= source.length()) if (currentIndex >= source.length())
return make_shared<Token>(TokenKind::END, "", currentLine, currentColumn); return make_shared<Token>(TokenKind::END, "", currentLine, currentColumn);

View File

@@ -15,6 +15,7 @@ private:
int currentIndex; int currentIndex;
int currentLine; int currentLine;
int currentColumn; int currentColumn;
vector<shared_ptr<Token>> tokens;
vector<shared_ptr<Error>> errors; vector<shared_ptr<Error>> errors;
shared_ptr<Token> nextToken(); shared_ptr<Token> nextToken();
@@ -24,6 +25,7 @@ private:
shared_ptr<Token> matchIntegerBin(); shared_ptr<Token> matchIntegerBin();
shared_ptr<Token> matchIntegerChar(); shared_ptr<Token> matchIntegerChar();
shared_ptr<Token> matchReal(); shared_ptr<Token> matchReal();
shared_ptr<Token> matchType();
shared_ptr<Token> matchIdentifier(); shared_ptr<Token> matchIdentifier();
shared_ptr<Token> matchEnd(); shared_ptr<Token> matchEnd();

View File

@@ -140,6 +140,10 @@ string Logger::toString(TokenKind tokenKind) {
return "("; return "(";
case TokenKind::RIGHT_PAREN: case TokenKind::RIGHT_PAREN:
return ")"; return ")";
case TokenKind::LEFT_SQUARE_BRACKET:
return "[";
case TokenKind::RIGHT_SQUARE_BRACKET:
return "]";
case TokenKind::COMMA: case TokenKind::COMMA:
return ","; return ",";
case TokenKind::COLON: case TokenKind::COLON:
@@ -196,6 +200,8 @@ string Logger::toString(shared_ptr<ValueType> valueType) {
return "SINT32"; return "SINT32";
case ValueTypeKind::REAL32: case ValueTypeKind::REAL32:
return "REAL32"; return "REAL32";
case ValueTypeKind::DATA:
return "[]";
} }
} }
@@ -386,6 +392,8 @@ string Logger::toString(shared_ptr<ExpressionLiteral> expression) {
return to_string(expression->getSint32Value()); return to_string(expression->getSint32Value());
case ValueTypeKind::REAL32: case ValueTypeKind::REAL32:
return to_string(expression->getReal32Value()); return to_string(expression->getReal32Value());
default:
return "?";
} }
} }

View File

@@ -11,20 +11,20 @@ Expression(ExpressionKind::LITERAL, nullptr) {
switch (token->getKind()) { switch (token->getKind()) {
case TokenKind::BOOL: case TokenKind::BOOL:
boolValue = token->getLexme().compare("true") == 0; boolValue = token->getLexme().compare("true") == 0;
valueType = ValueType::valueTypeForToken(token); valueType = ValueType::valueTypeForToken(token, nullptr, 0);
break; break;
case TokenKind::INTEGER_DEC: { case TokenKind::INTEGER_DEC: {
string numString = token->getLexme(); string numString = token->getLexme();
erase(numString, '_'); erase(numString, '_');
sint32Value = stoi(numString, nullptr, 10); sint32Value = stoi(numString, nullptr, 10);
valueType = ValueType::valueTypeForToken(token); valueType = ValueType::valueTypeForToken(token, nullptr, 0);
break; break;
} }
case TokenKind::INTEGER_HEX: { case TokenKind::INTEGER_HEX: {
string numString = token->getLexme(); string numString = token->getLexme();
erase(numString, '_'); erase(numString, '_');
sint32Value = stoi(numString, nullptr, 16); sint32Value = stoi(numString, nullptr, 16);
valueType = ValueType::valueTypeForToken(token); valueType = ValueType::valueTypeForToken(token, nullptr, 0);
break; break;
} }
case TokenKind::INTEGER_BIN: { case TokenKind::INTEGER_BIN: {
@@ -32,13 +32,13 @@ Expression(ExpressionKind::LITERAL, nullptr) {
erase(numString, '_'); erase(numString, '_');
numString = numString.substr(2, numString.size()-1); numString = numString.substr(2, numString.size()-1);
sint32Value = stoi(numString, nullptr, 2); sint32Value = stoi(numString, nullptr, 2);
valueType = ValueType::valueTypeForToken(token); valueType = ValueType::valueTypeForToken(token, nullptr, 0);
break; break;
} }
case TokenKind::INTEGER_CHAR: { case TokenKind::INTEGER_CHAR: {
string charString = token->getLexme(); string charString = token->getLexme();
valueType = ValueType::valueTypeForToken(token); valueType = ValueType::valueTypeForToken(token, nullptr, 0);
if (charString.length() == 3) { if (charString.length() == 3) {
sint32Value = charString[1]; sint32Value = charString[1];
} else if (charString.length() == 4 && charString[1] == '\\') { } else if (charString.length() == 4 && charString[1] == '\\') {
@@ -67,7 +67,7 @@ Expression(ExpressionKind::LITERAL, nullptr) {
} }
case TokenKind::REAL: case TokenKind::REAL:
real32Value = stof(token->getLexme()); real32Value = stof(token->getLexme());
valueType = ValueType::valueTypeForToken(token); valueType = ValueType::valueTypeForToken(token, nullptr, 0);
break; break;
default: default:
exit(1); exit(1);

View File

@@ -123,7 +123,7 @@ shared_ptr<Statement> Parser::matchStatementMetaExternFunction() {
} }
shared_ptr<Token> identifierToken = tokens.at(currentIndex++); shared_ptr<Token> identifierToken = tokens.at(currentIndex++);
shared_ptr<Token> argumentTypeToken = tokens.at(currentIndex++); shared_ptr<Token> argumentTypeToken = tokens.at(currentIndex++);
shared_ptr<ValueType> argumentType = ValueType::valueTypeForToken(argumentTypeToken); shared_ptr<ValueType> argumentType = matchValueType();
if (argumentType == nullptr) { if (argumentType == nullptr) {
markError(TokenKind::TYPE, {}); markError(TokenKind::TYPE, {});
return nullptr; return nullptr;
@@ -137,8 +137,8 @@ shared_ptr<Statement> Parser::matchStatementMetaExternFunction() {
if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) {
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line
shared_ptr<Token> returnTypeToken = tokens.at(currentIndex); //shared_ptr<Token> returnTypeToken = tokens.at(currentIndex);
returnType = ValueType::valueTypeForToken(returnTypeToken); returnType = matchValueType();
if (returnType == nullptr) { if (returnType == nullptr) {
markError(TokenKind::TYPE, {}); markError(TokenKind::TYPE, {});
return nullptr; return nullptr;
@@ -155,21 +155,7 @@ shared_ptr<Statement> Parser::matchStatementVariable() {
return nullptr; return nullptr;
shared_ptr<Token> identifierToken = tokens.at(currentIndex++); shared_ptr<Token> identifierToken = tokens.at(currentIndex++);
shared_ptr<Token> valueTypeToken = tokens.at(currentIndex); shared_ptr<ValueType> valueType = matchValueType();
shared_ptr<ValueType> valueType;
if (valueTypeToken->getLexme().compare("bool") == 0)
valueType = ValueType::BOOL;
else if (valueTypeToken->getLexme().compare("sint32") == 0)
valueType = ValueType::SINT32;
else if (valueTypeToken->getLexme().compare("real32") == 0)
valueType = ValueType::REAL32;
else {
markError(TokenKind::TYPE, {});
return nullptr;
}
currentIndex++; // type
// Expect left arrow // Expect left arrow
if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true)) { if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true)) {
@@ -207,7 +193,7 @@ shared_ptr<Statement> Parser::matchStatementFunction() {
} }
shared_ptr<Token> identifierToken = tokens.at(currentIndex++); shared_ptr<Token> identifierToken = tokens.at(currentIndex++);
shared_ptr<Token> argumentTypeToken = tokens.at(currentIndex++); shared_ptr<Token> argumentTypeToken = tokens.at(currentIndex++);
shared_ptr<ValueType> argumentType = ValueType::valueTypeForToken(argumentTypeToken); shared_ptr<ValueType> argumentType = matchValueType();
if (argumentType == nullptr) { if (argumentType == nullptr) {
markError(TokenKind::TYPE, {}); markError(TokenKind::TYPE, {});
return nullptr; return nullptr;
@@ -221,14 +207,12 @@ shared_ptr<Statement> Parser::matchStatementFunction() {
if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) {
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line
shared_ptr<Token> returnTypeToken = tokens.at(currentIndex); //shared_ptr<Token> returnTypeToken = tokens.at(currentIndex);
returnType = ValueType::valueTypeForToken(returnTypeToken); returnType = matchValueType();
if (returnType == nullptr) { if (returnType == nullptr) {
markError(TokenKind::TYPE, {}); markError(TokenKind::TYPE, {});
return nullptr; return nullptr;
} }
currentIndex++; // type
} }
// consume new line // consume new line
@@ -638,6 +622,46 @@ shared_ptr<Expression> Parser::matchExpressionBlock(vector<TokenKind> terminalTo
return make_shared<ExpressionBlock>(statements); return make_shared<ExpressionBlock>(statements);
} }
// Parses a value type at the current token position.
//
// Shapes handled by the code below:
//   TYPE
//   TYPE < <value type> >
//   TYPE < <value type> , <integer literal> >
// The inner value type is parsed by a recursive call, so parameterised types
// may nest.
//
// Returns ValueType::valueTypeForToken(typeToken, subType, valueArg) on
// success (which can itself be nullptr for an unknown type name). Returns
// nullptr when:
//   - the current token is not a TYPE (no error is recorded here),
//   - the recursive parse of the inner type fails (no additional error is
//     recorded at this level),
//   - the parameter list is malformed (an error is recorded via markError()).
shared_ptr<ValueType> Parser::matchValueType() {
    if (!tryMatchingTokenKinds({TokenKind::TYPE}, true, false))
        return nullptr;
    shared_ptr<Token> typeToken = tokens.at(currentIndex++);

    shared_ptr<ValueType> subType;
    int valueArg = 0;

    // Optional "<subType[, valueArg]>" parameter list.
    if (tryMatchingTokenKinds({TokenKind::LESS}, true, true)) {
        if (!tryMatchingTokenKinds({TokenKind::TYPE}, true, false)) {
            markError(TokenKind::TYPE, {});
            return nullptr;
        }
        subType = matchValueType();
        if (subType == nullptr)
            return nullptr;

        // Optional integer argument after the sub type.
        if (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) {
            if (!tryMatchingTokenKinds({TokenKind::INTEGER_DEC, TokenKind::INTEGER_HEX, TokenKind::INTEGER_BIN, TokenKind::INTEGER_CHAR}, false, false)) {
                markError({}, "Expected integer literal");
                return nullptr;
            }
            // dynamic_pointer_cast yields null both for a null input and for an
            // expression that is not an ExpressionLiteral, so a single check
            // covers both failure modes and avoids dereferencing a null pointer.
            shared_ptr<ExpressionLiteral> literal = dynamic_pointer_cast<ExpressionLiteral>(matchExpressionLiteral());
            if (literal == nullptr) {
                markError({}, "Expected integer literal");
                return nullptr;
            }
            valueArg = literal->getSint32Value();
        }

        if (!tryMatchingTokenKinds({TokenKind::GREATER}, true, true)) {
            markError(TokenKind::GREATER, {});
            return nullptr;
        }
    }

    return ValueType::valueTypeForToken(typeToken, subType, valueArg);
}
bool Parser::tryMatchingTokenKinds(vector<TokenKind> kinds, bool shouldMatchAll, bool shouldAdvance) { bool Parser::tryMatchingTokenKinds(vector<TokenKind> kinds, bool shouldMatchAll, bool shouldAdvance) {
int requiredCount = shouldMatchAll ? kinds.size() : 1; int requiredCount = shouldMatchAll ? kinds.size() : 1;
if (currentIndex + requiredCount > tokens.size()) if (currentIndex + requiredCount > tokens.size())

View File

@@ -3,9 +3,11 @@
#include <vector> #include <vector>
class Error;
class Token; class Token;
enum class TokenKind; enum class TokenKind;
class Error; class ValueType;
class Expression; class Expression;
class Statement; class Statement;
@@ -47,6 +49,8 @@ private:
shared_ptr<Expression> matchExpressionBinary(shared_ptr<Expression> left); shared_ptr<Expression> matchExpressionBinary(shared_ptr<Expression> left);
shared_ptr<Expression> matchExpressionBlock(vector<TokenKind> terminalTokenKinds); shared_ptr<Expression> matchExpressionBlock(vector<TokenKind> terminalTokenKinds);
shared_ptr<ValueType> matchValueType();
bool tryMatchingTokenKinds(vector<TokenKind> kinds, bool shouldMatchAll, bool shouldAdvance); bool tryMatchingTokenKinds(vector<TokenKind> kinds, bool shouldMatchAll, bool shouldAdvance);
void markError(optional<TokenKind> expectedTokenKind, optional<string> message); void markError(optional<TokenKind> expectedTokenKind, optional<string> message);

View File

@@ -2,36 +2,38 @@
#include "Lexer/Token.h" #include "Lexer/Token.h"
shared_ptr<ValueType> ValueType::NONE = make_shared<ValueType>(ValueTypeKind::NONE); shared_ptr<ValueType> ValueType::NONE = make_shared<ValueType>(ValueTypeKind::NONE, nullptr, 0);
shared_ptr<ValueType> ValueType::BOOL = make_shared<ValueType>(ValueTypeKind::BOOL); shared_ptr<ValueType> ValueType::BOOL = make_shared<ValueType>(ValueTypeKind::BOOL, nullptr, 0);
shared_ptr<ValueType> ValueType::SINT32 = make_shared<ValueType>(ValueTypeKind::SINT32); shared_ptr<ValueType> ValueType::SINT32 = make_shared<ValueType>(ValueTypeKind::SINT32, nullptr, 0);
shared_ptr<ValueType> ValueType::REAL32 = make_shared<ValueType>(ValueTypeKind::REAL32); shared_ptr<ValueType> ValueType::REAL32 = make_shared<ValueType>(ValueTypeKind::REAL32, nullptr, 0);
ValueType::ValueType(ValueTypeKind kind): ValueType::ValueType(ValueTypeKind kind, shared_ptr<ValueType> subType, int valueArg):
kind(kind) { } kind(kind), subType(subType), valueArg(valueArg) { }
shared_ptr<ValueType> ValueType::valueTypeForToken(shared_ptr<Token> token) { shared_ptr<ValueType> ValueType::valueTypeForToken(shared_ptr<Token> token, shared_ptr<ValueType> subType, int valueArg) {
switch (token->getKind()) { switch (token->getKind()) {
case TokenKind::TYPE: { case TokenKind::TYPE: {
string lexme = token->getLexme(); string lexme = token->getLexme();
if (lexme.compare("bool") == 0) if (lexme.compare("bool") == 0)
return make_shared<ValueType>(ValueTypeKind::BOOL); return make_shared<ValueType>(ValueTypeKind::BOOL, subType, valueArg);
else if (lexme.compare("sint32") == 0) else if (lexme.compare("sint32") == 0)
return make_shared<ValueType>(ValueTypeKind::SINT32); return make_shared<ValueType>(ValueTypeKind::SINT32, subType, valueArg);
else if (lexme.compare("real32") == 0) else if (lexme.compare("real32") == 0)
return make_shared<ValueType>(ValueTypeKind::REAL32); return make_shared<ValueType>(ValueTypeKind::REAL32, subType, valueArg);
else if (lexme.compare("data") == 0)
return make_shared<ValueType>(ValueTypeKind::DATA, subType, valueArg);
else else
return nullptr; return nullptr;
} }
case TokenKind::BOOL: case TokenKind::BOOL:
return make_shared<ValueType>(ValueTypeKind::BOOL); return make_shared<ValueType>(ValueTypeKind::BOOL, nullptr, 0);
case TokenKind::INTEGER_DEC: case TokenKind::INTEGER_DEC:
case TokenKind::INTEGER_HEX: case TokenKind::INTEGER_HEX:
case TokenKind::INTEGER_BIN: case TokenKind::INTEGER_BIN:
case TokenKind::INTEGER_CHAR: case TokenKind::INTEGER_CHAR:
return make_shared<ValueType>(ValueTypeKind::SINT32); return make_shared<ValueType>(ValueTypeKind::SINT32, nullptr, 0);
case TokenKind::REAL: case TokenKind::REAL:
return make_shared<ValueType>(ValueTypeKind::REAL32); return make_shared<ValueType>(ValueTypeKind::REAL32, nullptr, 0);
default: default:
return nullptr; return nullptr;
} }
@@ -39,4 +41,6 @@ shared_ptr<ValueType> ValueType::valueTypeForToken(shared_ptr<Token> token) {
ValueTypeKind ValueType::getKind() { ValueTypeKind ValueType::getKind() {
return kind; return kind;
} }
// Returns the sub type this ValueType was constructed with; null when none was
// supplied (the predefined NONE/BOOL/SINT32/REAL32 instances pass nullptr).
shared_ptr<ValueType> ValueType::getSubType() {
    return subType;
}

View File

@@ -11,22 +11,27 @@ enum class ValueTypeKind {
NONE, NONE,
BOOL, BOOL,
SINT32, SINT32,
REAL32 REAL32,
DATA
}; };
class ValueType { class ValueType {
private: private:
ValueTypeKind kind; ValueTypeKind kind;
shared_ptr<ValueType> subType;
int valueArg;
public: public:
static shared_ptr<ValueType> NONE; static shared_ptr<ValueType> NONE;
static shared_ptr<ValueType> BOOL; static shared_ptr<ValueType> BOOL;
static shared_ptr<ValueType> SINT32; static shared_ptr<ValueType> SINT32;
static shared_ptr<ValueType> REAL32; static shared_ptr<ValueType> REAL32;
static shared_ptr<ValueType> valueTypeForToken(shared_ptr<Token> token); static shared_ptr<ValueType> valueTypeForToken(shared_ptr<Token> token, shared_ptr<ValueType> subType, int valueArg);
ValueType(ValueTypeKind kind); ValueType(ValueTypeKind kind, shared_ptr<ValueType> subType, int valueArg);
ValueTypeKind getKind(); ValueTypeKind getKind();
shared_ptr<ValueType> getSubType();
int getValueArg();
}; };
#endif #endif