More complex type parsing

This commit is contained in:
Rafał Grodziński
2025-07-08 17:31:56 +09:00
parent 18dd7d05d4
commit 9e7747dcbc
10 changed files with 128 additions and 62 deletions

View File

@@ -150,6 +150,10 @@ void ModuleBuilder::buildVarDeclaration(shared_ptr<StatementVariable> statement)
builder->CreateStore(ar, arAlloca);*/
}
// Stub: the body is empty, so calling this does nothing with `statement`.
// NOTE(review): presumably intended to emit code for array variable
// declarations (per its name) — confirm and implement.
void ModuleBuilder::buildArrayDeclaration(shared_ptr<StatementVariable> statement) {
}
void ModuleBuilder::buildAssignment(shared_ptr<StatementAssignment> statement) {
llvm::AllocaInst *alloca = getAlloca(statement->getName());
if (alloca == nullptr)
@@ -251,7 +255,7 @@ llvm::Value *ModuleBuilder::valueForExpression(shared_ptr<Expression> expression
case ExpressionKind::LITERAL:
return valueForLiteral(dynamic_pointer_cast<ExpressionLiteral>(expression));
case ExpressionKind::ARRAY_LITERAL:
return valueForArrayLiteral(dynamic_pointer_cast<ExpressionArrayLiteral>(expression));
return nullptr;// valuesForArrayLiteral(dynamic_pointer_cast<ExpressionArrayLiteral>(expression));
case ExpressionKind::GROUPING:
return valueForExpression(dynamic_pointer_cast<ExpressionGrouping>(expression)->getExpression());
case ExpressionKind::BINARY:
@@ -284,8 +288,12 @@ llvm::Value *ModuleBuilder::valueForLiteral(shared_ptr<ExpressionLiteral> expres
}
}
llvm::Value *ModuleBuilder::valueForArrayLiteral(shared_ptr<ExpressionArrayLiteral> expression) {
return nullptr;
// Evaluates every element expression of an array literal, in order, and
// returns the resulting values.
//
// Each element is passed to valueForExpression(); whatever that call returns
// (including nullptr) is stored unchanged at the matching position.
vector<llvm::Value*> ModuleBuilder::valuesForArrayLiteral(shared_ptr<ExpressionArrayLiteral> expression) {
    // Bind the element list once so its size can be used to pre-size the result.
    const auto &elements = expression->getExpressions();

    vector<llvm::Value*> values;
    values.reserve(elements.size());

    // The loop variable has its own name so it does not shadow the `expression` parameter.
    for (const shared_ptr<Expression> &element : elements) {
        values.push_back(valueForExpression(element));
    }
    return values;
}
llvm::Value *ModuleBuilder::valueForGrouping(shared_ptr<ExpressionGrouping> expression) {

View File

@@ -62,6 +62,7 @@ private:
void buildStatement(shared_ptr<Statement> statement);
void buildFunctionDeclaration(shared_ptr<StatementFunction> statement);
void buildVarDeclaration(shared_ptr<StatementVariable> statement);
void buildArrayDeclaration(shared_ptr<StatementVariable> statement);
void buildAssignment(shared_ptr<StatementAssignment> statement);
void buildBlock(shared_ptr<StatementBlock> statement);
void buildReturn(shared_ptr<StatementReturn> statement);
@@ -71,7 +72,7 @@ private:
llvm::Value *valueForExpression(shared_ptr<Expression> expression);
llvm::Value *valueForLiteral(shared_ptr<ExpressionLiteral> expression);
llvm::Value *valueForArrayLiteral(shared_ptr<ExpressionArrayLiteral> expression);
vector<llvm::Value*> valuesForArrayLiteral(shared_ptr<ExpressionArrayLiteral> expression);
llvm::Value *valueForGrouping(shared_ptr<ExpressionGrouping> expression);
llvm::Value *valueForBinary(shared_ptr<ExpressionBinary> expression);
llvm::Value *valueForBinaryBool(ExpressionBinaryOperation operation, llvm::Value *leftValue, llvm::Value *rightValue);

View File

@@ -13,9 +13,9 @@ vector<shared_ptr<Token>> Lexer::getTokens() {
currentLine = 0;
currentColumn = 0;
tokens.clear();
errors.clear();
vector<shared_ptr<Token>> tokens;
shared_ptr<Token> token;
do {
token = nextToken();
@@ -251,15 +251,7 @@ shared_ptr<Token> Lexer::nextToken() {
return token;
// type
token = match(TokenKind::TYPE, "bool", true);
if (token != nullptr)
return token;
token = match(TokenKind::TYPE, "sint32", true);
if (token != nullptr)
return token;
token = match(TokenKind::TYPE, "real32", true);
token = matchType();
if (token != nullptr)
return token;
@@ -428,6 +420,24 @@ shared_ptr<Token> Lexer::matchIdentifier() {
return token;
}
// Tries to lex a TYPE token starting at currentIndex.
//
// A type is only attempted when the most recently emitted token is an
// IDENTIFIER, LESS or RIGHT_ARROW. The lexeme is the longest run of
// identifier characters at the current position and must be followed by a
// separator. On success the token is registered through advanceWithToken()
// and returned; otherwise nullptr is returned and nothing is consumed.
shared_ptr<Token> Lexer::matchType() {
    // Context check: a type can only follow one of these token kinds.
    if (tokens.empty() || !tokens.back()->isOfKind({TokenKind::IDENTIFIER, TokenKind::LESS, TokenKind::RIGHT_ARROW}))
        return nullptr;

    // Indices are int while length() is unsigned; convert once to avoid a
    // signed/unsigned comparison in the scan loop.
    const int sourceLength = static_cast<int>(source.length());

    int nextIndex = currentIndex;
    while (nextIndex < sourceLength && isIdentifier(nextIndex))
        nextIndex++;

    // Reject an empty run, or a run that is not terminated by a separator.
    if (nextIndex == currentIndex || !isSeparator(nextIndex))
        return nullptr;

    string lexme = source.substr(currentIndex, nextIndex - currentIndex);
    shared_ptr<Token> token = make_shared<Token>(TokenKind::TYPE, lexme, currentLine, currentColumn);
    advanceWithToken(token);
    return token;
}
shared_ptr<Token> Lexer::matchEnd() {
if (currentIndex >= source.length())
return make_shared<Token>(TokenKind::END, "", currentLine, currentColumn);

View File

@@ -15,6 +15,7 @@ private:
int currentIndex;
int currentLine;
int currentColumn;
vector<shared_ptr<Token>> tokens;
vector<shared_ptr<Error>> errors;
shared_ptr<Token> nextToken();
@@ -24,6 +25,7 @@ private:
shared_ptr<Token> matchIntegerBin();
shared_ptr<Token> matchIntegerChar();
shared_ptr<Token> matchReal();
shared_ptr<Token> matchType();
shared_ptr<Token> matchIdentifier();
shared_ptr<Token> matchEnd();

View File

@@ -140,6 +140,10 @@ string Logger::toString(TokenKind tokenKind) {
return "(";
case TokenKind::RIGHT_PAREN:
return ")";
case TokenKind::LEFT_SQUARE_BRACKET:
return "[";
case TokenKind::RIGHT_SQUARE_BRACKET:
return "]";
case TokenKind::COMMA:
return ",";
case TokenKind::COLON:
@@ -196,6 +200,8 @@ string Logger::toString(shared_ptr<ValueType> valueType) {
return "SINT32";
case ValueTypeKind::REAL32:
return "REAL32";
case ValueTypeKind::DATA:
return "[]";
}
}
@@ -386,6 +392,8 @@ string Logger::toString(shared_ptr<ExpressionLiteral> expression) {
return to_string(expression->getSint32Value());
case ValueTypeKind::REAL32:
return to_string(expression->getReal32Value());
default:
return "?";
}
}

View File

@@ -11,20 +11,20 @@ Expression(ExpressionKind::LITERAL, nullptr) {
switch (token->getKind()) {
case TokenKind::BOOL:
boolValue = token->getLexme().compare("true") == 0;
valueType = ValueType::valueTypeForToken(token);
valueType = ValueType::valueTypeForToken(token, nullptr, 0);
break;
case TokenKind::INTEGER_DEC: {
string numString = token->getLexme();
erase(numString, '_');
sint32Value = stoi(numString, nullptr, 10);
valueType = ValueType::valueTypeForToken(token);
valueType = ValueType::valueTypeForToken(token, nullptr, 0);
break;
}
case TokenKind::INTEGER_HEX: {
string numString = token->getLexme();
erase(numString, '_');
sint32Value = stoi(numString, nullptr, 16);
valueType = ValueType::valueTypeForToken(token);
valueType = ValueType::valueTypeForToken(token, nullptr, 0);
break;
}
case TokenKind::INTEGER_BIN: {
@@ -32,13 +32,13 @@ Expression(ExpressionKind::LITERAL, nullptr) {
erase(numString, '_');
numString = numString.substr(2, numString.size()-1);
sint32Value = stoi(numString, nullptr, 2);
valueType = ValueType::valueTypeForToken(token);
valueType = ValueType::valueTypeForToken(token, nullptr, 0);
break;
}
case TokenKind::INTEGER_CHAR: {
string charString = token->getLexme();
valueType = ValueType::valueTypeForToken(token);
valueType = ValueType::valueTypeForToken(token, nullptr, 0);
if (charString.length() == 3) {
sint32Value = charString[1];
} else if (charString.length() == 4 && charString[1] == '\\') {
@@ -67,7 +67,7 @@ Expression(ExpressionKind::LITERAL, nullptr) {
}
case TokenKind::REAL:
real32Value = stof(token->getLexme());
valueType = ValueType::valueTypeForToken(token);
valueType = ValueType::valueTypeForToken(token, nullptr, 0);
break;
default:
exit(1);

View File

@@ -123,7 +123,7 @@ shared_ptr<Statement> Parser::matchStatementMetaExternFunction() {
}
shared_ptr<Token> identifierToken = tokens.at(currentIndex++);
shared_ptr<Token> argumentTypeToken = tokens.at(currentIndex++);
shared_ptr<ValueType> argumentType = ValueType::valueTypeForToken(argumentTypeToken);
shared_ptr<ValueType> argumentType = matchValueType();
if (argumentType == nullptr) {
markError(TokenKind::TYPE, {});
return nullptr;
@@ -137,8 +137,8 @@ shared_ptr<Statement> Parser::matchStatementMetaExternFunction() {
if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) {
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line
shared_ptr<Token> returnTypeToken = tokens.at(currentIndex);
returnType = ValueType::valueTypeForToken(returnTypeToken);
//shared_ptr<Token> returnTypeToken = tokens.at(currentIndex);
returnType = matchValueType();
if (returnType == nullptr) {
markError(TokenKind::TYPE, {});
return nullptr;
@@ -155,21 +155,7 @@ shared_ptr<Statement> Parser::matchStatementVariable() {
return nullptr;
shared_ptr<Token> identifierToken = tokens.at(currentIndex++);
shared_ptr<Token> valueTypeToken = tokens.at(currentIndex);
shared_ptr<ValueType> valueType;
if (valueTypeToken->getLexme().compare("bool") == 0)
valueType = ValueType::BOOL;
else if (valueTypeToken->getLexme().compare("sint32") == 0)
valueType = ValueType::SINT32;
else if (valueTypeToken->getLexme().compare("real32") == 0)
valueType = ValueType::REAL32;
else {
markError(TokenKind::TYPE, {});
return nullptr;
}
currentIndex++; // type
shared_ptr<ValueType> valueType = matchValueType();
// Expect left arrow
if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true)) {
@@ -207,7 +193,7 @@ shared_ptr<Statement> Parser::matchStatementFunction() {
}
shared_ptr<Token> identifierToken = tokens.at(currentIndex++);
shared_ptr<Token> argumentTypeToken = tokens.at(currentIndex++);
shared_ptr<ValueType> argumentType = ValueType::valueTypeForToken(argumentTypeToken);
shared_ptr<ValueType> argumentType = matchValueType();
if (argumentType == nullptr) {
markError(TokenKind::TYPE, {});
return nullptr;
@@ -221,14 +207,12 @@ shared_ptr<Statement> Parser::matchStatementFunction() {
if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) {
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line
shared_ptr<Token> returnTypeToken = tokens.at(currentIndex);
returnType = ValueType::valueTypeForToken(returnTypeToken);
//shared_ptr<Token> returnTypeToken = tokens.at(currentIndex);
returnType = matchValueType();
if (returnType == nullptr) {
markError(TokenKind::TYPE, {});
return nullptr;
}
currentIndex++; // type
}
// consume new line
@@ -638,6 +622,46 @@ shared_ptr<Expression> Parser::matchExpressionBlock(vector<TokenKind> terminalTo
return make_shared<ExpressionBlock>(statements);
}
// Parses a value type at the current token position.
//
// Accepted shape:  TYPE [ '<' type [ ',' integer-literal ] '>' ]
// where the nested `type` is parsed recursively by this same function.
//
// Returns the ValueType produced by ValueType::valueTypeForToken() for the
// leading TYPE token, the nested type (null when absent) and the integer
// argument (0 when absent). Returns nullptr when the current token is not a
// TYPE, or when the bracketed part is malformed; in the malformed cases
// handled here an error is recorded through markError().
shared_ptr<ValueType> Parser::matchValueType() {
    if (!tryMatchingTokenKinds({TokenKind::TYPE}, true, false))
        return nullptr;
    shared_ptr<Token> typeToken = tokens.at(currentIndex++);

    shared_ptr<ValueType> subType;
    int valueArg = 0;

    if (tryMatchingTokenKinds({TokenKind::LESS}, true, true)) {
        // A nested type is mandatory right after '<'.
        if (!tryMatchingTokenKinds({TokenKind::TYPE}, true, false)) {
            markError(TokenKind::TYPE, {});
            return nullptr;
        }
        subType = matchValueType();
        if (subType == nullptr)
            return nullptr;

        // Optional ", <integer literal>" argument.
        if (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) {
            if (!tryMatchingTokenKinds({TokenKind::INTEGER_DEC, TokenKind::INTEGER_HEX, TokenKind::INTEGER_BIN, TokenKind::INTEGER_CHAR}, false, false)) {
                markError({}, "Expected integer literal");
                return nullptr;
            }

            // Check the cast result before use: it is null both when no literal
            // was matched and when the matched expression is not an ExpressionLiteral.
            shared_ptr<ExpressionLiteral> literal = dynamic_pointer_cast<ExpressionLiteral>(matchExpressionLiteral());
            if (literal == nullptr) {
                markError({}, "Expected integer literal");
                return nullptr;
            }
            valueArg = literal->getSint32Value();
        }

        if (!tryMatchingTokenKinds({TokenKind::GREATER}, true, true)) {
            markError(TokenKind::GREATER, {});
            return nullptr;
        }
    }

    return ValueType::valueTypeForToken(typeToken, subType, valueArg);
}
bool Parser::tryMatchingTokenKinds(vector<TokenKind> kinds, bool shouldMatchAll, bool shouldAdvance) {
int requiredCount = shouldMatchAll ? kinds.size() : 1;
if (currentIndex + requiredCount > tokens.size())

View File

@@ -3,9 +3,11 @@
#include <vector>
class Error;
class Token;
enum class TokenKind;
class Error;
class ValueType;
class Expression;
class Statement;
@@ -47,6 +49,8 @@ private:
shared_ptr<Expression> matchExpressionBinary(shared_ptr<Expression> left);
shared_ptr<Expression> matchExpressionBlock(vector<TokenKind> terminalTokenKinds);
shared_ptr<ValueType> matchValueType();
bool tryMatchingTokenKinds(vector<TokenKind> kinds, bool shouldMatchAll, bool shouldAdvance);
void markError(optional<TokenKind> expectedTokenKind, optional<string> message);

View File

@@ -2,36 +2,38 @@
#include "Lexer/Token.h"
shared_ptr<ValueType> ValueType::NONE = make_shared<ValueType>(ValueTypeKind::NONE);
shared_ptr<ValueType> ValueType::BOOL = make_shared<ValueType>(ValueTypeKind::BOOL);
shared_ptr<ValueType> ValueType::SINT32 = make_shared<ValueType>(ValueTypeKind::SINT32);
shared_ptr<ValueType> ValueType::REAL32 = make_shared<ValueType>(ValueTypeKind::REAL32);
shared_ptr<ValueType> ValueType::NONE = make_shared<ValueType>(ValueTypeKind::NONE, nullptr, 0);
shared_ptr<ValueType> ValueType::BOOL = make_shared<ValueType>(ValueTypeKind::BOOL, nullptr, 0);
shared_ptr<ValueType> ValueType::SINT32 = make_shared<ValueType>(ValueTypeKind::SINT32, nullptr, 0);
shared_ptr<ValueType> ValueType::REAL32 = make_shared<ValueType>(ValueTypeKind::REAL32, nullptr, 0);
// Constructs a type of the given kind with no sub-type and a value argument of 0.
// Delegates to the three-argument constructor so that every member is
// initialised; initialising only `kind` would leave `valueArg` indeterminate.
ValueType::ValueType(ValueTypeKind kind):
ValueType(kind, nullptr, 0) { }
ValueType::ValueType(ValueTypeKind kind, shared_ptr<ValueType> subType, int valueArg):
kind(kind), subType(subType), valueArg(valueArg) { }
shared_ptr<ValueType> ValueType::valueTypeForToken(shared_ptr<Token> token) {
shared_ptr<ValueType> ValueType::valueTypeForToken(shared_ptr<Token> token, shared_ptr<ValueType> subType, int valueArg) {
switch (token->getKind()) {
case TokenKind::TYPE: {
string lexme = token->getLexme();
if (lexme.compare("bool") == 0)
return make_shared<ValueType>(ValueTypeKind::BOOL);
return make_shared<ValueType>(ValueTypeKind::BOOL, subType, valueArg);
else if (lexme.compare("sint32") == 0)
return make_shared<ValueType>(ValueTypeKind::SINT32);
return make_shared<ValueType>(ValueTypeKind::SINT32, subType, valueArg);
else if (lexme.compare("real32") == 0)
return make_shared<ValueType>(ValueTypeKind::REAL32);
return make_shared<ValueType>(ValueTypeKind::REAL32, subType, valueArg);
else if (lexme.compare("data") == 0)
return make_shared<ValueType>(ValueTypeKind::DATA, subType, valueArg);
else
return nullptr;
}
case TokenKind::BOOL:
return make_shared<ValueType>(ValueTypeKind::BOOL);
return make_shared<ValueType>(ValueTypeKind::BOOL, nullptr, 0);
case TokenKind::INTEGER_DEC:
case TokenKind::INTEGER_HEX:
case TokenKind::INTEGER_BIN:
case TokenKind::INTEGER_CHAR:
return make_shared<ValueType>(ValueTypeKind::SINT32);
return make_shared<ValueType>(ValueTypeKind::SINT32, nullptr, 0);
case TokenKind::REAL:
return make_shared<ValueType>(ValueTypeKind::REAL32);
return make_shared<ValueType>(ValueTypeKind::REAL32, nullptr, 0);
default:
return nullptr;
}
@@ -39,4 +41,6 @@ shared_ptr<ValueType> ValueType::valueTypeForToken(shared_ptr<Token> token) {
// Returns the kind this type was constructed with.
ValueTypeKind ValueType::getKind() {
    return this->kind;
}
}
// Returns the nested type supplied at construction (null when there is none).
// Defined as a member of ValueType to match the in-class declaration, and it
// returns a value on every path instead of flowing off the end.
shared_ptr<ValueType> ValueType::getSubType() {
    return subType;
}

View File

@@ -11,22 +11,27 @@ enum class ValueTypeKind {
NONE,
BOOL,
SINT32,
REAL32
REAL32,
DATA
};
// Describes a value type: its kind plus a nested type and an integer argument.
class ValueType {
private:
// Kind of this type; returned by getKind().
ValueTypeKind kind;
// Nested type given to the three-argument constructor (may be null).
shared_ptr<ValueType> subType;
// Integer argument given to the three-argument constructor.
// NOTE(review): presumably a size/count for parameterised types — confirm.
int valueArg;
public:
// Shared instances for the NONE, BOOL, SINT32 and REAL32 kinds.
static shared_ptr<ValueType> NONE;
static shared_ptr<ValueType> BOOL;
static shared_ptr<ValueType> SINT32;
static shared_ptr<ValueType> REAL32;
// Maps a token to a ValueType; yields nullptr when the token does not
// correspond to a known type.
// NOTE(review): both a one-argument and a three-argument form are declared
// here (same for the constructors below) — confirm both are still needed.
static shared_ptr<ValueType> valueTypeForToken(shared_ptr<Token> token);
static shared_ptr<ValueType> valueTypeForToken(shared_ptr<Token> token, shared_ptr<ValueType> subType, int valueArg);
ValueType(ValueTypeKind kind);
ValueType(ValueTypeKind kind, shared_ptr<ValueType> subType, int valueArg);
// Accessors for the private members above.
ValueTypeKind getKind();
shared_ptr<ValueType> getSubType();
int getValueArg();
};
#endif