diff --git a/.gitignore b/.gitignore index 0f82e52..94ff068 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ .DS_Store -*.o -brb .vscode/settings.json + +# project build artifacts *.dSYM -build/ \ No newline at end of file +build/ + +# brb build artifiacts +*.o diff --git a/.vscode/launch.json b/.vscode/launch.json index 2dd8039..5574805 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -6,7 +6,7 @@ "type": "lldb-dap", "request": "launch", "program": "${command:cmake.launchTargetPath}", - "args": ["-v", "${workspaceFolder}/test.brc"], + "args": ["-v", "${workspaceFolder}/samples/hello.brc"], "cwd": "${workspaceFolder}", "internalConsoleOptions": "openOnSessionStart", } diff --git a/samples/hello.brc b/samples/hello.brc new file mode 100644 index 0000000..b8fa9d7 --- /dev/null +++ b/samples/hello.brc @@ -0,0 +1,12 @@ +@extern putchar fun: character sint32 -> sint32 + +main fun -> sint32 + text data <- "Hello, world!\n" + + rep i sint32 <- 0, text[i] != 0: + putchar(text[i]) + i <- i + 1 + ; + + ret 0 +; \ No newline at end of file diff --git a/samples/test.brc b/samples/test.brc new file mode 100644 index 0000000..f71b061 --- /dev/null +++ b/samples/test.brc @@ -0,0 +1,25 @@ +@extern putchar fun: character sint32 -> sint32 + +/* +User type + name data + age u32 + successRatio r32 + isActive bool +; +*/ + +/* +i u32 <- 0, rep text[i] != 0: + putchar(text[i]) + i++ +; +*/ + +// text data <- "Hello world!" + +main fun -> sint32 + text data <- "Hello string!\n" + + ret 0 +; \ No newline at end of file diff --git a/src/Compiler/ModuleBuilder.cpp b/src/Compiler/ModuleBuilder.cpp index 5777fe5..4e1aaa1 100644 --- a/src/Compiler/ModuleBuilder.cpp +++ b/src/Compiler/ModuleBuilder.cpp @@ -6,6 +6,7 @@ #include "Parser/Expression/ExpressionGrouping.h" #include "Parser/Expression/ExpressionLiteral.h" +#include "Parser/Expression/ExpressionArrayLiteral.h" #include "Parser/Expression/ExpressionVariable.h" #include "Parser/Expression/ExpressionCall.h" #include "Parser/Expression/ExpressionIfElse.h" @@ -126,12 +127,30 @@ void ModuleBuilder::buildFunctionDeclaration(shared_ptr state } void ModuleBuilder::buildVarDeclaration(shared_ptr statement) { - llvm::Value *value = valueForExpression(statement->getExpression()); - llvm::AllocaInst *alloca = builder->CreateAlloca(typeForValueType(statement->getValueType()), nullptr, statement->getName()); + if (statement->getValueType()->getKind() == ValueTypeKind::DATA) { + vector values = valuesForExpression(statement->getExpression()); - if (!setAlloca(statement->getName(), alloca)) - return; - builder->CreateStore(value, alloca); + llvm::ArrayType *type = (llvm::ArrayType *)typeForValueType(statement->getValueType(), values.size()); + llvm::AllocaInst *alloca = builder->CreateAlloca(type, nullptr, statement->getName()); + if (!setAlloca(statement->getName(), alloca)) + return; + for (int i=0; i < type->getNumElements(); i++) { + llvm::Value *index[] = { + builder->getInt32(0), + builder->getInt32(i) + }; + llvm::Value *elementPtr = builder->CreateGEP(type, alloca, index, format("{}_{}", statement->getName(), i)); + + builder->CreateStore(values[i], elementPtr); + } + } else { + llvm::Value *value = valueForExpression(statement->getExpression()); + llvm::AllocaInst *alloca = builder->CreateAlloca(typeForValueType(statement->getValueType(), 0), nullptr, statement->getName()); + + if (!setAlloca(statement->getName(), alloca)) + return; + builder->CreateStore(value, alloca); + } } void ModuleBuilder::buildAssignment(shared_ptr statement) { @@ -140,7 +159,20 @@ void ModuleBuilder::buildAssignment(shared_ptr statement) { return; llvm::Value *value = valueForExpression(statement->getExpression()); - builder->CreateStore(value, alloca); + + if (statement->getIndexExpression()) { + llvm::Value *indexValue = valueForExpression(statement->getIndexExpression()); + llvm::Value *index[] = { + builder->getInt32(0), + indexValue + }; + llvm::ArrayType *type = (llvm::ArrayType *)alloca->getAllocatedType(); + llvm::Value *elementPtr = builder->CreateGEP(type, alloca, index, format("{}[]", statement->getName())); + + builder->CreateStore(value, elementPtr); + } else { + builder->CreateStore(value, alloca); + } } void ModuleBuilder::buildBlock(shared_ptr statement) { @@ -250,6 +282,16 @@ llvm::Value *ModuleBuilder::valueForExpression(shared_ptr expression } } +vector ModuleBuilder::valuesForExpression(shared_ptr expression) { + switch (expression->getKind()) { + case ExpressionKind::ARRAY_LITERAL: + return valuesForArrayLiteral(dynamic_pointer_cast(expression)); + default: + markError(0, 0, "Unexpected expression"); + return vector(); + } +} + llvm::Value *ModuleBuilder::valueForLiteral(shared_ptr expression) { if (expression->getValueType() == nullptr) return llvm::UndefValue::get(typeVoid); @@ -266,6 +308,14 @@ llvm::Value *ModuleBuilder::valueForLiteral(shared_ptr expres } } +vector ModuleBuilder::valuesForArrayLiteral(shared_ptr expression) { + vector values; + for (shared_ptr &expression : expression->getExpressions()) { + values.push_back(valueForExpression(expression)); + } + return values; +} + llvm::Value *ModuleBuilder::valueForGrouping(shared_ptr expression) { return valueForExpression(expression->getExpression()); } @@ -274,6 +324,9 @@ llvm::Value *ModuleBuilder::valueForBinary(shared_ptr expressi llvm::Value *leftValue = valueForExpression(expression->getLeft()); llvm::Value *rightValue = valueForExpression(expression->getRight()); + if (leftValue == nullptr || rightValue == nullptr) + return nullptr; + llvm::Type *type = leftValue->getType(); if (type == typeBool) { @@ -411,7 +464,19 @@ llvm::Value *ModuleBuilder::valueForVar(shared_ptr expressio if (alloca == nullptr) return nullptr; - return builder->CreateLoad(alloca->getAllocatedType(), alloca, expression->getName()); + if (expression->getIndexExpression()) { + llvm::Value *indexValue = valueForExpression(expression->getIndexExpression()); + llvm::Value *index[] = { + builder->getInt32(0), + indexValue + }; + llvm::ArrayType *type = (llvm::ArrayType *)alloca->getAllocatedType(); + llvm::Value *elementPtr = builder->CreateGEP(type, alloca, index, format("{}[]", expression->getName())); + + return builder->CreateLoad(type->getArrayElementType(), elementPtr); + } else { + return builder->CreateLoad(alloca->getAllocatedType(), alloca, expression->getName()); + } } llvm::Value *ModuleBuilder::valueForCall(shared_ptr expression) { @@ -475,7 +540,7 @@ llvm::Function* ModuleBuilder::getFun(string name) { return nullptr; } -llvm::Type *ModuleBuilder::typeForValueType(shared_ptr valueType) { +llvm::Type *ModuleBuilder::typeForValueType(shared_ptr valueType, int count) { switch (valueType->getKind()) { case ValueTypeKind::NONE: return typeVoid; @@ -485,6 +550,14 @@ llvm::Type *ModuleBuilder::typeForValueType(shared_ptr valueType) { return typeSint32; case ValueTypeKind::REAL32: return typeReal32; + case ValueTypeKind::DATA: { + if (valueType->getSubType() == nullptr) + return nullptr; + if (valueType->getValueArg() > 0) + count = valueType->getValueArg(); + return llvm::ArrayType::get(typeForValueType(valueType->getSubType(), count), count); + return nullptr; + } } } diff --git a/src/Compiler/ModuleBuilder.h b/src/Compiler/ModuleBuilder.h index ddc70df..783e984 100644 --- a/src/Compiler/ModuleBuilder.h +++ b/src/Compiler/ModuleBuilder.h @@ -17,6 +17,7 @@ class ValueType; class Expression; class ExpressionGrouping; class ExpressionLiteral; +class ExpressionArrayLiteral; class ExpressionVariable; class ExpressionCall; class ExpressionIfElse; @@ -69,7 +70,9 @@ private: void buildExpression(shared_ptr statement); llvm::Value *valueForExpression(shared_ptr expression); + vector valuesForExpression(shared_ptr expression); llvm::Value *valueForLiteral(shared_ptr expression); + vector valuesForArrayLiteral(shared_ptr expression); llvm::Value *valueForGrouping(shared_ptr expression); llvm::Value *valueForBinary(shared_ptr expression); llvm::Value *valueForBinaryBool(ExpressionBinaryOperation operation, llvm::Value *leftValue, llvm::Value *rightValue); @@ -85,7 +88,7 @@ private: bool setFun(string name, llvm::Function *fun); llvm::Function *getFun(string name); - llvm::Type *typeForValueType(shared_ptr valueType); + llvm::Type *typeForValueType(shared_ptr valueType, int count = 0); void markError(int line, int column, string message); diff --git a/src/Lexer/Lexer.cpp b/src/Lexer/Lexer.cpp index 8d4ec3a..c00f19c 100644 --- a/src/Lexer/Lexer.cpp +++ b/src/Lexer/Lexer.cpp @@ -13,9 +13,9 @@ vector> Lexer::getTokens() { currentLine = 0; currentColumn = 0; + tokens.clear(); errors.clear(); - - vector> tokens; + shared_ptr token; do { token = nextToken(); @@ -126,6 +126,14 @@ shared_ptr Lexer::nextToken() { if (token != nullptr) return token; + token = match(TokenKind::LEFT_SQUARE_BRACKET, "[", false); + if (token != nullptr) + return token; + + token = match(TokenKind::RIGHT_SQUARE_BRACKET, "]", false); + if (token != nullptr) + return token; + token = match(TokenKind::COMMA, ",", false); if (token != nullptr) return token; @@ -242,16 +250,12 @@ shared_ptr Lexer::nextToken() { if (token != nullptr) return token; + token = matchString(); + if (token != nullptr) + return token; + // type - token = match(TokenKind::TYPE, "bool", true); - if (token != nullptr) - return token; - - token = match(TokenKind::TYPE, "sint32", true); - if (token != nullptr) - return token; - - token = match(TokenKind::TYPE, "real32", true); + token = matchType(); if (token != nullptr) return token; @@ -405,6 +409,27 @@ shared_ptr Lexer::matchReal() { return token; } +shared_ptr Lexer::matchString() { + int nextIndex = currentIndex; + + if (currentIndex >= source.size() || source.at(nextIndex) != '\"') + return nullptr; + + bool isClosing = false; + do { + nextIndex++; + isClosing = source.at(nextIndex) == '\"' && source.at(nextIndex - 1) != '\\'; + } while (nextIndex < source.length() && !isClosing); + + if (!isClosing) + return nullptr; + + string lexme = source.substr(currentIndex, nextIndex - currentIndex + 1); + shared_ptr token = make_shared(TokenKind::STRING, lexme, currentLine, currentColumn); + advanceWithToken(token); + return token; +} + shared_ptr Lexer::matchIdentifier() { int nextIndex = currentIndex; @@ -420,6 +445,24 @@ shared_ptr Lexer::matchIdentifier() { return token; } +shared_ptr Lexer::matchType() { + int nextIndex = currentIndex; + + if (tokens.empty() || !tokens.back()->isOfKind({TokenKind::IDENTIFIER, TokenKind::LESS, TokenKind::RIGHT_ARROW})) + return nullptr; + + while (nextIndex < source.length() && isIdentifier(nextIndex)) + nextIndex++; + + if (nextIndex == currentIndex || !isSeparator(nextIndex)) + return nullptr; + + string lexme = source.substr(currentIndex, nextIndex - currentIndex); + shared_ptr token = make_shared(TokenKind::TYPE, lexme, currentLine, currentColumn); + advanceWithToken(token); + return token; +} + shared_ptr Lexer::matchEnd() { if (currentIndex >= source.length()) return make_shared(TokenKind::END, "", currentLine, currentColumn); @@ -472,6 +515,8 @@ bool Lexer::isSeparator(int index) { case '>': case '(': case ')': + case '[': + case ']': case ',': case ':': case ';': diff --git a/src/Lexer/Lexer.h b/src/Lexer/Lexer.h index f385030..9cc8cc6 100644 --- a/src/Lexer/Lexer.h +++ b/src/Lexer/Lexer.h @@ -15,6 +15,7 @@ private: int currentIndex; int currentLine; int currentColumn; + vector> tokens; vector> errors; shared_ptr nextToken(); @@ -24,6 +25,8 @@ private: shared_ptr matchIntegerBin(); shared_ptr matchIntegerChar(); shared_ptr matchReal(); + shared_ptr matchString(); + shared_ptr matchType(); shared_ptr matchIdentifier(); shared_ptr matchEnd(); diff --git a/src/Lexer/Token.cpp b/src/Lexer/Token.cpp index 2d4817d..5fcc1df 100644 --- a/src/Lexer/Token.cpp +++ b/src/Lexer/Token.cpp @@ -41,7 +41,8 @@ vector Token::tokensLiteral = { TokenKind::INTEGER_HEX, TokenKind::INTEGER_BIN, TokenKind::INTEGER_CHAR, - TokenKind::REAL + TokenKind::REAL, + TokenKind::STRING }; Token::Token(TokenKind kind, string lexme, int line, int column): diff --git a/src/Lexer/Token.h b/src/Lexer/Token.h index 1987b11..2c4ce2e 100644 --- a/src/Lexer/Token.h +++ b/src/Lexer/Token.h @@ -21,6 +21,8 @@ enum class TokenKind { LEFT_PAREN, RIGHT_PAREN, + LEFT_SQUARE_BRACKET, + RIGHT_SQUARE_BRACKET, COMMA, COLON, SEMICOLON, @@ -39,6 +41,7 @@ enum class TokenKind { INTEGER_BIN, INTEGER_CHAR, REAL, + STRING, IDENTIFIER, TYPE, diff --git a/src/Logger.cpp b/src/Logger.cpp index b776a35..9055750 100644 --- a/src/Logger.cpp +++ b/src/Logger.cpp @@ -23,6 +23,7 @@ #include "Parser/Expression/ExpressionVariable.h" #include "Parser/Expression/ExpressionGrouping.h" #include "Parser/Expression/ExpressionLiteral.h" +#include "Parser/Expression/ExpressionArrayLiteral.h" #include "Parser/Expression/ExpressionCall.h" #include "Parser/Expression/ExpressionBlock.h" @@ -56,6 +57,10 @@ string Logger::toString(shared_ptr token) { return "("; case TokenKind::RIGHT_PAREN: return ")"; + case TokenKind::LEFT_SQUARE_BRACKET: + return "["; + case TokenKind::RIGHT_SQUARE_BRACKET: + return "]"; case TokenKind::COMMA: return ","; case TokenKind::COLON: @@ -79,6 +84,8 @@ string Logger::toString(shared_ptr token) { return "INT_CHAR(" + token->getLexme() + ")"; case TokenKind::REAL: return "REAL(" + token->getLexme() + ")"; + case TokenKind::STRING: + return "STRING(" + token->getLexme() + ")"; case TokenKind::IDENTIFIER: return "ID(" + token->getLexme() + ")"; case TokenKind::TYPE: @@ -135,6 +142,10 @@ string Logger::toString(TokenKind tokenKind) { return "("; case TokenKind::RIGHT_PAREN: return ")"; + case TokenKind::LEFT_SQUARE_BRACKET: + return "["; + case TokenKind::RIGHT_SQUARE_BRACKET: + return "]"; case TokenKind::COMMA: return ","; case TokenKind::COLON: @@ -155,6 +166,8 @@ string Logger::toString(TokenKind tokenKind) { return "LITERAL(INTEGER)"; case TokenKind::REAL: return "LITERAL(REAL)"; + case TokenKind::STRING: + return "LITERAL(STRING)"; case TokenKind::IDENTIFIER: return "LITERAL(ID)"; case TokenKind::TYPE: @@ -191,6 +204,8 @@ string Logger::toString(shared_ptr valueType) { return "SINT32"; case ValueTypeKind::REAL32: return "REAL32"; + case ValueTypeKind::DATA: + return "[]"; } } @@ -308,6 +323,8 @@ string Logger::toString(shared_ptr expression) { return toString(dynamic_pointer_cast(expression)); case ExpressionKind::LITERAL: return toString(dynamic_pointer_cast(expression)); + case ExpressionKind::ARRAY_LITERAL: + return toString(dynamic_pointer_cast(expression)); case ExpressionKind::CALL: return toString(dynamic_pointer_cast(expression)); case ExpressionKind::BLOCK: @@ -359,7 +376,11 @@ string Logger::toString(shared_ptr expression) { } string Logger::toString(shared_ptr expression) { - return format("VAR({})", expression->getName()); + string text = format("VAR({}", expression->getName()); + if (expression->getIndexExpression() != nullptr) + text += format("|{}", toString(expression->getIndexExpression())); + text += ")"; + return text; } string Logger::toString(shared_ptr expression) { @@ -379,9 +400,23 @@ string Logger::toString(shared_ptr expression) { return to_string(expression->getSint32Value()); case ValueTypeKind::REAL32: return to_string(expression->getReal32Value()); + default: + return "?"; } } +string Logger::toString(shared_ptr expression) { + string text; + text += "["; + for (int i=0; igetExpressions().size(); i++) { + text += toString(expression->getExpressions().at(i)); + if (i < expression->getExpressions().size() - 1) + text += ", "; + } + text += "]"; + return text; +} + string Logger::toString(shared_ptr expression) { string argsString; for (int i = 0; i < expression->getArgumentExpressions().size(); i++) { diff --git a/src/Logger.h b/src/Logger.h index 0ffcdff..d5f1eba 100644 --- a/src/Logger.h +++ b/src/Logger.h @@ -23,6 +23,7 @@ class ExpressionIfElse; class ExpressionVariable; class ExpressionGrouping; class ExpressionLiteral; +class ExpressionArrayLiteral; class ExpressionCall; class ExpressionBlock; @@ -52,6 +53,7 @@ private: static string toString(shared_ptr expression); static string toString(shared_ptr expression); static string toString(shared_ptr expression); + static string toString(shared_ptr expression); static string toString(shared_ptr expression); static string toString(shared_ptr expression); diff --git a/src/Parser/Expression/Expression.h b/src/Parser/Expression/Expression.h index 249f606..66bd170 100644 --- a/src/Parser/Expression/Expression.h +++ b/src/Parser/Expression/Expression.h @@ -10,6 +10,7 @@ using namespace std; enum class ExpressionKind { LITERAL, + ARRAY_LITERAL, GROUPING, BINARY, IF_ELSE, diff --git a/src/Parser/Expression/ExpressionArrayLiteral.cpp b/src/Parser/Expression/ExpressionArrayLiteral.cpp new file mode 100644 index 0000000..da5e9df --- /dev/null +++ b/src/Parser/Expression/ExpressionArrayLiteral.cpp @@ -0,0 +1,47 @@ +#include "ExpressionArrayLiteral.h" + +#include "Lexer/Token.h" +#include "Parser/Expression/ExpressionLiteral.h" + +shared_ptr ExpressionArrayLiteral::expressionArrayLiteralForExpressions(vector> expressions) { + shared_ptr expression = make_shared(); + expression->expressions = expressions; + return expression; +} + +shared_ptr ExpressionArrayLiteral::expressionArrayLiteralForTokenString(shared_ptr tokenString) { + if (tokenString->getKind() != TokenKind::STRING) + return nullptr; + + shared_ptr expression = make_shared(); + + vector> expressions; + string stringValue = tokenString->getLexme(); + for (int i=1; i token = make_shared(TokenKind::INTEGER_CHAR, lexme, tokenString->getLine(), tokenString->getColumn() + i); + shared_ptr expression = ExpressionLiteral::expressionLiteralForToken(token); + expressions.push_back(expression); + + // add terminal 0 if missing + if (i == stringValue.length() - 2 && lexme.compare("\\0") != 0) { + shared_ptr token = make_shared(TokenKind::INTEGER_CHAR, "\\0", tokenString->getLine(), tokenString->getColumn() + i + lexme.length()); + shared_ptr expression = ExpressionLiteral::expressionLiteralForToken(token); + expressions.push_back(expression); + } + } + + expression->expressions = expressions; + return expression; +} + +ExpressionArrayLiteral::ExpressionArrayLiteral(): +Expression(ExpressionKind::ARRAY_LITERAL, nullptr) { } + +vector> ExpressionArrayLiteral::getExpressions() { + return expressions; +} \ No newline at end of file diff --git a/src/Parser/Expression/ExpressionArrayLiteral.h b/src/Parser/Expression/ExpressionArrayLiteral.h new file mode 100644 index 0000000..3f74e32 --- /dev/null +++ b/src/Parser/Expression/ExpressionArrayLiteral.h @@ -0,0 +1,18 @@ +#ifndef EXPRESSION_ARRAY_LITERAL_H +#define EXPRESSION_ARRAY_LITERAL_H + +#include "Expression.h" + +class ExpressionArrayLiteral: public Expression { +private: + vector> expressions; + +public: + static shared_ptr expressionArrayLiteralForExpressions(vector> expressions); + static shared_ptr expressionArrayLiteralForTokenString(shared_ptr tokenString); + + ExpressionArrayLiteral(); + vector> getExpressions(); +}; + +#endif \ No newline at end of file diff --git a/src/Parser/Expression/ExpressionLiteral.cpp b/src/Parser/Expression/ExpressionLiteral.cpp index 3bd506f..648046e 100644 --- a/src/Parser/Expression/ExpressionLiteral.cpp +++ b/src/Parser/Expression/ExpressionLiteral.cpp @@ -1,79 +1,63 @@ #include "ExpressionLiteral.h" +#include "Utils.h" #include "Lexer/Token.h" #include "Parser/ValueType.h" -ExpressionLiteral::ExpressionLiteral(): -Expression(ExpressionKind::LITERAL, nullptr) { } +shared_ptr ExpressionLiteral::expressionLiteralForToken(shared_ptr token) { + shared_ptr expression = make_shared(); -ExpressionLiteral::ExpressionLiteral(shared_ptr token): -Expression(ExpressionKind::LITERAL, nullptr) { switch (token->getKind()) { case TokenKind::BOOL: - boolValue = token->getLexme().compare("true") == 0; - valueType = ValueType::valueTypeForToken(token); + expression->boolValue = token->getLexme().compare("true") == 0; + expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; case TokenKind::INTEGER_DEC: { string numString = token->getLexme(); erase(numString, '_'); - sint32Value = stoi(numString, nullptr, 10); - valueType = ValueType::valueTypeForToken(token); + expression->sint32Value = stoi(numString, nullptr, 10); + expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; } case TokenKind::INTEGER_HEX: { string numString = token->getLexme(); erase(numString, '_'); - sint32Value = stoi(numString, nullptr, 16); - valueType = ValueType::valueTypeForToken(token); + expression->sint32Value = stoi(numString, nullptr, 16); + expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; } case TokenKind::INTEGER_BIN: { string numString = token->getLexme(); erase(numString, '_'); numString = numString.substr(2, numString.size()-1); - sint32Value = stoi(numString, nullptr, 2); - valueType = ValueType::valueTypeForToken(token); + expression->sint32Value = stoi(numString, nullptr, 2); + expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; } case TokenKind::INTEGER_CHAR: { string charString = token->getLexme(); - - valueType = ValueType::valueTypeForToken(token); - if (charString.length() == 3) { - sint32Value = charString[1]; - } else if (charString.length() == 4 && charString[1] == '\\') { - switch (charString[2]) { - case 'b': - sint32Value = '\b'; - break; - case 'n': - sint32Value = '\n'; - break; - case 't': - sint32Value = '\t'; - break; - case '\\': - sint32Value = '\\'; - break; - case '\'': - sint32Value = '\''; - break; - case '\"': - sint32Value = '\"'; - break; - } - } - break; + optional charValue = Utils::charStringToInt(charString); + if (!charValue) + return nullptr; + + expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); + expression->sint32Value = *charValue; + return expression; } case TokenKind::REAL: - real32Value = stof(token->getLexme()); - valueType = ValueType::valueTypeForToken(token); + expression->real32Value = stof(token->getLexme()); + expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; default: - exit(1); + return nullptr; } + + return expression; } +ExpressionLiteral::ExpressionLiteral(): +Expression(ExpressionKind::LITERAL, nullptr) { } + bool ExpressionLiteral::getBoolValue() { return boolValue; } diff --git a/src/Parser/Expression/ExpressionLiteral.h b/src/Parser/Expression/ExpressionLiteral.h index 0a9dfb2..89b545e 100644 --- a/src/Parser/Expression/ExpressionLiteral.h +++ b/src/Parser/Expression/ExpressionLiteral.h @@ -1,4 +1,7 @@ -#include "Parser/Expression/Expression.h" +#ifndef EXPRESSION_LITERAL_H +#define EXPRESSION_LITERAL_H + +#include "Expression.h" class ExpressionLiteral: public Expression { private: @@ -7,9 +10,12 @@ private: float real32Value; public: - ExpressionLiteral(shared_ptr token); + static shared_ptr expressionLiteralForToken(shared_ptr token); + ExpressionLiteral(); bool getBoolValue(); int32_t getSint32Value(); float getReal32Value(); -}; \ No newline at end of file +}; + +#endif \ No newline at end of file diff --git a/src/Parser/Expression/ExpressionVariable.cpp b/src/Parser/Expression/ExpressionVariable.cpp index b75cddf..61a03e2 100644 --- a/src/Parser/Expression/ExpressionVariable.cpp +++ b/src/Parser/Expression/ExpressionVariable.cpp @@ -1,8 +1,12 @@ #include "ExpressionVariable.h" -ExpressionVariable::ExpressionVariable(string name): -Expression(ExpressionKind::VAR, nullptr), name(name) { } +ExpressionVariable::ExpressionVariable(string name, shared_ptr indexExpression): +Expression(ExpressionKind::VAR, nullptr), name(name), indexExpression(indexExpression) { } string ExpressionVariable::getName() { return name; } + +shared_ptr ExpressionVariable::getIndexExpression() { + return indexExpression; +} diff --git a/src/Parser/Expression/ExpressionVariable.h b/src/Parser/Expression/ExpressionVariable.h index bd0b99f..5835d9a 100644 --- a/src/Parser/Expression/ExpressionVariable.h +++ b/src/Parser/Expression/ExpressionVariable.h @@ -3,8 +3,10 @@ class ExpressionVariable: public Expression { private: string name; + shared_ptr indexExpression; public: - ExpressionVariable(string name); + ExpressionVariable(string name, shared_ptr indexExpression); string getName(); + shared_ptr getIndexExpression(); }; \ No newline at end of file diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 979b15c..c8922ed 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -8,6 +8,7 @@ #include "Parser/Expression/ExpressionGrouping.h" #include "Parser/Expression/ExpressionLiteral.h" +#include "Parser/Expression/ExpressionArrayLiteral.h" #include "Parser/Expression/ExpressionVariable.h" #include "Parser/Expression/ExpressionCall.h" #include "Parser/Expression/ExpressionIfElse.h" @@ -121,8 +122,8 @@ shared_ptr Parser::matchStatementMetaExternFunction() { return nullptr; } shared_ptr identifierToken = tokens.at(currentIndex++); - shared_ptr argumentTypeToken = tokens.at(currentIndex++); - shared_ptr argumentType = ValueType::valueTypeForToken(argumentTypeToken); + //shared_ptr argumentTypeToken = tokens.at(currentIndex++); + shared_ptr argumentType = matchValueType(); if (argumentType == nullptr) { markError(TokenKind::TYPE, {}); return nullptr; @@ -136,14 +137,12 @@ shared_ptr Parser::matchStatementMetaExternFunction() { if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - shared_ptr returnTypeToken = tokens.at(currentIndex); - returnType = ValueType::valueTypeForToken(returnTypeToken); + //shared_ptr returnTypeToken = tokens.at(currentIndex); + returnType = matchValueType(); if (returnType == nullptr) { markError(TokenKind::TYPE, {}); return nullptr; } - - currentIndex++; // type } return make_shared(identifierToken->getLexme(), arguments, returnType); @@ -154,21 +153,7 @@ shared_ptr Parser::matchStatementVariable() { return nullptr; shared_ptr identifierToken = tokens.at(currentIndex++); - shared_ptr valueTypeToken = tokens.at(currentIndex); - - shared_ptr valueType; - if (valueTypeToken->getLexme().compare("bool") == 0) - valueType = ValueType::BOOL; - else if (valueTypeToken->getLexme().compare("sint32") == 0) - valueType = ValueType::SINT32; - else if (valueTypeToken->getLexme().compare("real32") == 0) - valueType = ValueType::REAL32; - else { - markError(TokenKind::TYPE, {}); - return nullptr; - } - - currentIndex++; // type + shared_ptr valueType = matchValueType(); // Expect left arrow if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true)) { @@ -205,8 +190,7 @@ shared_ptr Parser::matchStatementFunction() { return nullptr; } shared_ptr identifierToken = tokens.at(currentIndex++); - shared_ptr argumentTypeToken = tokens.at(currentIndex++); - shared_ptr argumentType = ValueType::valueTypeForToken(argumentTypeToken); + shared_ptr argumentType = matchValueType(); if (argumentType == nullptr) { markError(TokenKind::TYPE, {}); return nullptr; @@ -220,14 +204,11 @@ shared_ptr Parser::matchStatementFunction() { if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - shared_ptr returnTypeToken = tokens.at(currentIndex); - returnType = ValueType::valueTypeForToken(returnTypeToken); + returnType = matchValueType(); if (returnType == nullptr) { markError(TokenKind::TYPE, {}); return nullptr; } - - currentIndex++; // type } // consume new line @@ -261,27 +242,43 @@ shared_ptr Parser::matchStatementBlock(vector terminalToke break; // except new line - if (statement != nullptr && !tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) { + if (statement != nullptr && !tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) markError(TokenKind::NEW_LINE, {}); - return nullptr; - } } return make_shared(statements); } shared_ptr Parser::matchStatementAssignment() { - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::LEFT_ARROW}, true, false)) + int startIndex = currentIndex; + + if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER}, true, false)) return nullptr; - shared_ptr identifierToken = tokens.at(currentIndex++); - currentIndex++; // arrow + shared_ptr indexExpression; + + if (tryMatchingTokenKinds({TokenKind::LEFT_SQUARE_BRACKET}, true, true)) { + indexExpression = nextExpression(); + if (indexExpression == nullptr) + return nullptr; + + if (!tryMatchingTokenKinds({TokenKind::RIGHT_SQUARE_BRACKET}, true, true)) { + markError(TokenKind::RIGHT_SQUARE_BRACKET, {}); + return nullptr; + } + } + + // assignment requires left arrow, otherwise abort + if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true)) { + currentIndex = startIndex; + return nullptr; + } shared_ptr expression = nextExpression(); if (expression == nullptr) return nullptr; - return make_shared(identifierToken->getLexme(), expression); + return make_shared(identifierToken->getLexme(), indexExpression, expression); } shared_ptr Parser::matchStatementReturn() { @@ -313,7 +310,7 @@ shared_ptr Parser::matchStatementRepeat() { // got initial, expect comma if (initStatement != nullptr && !tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) { markError(TokenKind::COMMA, {}); - return nullptr; + goto afterIf; } // optional new line @@ -326,7 +323,7 @@ shared_ptr Parser::matchStatementRepeat() { // got pre-condition, expect comma if (!tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) { markError(TokenKind::COMMA, {}); - return nullptr; + goto afterIf; } // optional new line @@ -338,10 +335,11 @@ shared_ptr Parser::matchStatementRepeat() { // expect colon if (!tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { markError(TokenKind::COLON, {}); - return nullptr; + goto afterIf; } } } + afterIf: isMultiLine = tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); @@ -437,28 +435,32 @@ shared_ptr Parser::matchFactor() { shared_ptr Parser::matchPrimary() { shared_ptr expression; + int errorsCount = errors.size(); expression = matchExpressionGrouping(); - if (expression != nullptr) + if (expression != nullptr || errors.size() > errorsCount) return expression; + expression = matchExpressionArrayLiteral(); + if (expression != nullptr || errors.size() > errorsCount) + return expression; + expression = matchExpressionLiteral(); - if (expression != nullptr) + if (expression != nullptr || errors.size() > errorsCount) return expression; expression = matchExpressionCall(); - if (expression != nullptr) + if (expression != nullptr || errors.size() > errorsCount) return expression; expression = matchExpressionVariable(); - if (expression != nullptr) + if (expression != nullptr || errors.size() > errorsCount) return expression; return nullptr; } shared_ptr Parser::matchExpressionGrouping() { - shared_ptr token = tokens.at(currentIndex); if (tryMatchingTokenKinds({TokenKind::LEFT_PAREN}, true, true)) { shared_ptr expression = matchTerm(); // has grouped expression failed? @@ -478,18 +480,53 @@ shared_ptr Parser::matchExpressionLiteral() { shared_ptr token = tokens.at(currentIndex); if (tryMatchingTokenKinds(Token::tokensLiteral, false, true)) - return make_shared(token); + return ExpressionLiteral::expressionLiteralForToken(token); + + return nullptr; +} + +shared_ptr Parser::matchExpressionArrayLiteral() { + if (tryMatchingTokenKinds({TokenKind::STRING}, true, false)) { + return ExpressionArrayLiteral::expressionArrayLiteralForTokenString(tokens.at(currentIndex++)); + } else if (tryMatchingTokenKinds({TokenKind::LEFT_SQUARE_BRACKET}, true, true)) { + vector> expressions; + if (!tryMatchingTokenKinds({TokenKind::RIGHT_SQUARE_BRACKET}, true, true)) { + do { + shared_ptr expression = nextExpression(); + if (expression != nullptr) + expressions.push_back(expression); + } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); + + if (!tryMatchingTokenKinds({TokenKind::RIGHT_SQUARE_BRACKET}, true, true)) { + markError(TokenKind::RIGHT_SQUARE_BRACKET, {}); + return nullptr; + } + } + + return ExpressionArrayLiteral::expressionArrayLiteralForExpressions(expressions); + } return nullptr; } shared_ptr Parser::matchExpressionVariable() { - shared_ptr token = tokens.at(currentIndex); + if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER}, true, false)) + return nullptr; + shared_ptr idToken = tokens.at(currentIndex++); + shared_ptr indexExpression; - if (tryMatchingTokenKinds({TokenKind::IDENTIFIER}, true, true)) - return make_shared(token->getLexme()); + if (tryMatchingTokenKinds({TokenKind::LEFT_SQUARE_BRACKET}, true, true)) { + indexExpression = nextExpression(); + if (indexExpression == nullptr) + return nullptr; + + if (!tryMatchingTokenKinds({TokenKind::RIGHT_SQUARE_BRACKET}, true, true)) { + markError(TokenKind::RIGHT_SQUARE_BRACKET, {}); + return nullptr; + } + } - return nullptr; + return make_shared(idToken->getLexme(), indexExpression); } shared_ptr Parser::matchExpressionCall() { @@ -614,6 +651,46 @@ shared_ptr Parser::matchExpressionBlock(vector terminalTo return make_shared(statements); } +shared_ptr Parser::matchValueType() { + if (!tryMatchingTokenKinds({TokenKind::TYPE}, true, false)) + return nullptr; + shared_ptr typeToken = tokens.at(currentIndex++); + shared_ptr subType; + int valueArg = 0; + + if (tryMatchingTokenKinds({TokenKind::LESS}, true, true)) { + if (!tryMatchingTokenKinds({TokenKind::TYPE}, true, false)) { + markError(TokenKind::TYPE, {}); + return nullptr; + } + subType = matchValueType(); + if (subType == nullptr) + return subType; + + if (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) { + if (!tryMatchingTokenKinds({TokenKind::INTEGER_DEC, TokenKind::INTEGER_HEX, TokenKind::INTEGER_BIN, TokenKind::INTEGER_CHAR}, false, false)) { + markError({}, "Expected integer literal"); + return nullptr; + } + shared_ptr expressionValue = matchExpressionLiteral(); + if (expressionValue == nullptr) { + markError({}, "Expected integer literal"); + return nullptr; + } + + valueArg = dynamic_pointer_cast(expressionValue)->getSint32Value(); + } + + + if (!tryMatchingTokenKinds({TokenKind::GREATER}, true, true)) { + markError(TokenKind::GREATER, {}); + return nullptr; + } + } + + return ValueType::valueTypeForToken(typeToken, subType, valueArg); +} + bool Parser::tryMatchingTokenKinds(vector kinds, bool shouldMatchAll, bool shouldAdvance) { int requiredCount = shouldMatchAll ? kinds.size() : 1; if (currentIndex + requiredCount > tokens.size()) @@ -655,5 +732,9 @@ void Parser::markError(optional expectedTokenKind, optional m while (!tryMatchingTokenKinds(safeKinds, false, true)) currentIndex++; + // Last END should not be consumed + if (currentIndex > tokens.size() - 1) + currentIndex = tokens.size() - 1; + errors.push_back(Error::parserError(actualToken, expectedTokenKind, message)); } diff --git a/src/Parser/Parser.h b/src/Parser/Parser.h index 79dddee..952a087 100644 --- a/src/Parser/Parser.h +++ b/src/Parser/Parser.h @@ -3,9 +3,11 @@ #include +class Error; + class Token; enum class TokenKind; -class Error; +class ValueType; class Expression; class Statement; @@ -40,12 +42,15 @@ private: shared_ptr matchExpressionGrouping(); shared_ptr matchExpressionLiteral(); + shared_ptr matchExpressionArrayLiteral(); shared_ptr matchExpressionVariable(); shared_ptr matchExpressionCall(); shared_ptr matchExpressionIfElse(); shared_ptr matchExpressionBinary(shared_ptr left); shared_ptr matchExpressionBlock(vector terminalTokenKinds); + shared_ptr matchValueType(); + bool tryMatchingTokenKinds(vector kinds, bool shouldMatchAll, bool shouldAdvance); void markError(optional expectedTokenKind, optional message); diff --git a/src/Parser/Statement/StatementAssignment.cpp b/src/Parser/Statement/StatementAssignment.cpp index 24fcff6..ae0d24f 100644 --- a/src/Parser/Statement/StatementAssignment.cpp +++ b/src/Parser/Statement/StatementAssignment.cpp @@ -1,12 +1,16 @@ #include "StatementAssignment.h" -StatementAssignment::StatementAssignment(string name, shared_ptr expression): -Statement(StatementKind::ASSIGNMENT), name(name), expression(expression) { } +StatementAssignment::StatementAssignment(string name, shared_ptr indexExpression, shared_ptr expression): +Statement(StatementKind::ASSIGNMENT), name(name), indexExpression(indexExpression), expression(expression) { } string StatementAssignment::getName() { return name; } +shared_ptr StatementAssignment::getIndexExpression() { + return indexExpression; +} + shared_ptr StatementAssignment::getExpression() { return expression; } \ No newline at end of file diff --git a/src/Parser/Statement/StatementAssignment.h b/src/Parser/Statement/StatementAssignment.h index 0641e3f..0bb7f63 100644 --- a/src/Parser/Statement/StatementAssignment.h +++ b/src/Parser/Statement/StatementAssignment.h @@ -5,10 +5,12 @@ class Expression; class StatementAssignment: public Statement { private: string name; + shared_ptr indexExpression; shared_ptr expression; public: - StatementAssignment(string name, shared_ptr expression); + StatementAssignment(string name, shared_ptr indexExpressio, shared_ptr expression); string getName(); + shared_ptr getIndexExpression(); shared_ptr getExpression(); }; \ No newline at end of file diff --git a/src/Parser/ValueType.cpp b/src/Parser/ValueType.cpp index abd399f..6f5945d 100644 --- a/src/Parser/ValueType.cpp +++ b/src/Parser/ValueType.cpp @@ -2,36 +2,38 @@ #include "Lexer/Token.h" -shared_ptr ValueType::NONE = make_shared(ValueTypeKind::NONE); -shared_ptr ValueType::BOOL = make_shared(ValueTypeKind::BOOL); -shared_ptr ValueType::SINT32 = make_shared(ValueTypeKind::SINT32); -shared_ptr ValueType::REAL32 = make_shared(ValueTypeKind::REAL32); +shared_ptr ValueType::NONE = make_shared(ValueTypeKind::NONE, nullptr, 0); +shared_ptr ValueType::BOOL = make_shared(ValueTypeKind::BOOL, nullptr, 0); +shared_ptr ValueType::SINT32 = make_shared(ValueTypeKind::SINT32, nullptr, 0); +shared_ptr ValueType::REAL32 = make_shared(ValueTypeKind::REAL32, nullptr, 0); -ValueType::ValueType(ValueTypeKind kind): -kind(kind) { } +ValueType::ValueType(ValueTypeKind kind, shared_ptr subType, int valueArg): +kind(kind), subType(subType), valueArg(valueArg) { } -shared_ptr ValueType::valueTypeForToken(shared_ptr token) { +shared_ptr ValueType::valueTypeForToken(shared_ptr token, shared_ptr subType, int valueArg) { switch (token->getKind()) { case TokenKind::TYPE: { string lexme = token->getLexme(); if (lexme.compare("bool") == 0) - return make_shared(ValueTypeKind::BOOL); + return make_shared(ValueTypeKind::BOOL, subType, valueArg); else if (lexme.compare("sint32") == 0) - return make_shared(ValueTypeKind::SINT32); + return make_shared(ValueTypeKind::SINT32, subType, valueArg); else if (lexme.compare("real32") == 0) - return make_shared(ValueTypeKind::REAL32); + return make_shared(ValueTypeKind::REAL32, subType, valueArg); + else if (lexme.compare("data") == 0) + return make_shared(ValueTypeKind::DATA, subType, valueArg); else return nullptr; } case TokenKind::BOOL: - return make_shared(ValueTypeKind::BOOL); + return make_shared(ValueTypeKind::BOOL, nullptr, 0); case TokenKind::INTEGER_DEC: case TokenKind::INTEGER_HEX: case TokenKind::INTEGER_BIN: case TokenKind::INTEGER_CHAR: - return make_shared(ValueTypeKind::SINT32); + return make_shared(ValueTypeKind::SINT32, nullptr, 0); case TokenKind::REAL: - return make_shared(ValueTypeKind::REAL32); + return make_shared(ValueTypeKind::REAL32, nullptr, 0); default: return nullptr; } @@ -39,4 +41,12 @@ shared_ptr ValueType::valueTypeForToken(shared_ptr token) { ValueTypeKind ValueType::getKind() { return kind; +} + +shared_ptr ValueType::getSubType() { + return subType; +} + +int ValueType::getValueArg() { + return valueArg; } \ No newline at end of file diff --git a/src/Parser/ValueType.h b/src/Parser/ValueType.h index 2540cd8..ce118eb 100644 --- a/src/Parser/ValueType.h +++ b/src/Parser/ValueType.h @@ -11,22 +11,27 @@ enum class ValueTypeKind { NONE, BOOL, SINT32, - REAL32 + REAL32, + DATA }; class ValueType { private: ValueTypeKind kind; + shared_ptr subType; + int valueArg; public: static shared_ptr NONE; static shared_ptr BOOL; static shared_ptr SINT32; static shared_ptr REAL32; - static shared_ptr valueTypeForToken(shared_ptr token); + static shared_ptr valueTypeForToken(shared_ptr token, shared_ptr subType, int valueArg); - ValueType(ValueTypeKind kind); + ValueType(ValueTypeKind kind, shared_ptr subType, int valueArg); ValueTypeKind getKind(); + shared_ptr getSubType(); + int getValueArg(); }; #endif \ No newline at end of file diff --git a/src/Utils.cpp b/src/Utils.cpp new file mode 100644 index 0000000..d3ccdbd --- /dev/null +++ b/src/Utils.cpp @@ -0,0 +1,36 @@ +#include "Utils.h" + +optional Utils::charStringToInt(string charString) { + switch (charString.length()) { + case 1: + return charString[0]; + case 3: + return charString[1]; + case 4: + charString[0] = charString[1]; + charString[1] = charString[2]; + case 2: + if (charString[0] != '\\') + return {}; + switch (charString[1]) { + case 'b': + return '\b'; + case 'n': + return '\n'; + case 't': + return '\t'; + case '\\': + return '\\'; + case '\'': + return '\''; + case '\"': + return '\"'; + case '0': + return '\0'; + default: + return {}; + } + default: + return {}; + } +} \ No newline at end of file diff --git a/src/Utils.h b/src/Utils.h new file mode 100644 index 0000000..076808d --- /dev/null +++ b/src/Utils.h @@ -0,0 +1,13 @@ +#ifndef UTILS_H +#define UTILS_H + +#include + +using namespace std; + +class Utils { +public: + static optional charStringToInt(string charString); +}; + +#endif \ No newline at end of file