diff --git a/.gitignore b/.gitignore index 94ff068..b46f74d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ +# ignore files without extension +* +!*.* +!*/ + .DS_Store .vscode/settings.json @@ -7,3 +12,4 @@ build/ # brb build artifiacts *.o +*.asm \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json index 5574805..4b91f3e 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -6,7 +6,7 @@ "type": "lldb-dap", "request": "launch", "program": "${command:cmake.launchTargetPath}", - "args": ["-v", "${workspaceFolder}/samples/hello.brc"], + "args": ["-v", "${workspaceFolder}/samples/test.brc"], "cwd": "${workspaceFolder}", "internalConsoleOptions": "openOnSessionStart", } diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 0fc81a3..99f9dd4 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -3,12 +3,17 @@ "tasks": [ { "label": "Build Bits Runner Builder (Debug)", - "type": "cmake", - "command": "build", + "type": "shell", + "command": "cmake -B build && cmake --build build --config Debug", "group": { "kind": "build", "isDefault": true } + }, + { + "label": "Clean", + "type": "shell", + "command": "rm -rf build *.o *.asm; find ./ -perm +100 -type f -maxdepth 1 -delete" } ] } \ No newline at end of file diff --git a/samples/fib.brc b/samples/fib.brc index 11fd516..9835c85 100644 --- a/samples/fib.brc +++ b/samples/fib.brc @@ -1,6 +1,6 @@ -@extern putchar fun: character sint32 -> sint32 +@extern putchar fun: character u32 -> u32 -fib fun: number sint32 -> sint32 +fib fun: number u32 -> u32 ret if number < 2: number else @@ -8,13 +8,13 @@ fib fun: number sint32 -> sint32 ; ; -printNum fun: number sint32 - biggest sint32 <- 10 +printNum fun: number u32 + biggest u32 <- 10 rep biggest <= number: biggest <- biggest * 10 biggest <- biggest / 10 rep biggest > 0: - digit sint32 <- number / biggest + digit u32 <- number / biggest putchar(digit + '0') number <- number % biggest biggest <- biggest / 10 @@ -22,9 +22,9 @@ 
printNum fun: number sint32 ; // Print first 20 fibonaci numbers -main fun -> sint32 - rep i sint32 <- 0, i < 20: - res sint32 <- fib(i) +main fun -> u32 + rep i u32 <- 0, i < 20: + res u32 <- fib(i) printNum(res) putchar('\n') i <- i + 1 diff --git a/samples/hello.brc b/samples/hello.brc index b8fa9d7..e93c560 100644 --- a/samples/hello.brc +++ b/samples/hello.brc @@ -1,9 +1,9 @@ -@extern putchar fun: character sint32 -> sint32 +@extern putchar fun: character u32 -> u32 -main fun -> sint32 - text data <- "Hello, world!\n" +main fun -> u32 + text data <- "Hello, world!\n" - rep i sint32 <- 0, text[i] != 0: + rep i u32 <- 0, text[i] != 0: putchar(text[i]) i <- i + 1 ; diff --git a/samples/test.brc b/samples/test.brc index f71b061..baa3e8f 100644 --- a/samples/test.brc +++ b/samples/test.brc @@ -1,4 +1,6 @@ -@extern putchar fun: character sint32 -> sint32 +//@extern putchar fun: character sint32 -> sint32 + +// ./build/brb samples/test.brc -S -x86-asm-syntax=intel /* User type @@ -18,8 +20,42 @@ i u32 <- 0, rep text[i] != 0: // text data <- "Hello world!" 
-main fun -> sint32 - text data <- "Hello string!\n" +/*addStuff asm<"+r, r">: num1 u32, num2 u32 -> u32 + add $1, $0 +;*/ + +/*normAdd fun: num1 sint32, num2 sint32 -> sint32 + ret num1 + num2 +;*/ + +/*rawAdd raw<"=r,r,r">: num1 sint32, num2 sint32 -> sint32 + add $1, $2 + mov $0, $1 +;*/ + +/*rawAdd raw: num1 sint32, num2 sint32 -> sint32 + add $1, $2 + mov $0, $1 +;*/ + +/*printChar raw + .global REGISTER + .text + .REGISTER: + .byte "Hello", 0xa0 + .long RegisterTable + //push 0x21 + +;*/ + +main fun -> u32 + num1 u8 <- 42 + num2 s8 <- 3 - +15 + num3 u32 <- 1234123 + num4 s32 <- -345345 + num5 r32 <- -42.58 + + num5 + num3 ret 0 ; \ No newline at end of file diff --git a/src/Compiler/ModuleBuilder.cpp b/src/Compiler/ModuleBuilder.cpp index 4e1aaa1..4fdd0d8 100644 --- a/src/Compiler/ModuleBuilder.cpp +++ b/src/Compiler/ModuleBuilder.cpp @@ -11,9 +11,11 @@ #include "Parser/Expression/ExpressionCall.h" #include "Parser/Expression/ExpressionIfElse.h" #include "Parser/Expression/ExpressionBinary.h" +#include "Parser/Expression/ExpressionUnary.h" #include "Parser/Expression/ExpressionBlock.h" #include "Parser/Statement/StatementFunction.h" +#include "Parser/Statement/StatementRawFunction.h" #include "Parser/Statement/StatementVariable.h" #include "Parser/Statement/StatementAssignment.h" #include "Parser/Statement/StatementReturn.h" @@ -31,8 +33,11 @@ moduleName(moduleName), sourceFileName(sourceFileName), statements(statements) { typeVoid = llvm::Type::getVoidTy(*context); typeBool = llvm::Type::getInt1Ty(*context); - typeSint32 = llvm::Type::getInt32Ty(*context); - typeReal32 = llvm::Type::getFloatTy(*context); + typeU8 = llvm::Type::getInt8Ty(*context); + typeU32 = llvm::Type::getInt32Ty(*context); + typeS8 = llvm::Type::getInt8Ty(*context); + typeS32 = llvm::Type::getInt32Ty(*context); + typeR32 = llvm::Type::getFloatTy(*context); } shared_ptr ModuleBuilder::getModule() { @@ -40,6 +45,12 @@ shared_ptr ModuleBuilder::getModule() { for (shared_ptr &statement : 
statements) buildStatement(statement); + // verify module + string errorMessage; + llvm::raw_string_ostream llvmErrorMessage(errorMessage); + if (llvm::verifyModule(*module, &llvmErrorMessage)) + markError(0, 0, errorMessage); + if (!errors.empty()) { for (shared_ptr &error : errors) Logger::print(error); @@ -52,7 +63,10 @@ shared_ptr ModuleBuilder::getModule() { void ModuleBuilder::buildStatement(shared_ptr statement) { switch (statement->getKind()) { case StatementKind::FUNCTION: - buildFunctionDeclaration(dynamic_pointer_cast(statement)); + buildFunction(dynamic_pointer_cast(statement)); + break; + case StatementKind::RAW_FUNCTION: + buildRawFunction(dynamic_pointer_cast(statement)); break; case StatementKind::VARIABLE: buildVarDeclaration(dynamic_pointer_cast(statement)); @@ -80,15 +94,21 @@ void ModuleBuilder::buildStatement(shared_ptr statement) { } } -void ModuleBuilder::buildFunctionDeclaration(shared_ptr statement) { - // get argument types - vector types; +void ModuleBuilder::buildFunction(shared_ptr statement) { + // function types + llvm::Type *returnType = typeForValueType(statement->getReturnValueType()); + if (returnType == nullptr) + return; + vector argTypes; for (pair> &arg : statement->getArguments()) { - types.push_back(typeForValueType(arg.second)); + llvm::Type *argType = typeForValueType(arg.second); + if (argType == nullptr) + return; + argTypes.push_back(argType); } // build function declaration - llvm::FunctionType *funType = llvm::FunctionType::get(typeForValueType(statement->getReturnValueType()), types, false); + llvm::FunctionType *funType = llvm::FunctionType::get(returnType, argTypes, false); llvm::Function *fun = llvm::Function::Create(funType, llvm::GlobalValue::ExternalLinkage, statement->getName(), module.get()); if (!setFun(statement->getName(), fun)) return; @@ -103,7 +123,7 @@ void ModuleBuilder::buildFunctionDeclaration(shared_ptr state int i=0; for (auto &arg : fun->args()) { string name = statement->getArguments()[i].first; 
- llvm::Type *type = types[i]; + llvm::Type *type = argTypes[i]; arg.setName(name); llvm::AllocaInst *alloca = builder->CreateAlloca(type, nullptr, name); @@ -126,6 +146,20 @@ void ModuleBuilder::buildFunctionDeclaration(shared_ptr state markError(0, 0, errorMessage); } +void ModuleBuilder::buildRawFunction(shared_ptr statement) { + // function types + llvm::Type *returnType = typeForValueType(statement->getReturnValueType()); + vector argTypes; + for (pair> &arg : statement->getArguments()) + argTypes.push_back(typeForValueType(arg.second)); + + // build function declaration & body + llvm::FunctionType *funType = llvm::FunctionType::get(returnType, argTypes, false); + llvm::InlineAsm *rawFun = llvm::InlineAsm::get(funType, statement->getRawSource(), statement->getConstraints(), true, false, llvm::InlineAsm::AsmDialect::AD_Intel); + if (!setRawFun(statement->getName(), rawFun)) + return; +} + void ModuleBuilder::buildVarDeclaration(shared_ptr statement) { if (statement->getValueType()->getKind() == ValueTypeKind::DATA) { vector values = valuesForExpression(statement->getExpression()); @@ -145,6 +179,8 @@ void ModuleBuilder::buildVarDeclaration(shared_ptr statement) } } else { llvm::Value *value = valueForExpression(statement->getExpression()); + if (value == nullptr) + return; llvm::AllocaInst *alloca = builder->CreateAlloca(typeForValueType(statement->getValueType(), 0), nullptr, statement->getName()); if (!setAlloca(statement->getName(), alloca)) @@ -244,7 +280,10 @@ void ModuleBuilder::buildMetaExternFunction(shared_ptrgetReturnValueType()), types, false); + llvm::Type *returnType = typeForValueType(statement->getReturnValueType()); + if (returnType == nullptr) + return; + llvm::FunctionType *funType = llvm::FunctionType::get(returnType, types, false); llvm::Function *fun = llvm::Function::Create(funType, llvm::GlobalValue::ExternalLinkage, statement->getName(), module.get()); if (!setFun(statement->getName(), fun)) return; @@ -270,6 +309,8 @@ llvm::Value 
*ModuleBuilder::valueForExpression(shared_ptr expression return valueForExpression(dynamic_pointer_cast(expression)->getExpression()); case ExpressionKind::BINARY: return valueForBinary(dynamic_pointer_cast(expression)); + case ExpressionKind::UNARY: + return valueForUnary(dynamic_pointer_cast(expression)); case ExpressionKind::IF_ELSE: return valueForIfElse(dynamic_pointer_cast(expression)); case ExpressionKind::VAR: @@ -301,10 +342,16 @@ llvm::Value *ModuleBuilder::valueForLiteral(shared_ptr expres return llvm::UndefValue::get(typeVoid); case ValueTypeKind::BOOL: return llvm::ConstantInt::get(typeBool, expression->getBoolValue(), true); - case ValueTypeKind::SINT32: - return llvm::ConstantInt::get(typeSint32, expression->getSint32Value(), true); - case ValueTypeKind::REAL32: - return llvm::ConstantInt::get(typeReal32, expression->getReal32Value(), true); + case ValueTypeKind::U8: + return llvm::ConstantInt::get(typeU8, expression->getU8Value(), false); // unsigned literal: zero-extend, never sign-extend + case ValueTypeKind::U32: + return llvm::ConstantInt::get(typeU32, expression->getU32Value(), false); // unsigned literal: zero-extend, never sign-extend + case ValueTypeKind::S8: + return llvm::ConstantInt::get(typeS8, expression->getS8Value(), true); + case ValueTypeKind::S32: + return llvm::ConstantInt::get(typeS32, expression->getS32Value(), true); + case ValueTypeKind::R32: + return llvm::ConstantFP::get(typeR32, expression->getR32Value()); } } @@ -331,9 +378,11 @@ llvm::Value *ModuleBuilder::valueForBinary(shared_ptr expressi if (type == typeBool) { return valueForBinaryBool(expression->getOperation(), leftValue, rightValue); - } else if (type == typeSint32 || type == typeVoid) { - return valueForBinaryInteger(expression->getOperation(), leftValue, rightValue); - } else if (type == typeReal32) { + } else if (type == typeU8 || type == typeU32) { + return valueForBinaryUnsignedInteger(expression->getOperation(), leftValue, rightValue); + } else if (type == typeS8 || type == typeS32) { + return valueForBinarySignedInteger(expression->getOperation(), leftValue, 
rightValue); + } else if (type == typeR32) { return valueForBinaryReal(expression->getOperation(), leftValue, rightValue); } @@ -348,35 +397,62 @@ llvm::Value *ModuleBuilder::valueForBinaryBool(ExpressionBinaryOperation operati case ExpressionBinaryOperation::NOT_EQUAL: return builder->CreateICmpNE(leftValue, rightValue); default: - markError(0, 0, "Unexpecgted operation for boolean operands"); + markError(0, 0, "Unexpected operation for boolean operands"); return nullptr; } } -llvm::Value *ModuleBuilder::valueForBinaryInteger(ExpressionBinaryOperation operation, llvm::Value *leftValue, llvm::Value *rightValue) { +llvm::Value *ModuleBuilder::valueForBinaryUnsignedInteger(ExpressionBinaryOperation operation, llvm::Value *leftValue, llvm::Value *rightValue) { switch (operation) { - case ExpressionBinaryOperation::EQUAL: - return builder->CreateICmpEQ(leftValue, rightValue); - case ExpressionBinaryOperation::NOT_EQUAL: - return builder->CreateICmpNE(leftValue, rightValue); - case ExpressionBinaryOperation::LESS: - return builder->CreateICmpSLT(leftValue, rightValue); - case ExpressionBinaryOperation::LESS_EQUAL: - return builder->CreateICmpSLE(leftValue, rightValue); - case ExpressionBinaryOperation::GREATER: - return builder->CreateICmpSGT(leftValue, rightValue); - case ExpressionBinaryOperation::GREATER_EQUAL: - return builder->CreateICmpSGE(leftValue, rightValue); - case ExpressionBinaryOperation::ADD: - return builder->CreateNSWAdd(leftValue, rightValue); - case ExpressionBinaryOperation::SUB: - return builder->CreateNSWSub(leftValue, rightValue); - case ExpressionBinaryOperation::MUL: - return builder->CreateNSWMul(leftValue, rightValue); - case ExpressionBinaryOperation::DIV: - return builder->CreateSDiv(leftValue, rightValue); - case ExpressionBinaryOperation::MOD: - return builder->CreateSRem(leftValue, rightValue); + case ExpressionBinaryOperation::EQUAL: + return builder->CreateICmpEQ(leftValue, rightValue); + case ExpressionBinaryOperation::NOT_EQUAL: + 
return builder->CreateICmpNE(leftValue, rightValue); + case ExpressionBinaryOperation::LESS: + return builder->CreateICmpULT(leftValue, rightValue); // unsigned operands: ordering must use the U* predicates + case ExpressionBinaryOperation::LESS_EQUAL: + return builder->CreateICmpULE(leftValue, rightValue); + case ExpressionBinaryOperation::GREATER: + return builder->CreateICmpUGT(leftValue, rightValue); + case ExpressionBinaryOperation::GREATER_EQUAL: + return builder->CreateICmpUGE(leftValue, rightValue); + case ExpressionBinaryOperation::ADD: + return builder->CreateNUWAdd(leftValue, rightValue); + case ExpressionBinaryOperation::SUB: + return builder->CreateNUWSub(leftValue, rightValue); + case ExpressionBinaryOperation::MUL: + return builder->CreateNUWMul(leftValue, rightValue); + case ExpressionBinaryOperation::DIV: + return builder->CreateUDiv(leftValue, rightValue); + case ExpressionBinaryOperation::MOD: + return builder->CreateURem(leftValue, rightValue); + } +} + +llvm::Value *ModuleBuilder::valueForBinarySignedInteger(ExpressionBinaryOperation operation, llvm::Value *leftValue, llvm::Value *rightValue) { + switch (operation) { + case ExpressionBinaryOperation::EQUAL: + return builder->CreateICmpEQ(leftValue, rightValue); + case ExpressionBinaryOperation::NOT_EQUAL: + return builder->CreateICmpNE(leftValue, rightValue); + case ExpressionBinaryOperation::LESS: + return builder->CreateICmpSLT(leftValue, rightValue); + case ExpressionBinaryOperation::LESS_EQUAL: + return builder->CreateICmpSLE(leftValue, rightValue); + case ExpressionBinaryOperation::GREATER: + return builder->CreateICmpSGT(leftValue, rightValue); + case ExpressionBinaryOperation::GREATER_EQUAL: + return builder->CreateICmpSGE(leftValue, rightValue); + case ExpressionBinaryOperation::ADD: + return builder->CreateNSWAdd(leftValue, rightValue); + case ExpressionBinaryOperation::SUB: + return builder->CreateNSWSub(leftValue, rightValue); + case ExpressionBinaryOperation::MUL: + return builder->CreateNSWMul(leftValue, rightValue); + case 
ExpressionBinaryOperation::DIV: + return builder->CreateSDiv(leftValue, rightValue); + case ExpressionBinaryOperation::MOD: + return builder->CreateSRem(leftValue, rightValue); } } @@ -407,6 +483,26 @@ llvm::Value *ModuleBuilder::valueForBinaryReal(ExpressionBinaryOperation operati } } +llvm::Value *ModuleBuilder::valueForUnary(shared_ptr expression) { + llvm::Value *value = valueForExpression(expression->getExpression()); + llvm::Type *type = value->getType(); + + // do nothing for plus + if (expression->getOperation() == ExpressionUnaryOperation::PLUS) + return value; + + if (type == typeU8 || type == typeU32) { + return builder->CreateNeg(value); + } else if (type == typeS8 || type == typeS32) { + return builder->CreateNSWNeg(value); + } else if (type == typeR32) { + return builder->CreateFNeg(value); + } + + markError(0, 0, "Unexpected operation"); + return nullptr; +} + llvm::Value *ModuleBuilder::valueForIfElse(shared_ptr expression) { shared_ptr conditionExpression = expression->getCondition(); @@ -481,15 +577,28 @@ llvm::Value *ModuleBuilder::valueForVar(shared_ptr expressio llvm::Value *ModuleBuilder::valueForCall(shared_ptr expression) { llvm::Function *fun = getFun(expression->getName()); - if (fun == nullptr) - return nullptr; - llvm::FunctionType *funType = fun->getFunctionType(); - vector argValues; - for (shared_ptr &argumentExpression : expression->getArgumentExpressions()) { - llvm::Value *argValue = valueForExpression(argumentExpression); - argValues.push_back(argValue); + if (fun != nullptr) { + llvm::FunctionType *funType = fun->getFunctionType(); + vector argValues; + for (shared_ptr &argumentExpression : expression->getArgumentExpressions()) { + llvm::Value *argValue = valueForExpression(argumentExpression); + argValues.push_back(argValue); + } + return builder->CreateCall(funType, fun, llvm::ArrayRef(argValues)); } - return builder->CreateCall(funType, fun, llvm::ArrayRef(argValues)); + + llvm::InlineAsm *rawFun = 
getRawFun(expression->getName()); + if (rawFun != nullptr) { + vectorargValues; + for (shared_ptr &argumentExpression : expression->getArgumentExpressions()) { + llvm::Value *argValue = valueForExpression(argumentExpression); + argValues.push_back(argValue); + } + return builder->CreateCall(rawFun, llvm::ArrayRef(argValues)); + } + + markError(0, 0, format("Function \"{}\" not defined in scope", expression->getName())); + return nullptr; } bool ModuleBuilder::setAlloca(string name, llvm::AllocaInst *alloca) { @@ -518,7 +627,7 @@ llvm::AllocaInst* ModuleBuilder::getAlloca(string name) { bool ModuleBuilder::setFun(string name, llvm::Function *fun) { if (scopes.top().funMap[name] != nullptr) { - markError(0, 0, format("Function \"{}\" already defined", name)); + markError(0, 0, format("Function \"{}\" already defined in scope", name)); return false; } @@ -536,27 +645,59 @@ llvm::Function* ModuleBuilder::getFun(string name) { scopes.pop(); } - markError(0, 0, format("Function \"{}\" not defined in scope", name)); + return nullptr; +} + +bool ModuleBuilder::setRawFun(string name, llvm::InlineAsm *rawFun) { + if (scopes.top().rawFunMap[name] != nullptr) { + markError(0, 0, format("Raw function \"{}\" already defined in scope", name)); + return false; + } + + scopes.top().rawFunMap[name] = rawFun; + return true; +} + +llvm::InlineAsm *ModuleBuilder::getRawFun(string name) { + stack scopes = this->scopes; + + while (!scopes.empty()) { + llvm::InlineAsm *rawFun = scopes.top().rawFunMap[name]; + if (rawFun != nullptr) + return rawFun; + scopes.pop(); + } + return nullptr; } llvm::Type *ModuleBuilder::typeForValueType(shared_ptr valueType, int count) { + if (valueType == nullptr) { + markError(0, 0, "Missing type"); + return nullptr; + } + switch (valueType->getKind()) { case ValueTypeKind::NONE: return typeVoid; case ValueTypeKind::BOOL: return typeBool; - case ValueTypeKind::SINT32: - return typeSint32; - case ValueTypeKind::REAL32: - return typeReal32; + case 
ValueTypeKind::U8: + return typeU8; + case ValueTypeKind::U32: + return typeU32; + case ValueTypeKind::S8: + return typeS8; + case ValueTypeKind::S32: + return typeS32; + case ValueTypeKind::R32: + return typeR32; case ValueTypeKind::DATA: { if (valueType->getSubType() == nullptr) return nullptr; if (valueType->getValueArg() > 0) count = valueType->getValueArg(); return llvm::ArrayType::get(typeForValueType(valueType->getSubType(), count), count); - return nullptr; } } } diff --git a/src/Compiler/ModuleBuilder.h b/src/Compiler/ModuleBuilder.h index 783e984..872b4ad 100644 --- a/src/Compiler/ModuleBuilder.h +++ b/src/Compiler/ModuleBuilder.h @@ -10,6 +10,7 @@ #include #include #include +#include class Error; class ValueType; @@ -22,10 +23,12 @@ class ExpressionVariable; class ExpressionCall; class ExpressionIfElse; class ExpressionBinary; +class ExpressionUnary; enum class ExpressionBinaryOperation; class Statement; class StatementFunction; +class StatementRawFunction; class StatementVariable; class StatementAssignment; class StatementReturn; @@ -39,6 +42,7 @@ using namespace std; typedef struct { map allocaMap; map funMap; + map rawFunMap; } Scope; class ModuleBuilder { @@ -53,14 +57,18 @@ private: llvm::Type *typeVoid; llvm::Type *typeBool; - llvm::IntegerType *typeSint32; - llvm::Type *typeReal32; + llvm::IntegerType *typeU8; + llvm::IntegerType *typeU32; + llvm::IntegerType *typeS8; + llvm::IntegerType *typeS32; + llvm::Type *typeR32; vector> statements; stack scopes; void buildStatement(shared_ptr statement); - void buildFunctionDeclaration(shared_ptr statement); + void buildFunction(shared_ptr statement); + void buildRawFunction(shared_ptr statement); void buildVarDeclaration(shared_ptr statement); void buildAssignment(shared_ptr statement); void buildBlock(shared_ptr statement); @@ -76,8 +84,10 @@ private: llvm::Value *valueForGrouping(shared_ptr expression); llvm::Value *valueForBinary(shared_ptr expression); llvm::Value 
*valueForBinaryBool(ExpressionBinaryOperation operation, llvm::Value *leftValue, llvm::Value *rightValue); - llvm::Value *valueForBinaryInteger(ExpressionBinaryOperation operation, llvm::Value *leftValue, llvm::Value *rightValue); + llvm::Value *valueForBinaryUnsignedInteger(ExpressionBinaryOperation operation, llvm::Value *leftValue, llvm::Value *rightValue); + llvm::Value *valueForBinarySignedInteger(ExpressionBinaryOperation operation, llvm::Value *leftValue, llvm::Value *rightValue); llvm::Value *valueForBinaryReal(ExpressionBinaryOperation operation, llvm::Value *leftValue, llvm::Value *rightValue); + llvm::Value *valueForUnary(shared_ptr expression); llvm::Value *valueForIfElse(shared_ptr expression); llvm::Value *valueForVar(shared_ptr expression); llvm::Value *valueForCall(shared_ptr expression); @@ -88,6 +98,9 @@ private: bool setFun(string name, llvm::Function *fun); llvm::Function *getFun(string name); + bool setRawFun(string name, llvm::InlineAsm *rawFun); + llvm::InlineAsm *getRawFun(string name); + llvm::Type *typeForValueType(shared_ptr valueType, int count = 0); void markError(int line, int column, string message); diff --git a/src/Lexer/Lexer.cpp b/src/Lexer/Lexer.cpp index c00f19c..0706245 100644 --- a/src/Lexer/Lexer.cpp +++ b/src/Lexer/Lexer.cpp @@ -12,6 +12,8 @@ vector> Lexer::getTokens() { currentIndex = 0; currentLine = 0; currentColumn = 0; + foundRawSourceStart = false; + isParsingRawSource = false; tokens.clear(); errors.clear(); @@ -117,6 +119,11 @@ shared_ptr Lexer::nextToken() { return nextToken(); // gets rid of remaining white spaces without repeating the code } + // raw source + token = matchRawSourceLine(); + if (token != nullptr) + return token; + // structural token = match(TokenKind::LEFT_PAREN, "(", false); if (token != nullptr) @@ -212,6 +219,12 @@ shared_ptr Lexer::nextToken() { token = match(TokenKind::FUNCTION, "fun", true); if (token != nullptr) return token; + + token = match(TokenKind::RAW_FUNCTION, "raw", true); + if 
(token != nullptr) { + foundRawSourceStart = true; + return token; + } token = match(TokenKind::RETURN, "ret", true); if (token != nullptr) @@ -271,8 +284,10 @@ shared_ptr Lexer::nextToken() { // new line token = match(TokenKind::NEW_LINE, "\n", false); - if (token != nullptr) + if (token != nullptr) { + tryStartingRawSourceParsing(); return token; + } // eof token = matchEnd(); @@ -430,21 +445,6 @@ shared_ptr Lexer::matchString() { return token; } -shared_ptr Lexer::matchIdentifier() { - int nextIndex = currentIndex; - - while (nextIndex < source.length() && isIdentifier(nextIndex)) - nextIndex++; - - if (nextIndex == currentIndex || !isSeparator(nextIndex)) - return nullptr; - - string lexme = source.substr(currentIndex, nextIndex - currentIndex); - shared_ptr token = make_shared(TokenKind::IDENTIFIER, lexme, currentLine, currentColumn); - advanceWithToken(token); - return token; -} - shared_ptr Lexer::matchType() { int nextIndex = currentIndex; @@ -463,6 +463,52 @@ shared_ptr Lexer::matchType() { return token; } +shared_ptr Lexer::matchIdentifier() { + int nextIndex = currentIndex; + + while (nextIndex < source.length() && isIdentifier(nextIndex)) + nextIndex++; + + if (nextIndex == currentIndex || !isSeparator(nextIndex)) + return nullptr; + + string lexme = source.substr(currentIndex, nextIndex - currentIndex); + shared_ptr token = make_shared(TokenKind::IDENTIFIER, lexme, currentLine, currentColumn); + advanceWithToken(token); + return token; +} + +void Lexer::tryStartingRawSourceParsing() { + if (!foundRawSourceStart) + return; + + if (!tokens.at(tokens.size() - 1)->isOfKind({TokenKind::COLON, TokenKind::COMMA, TokenKind::RIGHT_ARROW})) { + foundRawSourceStart = false; + isParsingRawSource = true; + } +} + +shared_ptr Lexer::matchRawSourceLine() { + int nextIndex = currentIndex; + + if (!isParsingRawSource) + return nullptr; + + if (nextIndex >= source.length() || source.at(nextIndex) == ';') { + isParsingRawSource = false; + return nullptr; + } + + while (nextIndex < source.length() && source.at(nextIndex) != '\n') + 
nextIndex++; + + string lexme = source.substr(currentIndex, nextIndex - currentIndex); + shared_ptr token = make_shared(TokenKind::RAW_SOURCE_LINE, lexme, currentLine, currentColumn); + advanceWithToken(token); + if (currentIndex < source.length()) currentIndex++; // skip newline (absent when the raw line ends at end of input) + return token; +} + shared_ptr Lexer::matchEnd() { if (currentIndex >= source.length()) return make_shared(TokenKind::END, "", currentLine, currentColumn); @@ -530,11 +576,15 @@ bool Lexer::isSeparator(int index) { } void Lexer::advanceWithToken(shared_ptr token) { - if (token->getKind() == TokenKind::NEW_LINE) { - currentLine++; - currentColumn = 0; - } else { - currentColumn += token->getLexme().length(); + switch (token->getKind()) { + case TokenKind::NEW_LINE: + case TokenKind::RAW_SOURCE_LINE: + currentLine++; + currentColumn = 0; + break; + default: + currentColumn += token->getLexme().length(); + break; } currentIndex += token->getLexme().length(); } diff --git a/src/Lexer/Lexer.h b/src/Lexer/Lexer.h index 9cc8cc6..805ad50 100644 --- a/src/Lexer/Lexer.h +++ b/src/Lexer/Lexer.h @@ -17,6 +17,8 @@ private: int currentColumn; vector> tokens; vector> errors; + bool foundRawSourceStart; + bool isParsingRawSource; shared_ptr nextToken(); shared_ptr match(TokenKind kind, string lexme, bool needsSeparator); @@ -28,6 +30,8 @@ private: shared_ptr matchString(); shared_ptr matchType(); shared_ptr matchIdentifier(); + void tryStartingRawSourceParsing(); + shared_ptr matchRawSourceLine(); shared_ptr matchEnd(); bool isWhiteSpace(int index); diff --git a/src/Lexer/Token.cpp b/src/Lexer/Token.cpp index 5fcc1df..2adddfe 100644 --- a/src/Lexer/Token.cpp +++ b/src/Lexer/Token.cpp @@ -4,21 +4,30 @@ vector Token::tokensEquality = { TokenKind::EQUAL, TokenKind::NOT_EQUAL }; + vector Token::tokensComparison = { TokenKind::LESS, TokenKind::LESS_EQUAL, TokenKind::GREATER, TokenKind::GREATER_EQUAL }; + vector Token::tokensTerm = { TokenKind::PLUS, TokenKind::MINUS }; + vector Token::tokensFactor = { TokenKind::STAR, TokenKind::SLASH, 
TokenKind::PERCENT }; + +vector Token::tokensUnary = { + TokenKind::PLUS, + TokenKind::MINUS +}; + vector Token::tokensBinary = { TokenKind::EQUAL, TokenKind::NOT_EQUAL, @@ -35,6 +44,7 @@ vector Token::tokensBinary = { TokenKind::SLASH, TokenKind::PERCENT }; + vector Token::tokensLiteral = { TokenKind::BOOL, TokenKind::INTEGER_DEC, diff --git a/src/Lexer/Token.h b/src/Lexer/Token.h index 2c4ce2e..4b1ab57 100644 --- a/src/Lexer/Token.h +++ b/src/Lexer/Token.h @@ -30,6 +30,8 @@ enum class TokenKind { RIGHT_ARROW, FUNCTION, + RAW_FUNCTION, + RAW_SOURCE_LINE, RETURN, REPEAT, IF, @@ -63,6 +65,7 @@ public: static vector tokensComparison; static vector tokensTerm; static vector tokensFactor; + static vector tokensUnary; static vector tokensBinary; static vector tokensLiteral; diff --git a/src/Logger.cpp b/src/Logger.cpp index 9055750..d5ca83d 100644 --- a/src/Logger.cpp +++ b/src/Logger.cpp @@ -11,6 +11,7 @@ #include "Parser/Statement/StatementMetaExternFunction.h" #include "Parser/Statement/StatementVariable.h" #include "Parser/Statement/StatementFunction.h" +#include "Parser/Statement/StatementRawFunction.h" #include "Parser/Statement/StatementBlock.h" #include "Parser/Statement/StatementAssignment.h" #include "Parser/Statement/StatementReturn.h" @@ -19,6 +20,7 @@ #include "Parser/Expression/Expression.h" #include "Parser/Expression/ExpressionBinary.h" +#include "Parser/Expression/ExpressionUnary.h" #include "Parser/Expression/ExpressionIfElse.h" #include "Parser/Expression/ExpressionVariable.h" #include "Parser/Expression/ExpressionGrouping.h" @@ -97,6 +99,10 @@ string Logger::toString(shared_ptr token) { return "ELSE"; case TokenKind::FUNCTION: return "FUN"; + case TokenKind::RAW_FUNCTION: + return "RAW"; + case TokenKind::RAW_SOURCE_LINE: + return format("RAW_SOURCE_LINE({})", token->getLexme()); case TokenKind::RETURN: return "RET"; case TokenKind::REPEAT: @@ -179,6 +185,8 @@ string Logger::toString(TokenKind tokenKind) { return "ELSE"; case TokenKind::FUNCTION: 
return "FUN"; + case TokenKind::RAW_FUNCTION: + return "RAW"; case TokenKind::RETURN: return "RET"; case TokenKind::REPEAT: @@ -195,15 +203,24 @@ string Logger::toString(TokenKind tokenKind) { } string Logger::toString(shared_ptr valueType) { + if (valueType == nullptr) + return "{INVALID}"; + switch (valueType->getKind()) { case ValueTypeKind::NONE: return "NONE"; case ValueTypeKind::BOOL: return "BOOL"; - case ValueTypeKind::SINT32: - return "SINT32"; - case ValueTypeKind::REAL32: - return "REAL32"; + case ValueTypeKind::U8: + return "U8"; + case ValueTypeKind::U32: + return "U32"; + case ValueTypeKind::S8: + return "S8"; + case ValueTypeKind::S32: + return "S32"; + case ValueTypeKind::R32: + return "R32"; case ValueTypeKind::DATA: return "[]"; } @@ -217,6 +234,8 @@ string Logger::toString(shared_ptr statement) { return toString(dynamic_pointer_cast(statement)); case StatementKind::FUNCTION: return toString(dynamic_pointer_cast(statement)); + case StatementKind::RAW_FUNCTION: + return toString(dynamic_pointer_cast(statement)); case StatementKind::BLOCK: return toString(dynamic_pointer_cast(statement)); case StatementKind::ASSIGNMENT: @@ -262,6 +281,21 @@ string Logger::toString(shared_ptr statement) { return text; } +string Logger::toString(shared_ptr statement) { + string text; + + string argsString; + for (int i = 0; i < statement->getArguments().size(); i++) { + auto arg = statement->getArguments().at(i); + argsString += format("ARG({}, {})", arg.first, toString(arg.second)); + } + text += format("RAW(\"{}\"|{}|{}):\n", statement->getName(), argsString, toString(statement->getReturnValueType())); + + text += statement->getRawSource(); + + return text; +} + string Logger::toString(shared_ptr statement) { string text; @@ -314,7 +348,9 @@ string Logger::toString(shared_ptr statement) { string Logger::toString(shared_ptr expression) { switch (expression->getKind()) { case ExpressionKind::BINARY: - return toString(dynamic_pointer_cast(expression)); + return 
toString(dynamic_pointer_cast(expression)); + case ExpressionKind::UNARY: + return toString(dynamic_pointer_cast(expression)); case ExpressionKind::IF_ELSE: return toString(dynamic_pointer_cast(expression)); case ExpressionKind::VAR: @@ -361,6 +397,17 @@ string Logger::toString(shared_ptr expression) { } } +string Logger::toString(shared_ptr expression) { + switch (expression->getOperation()) { + case ExpressionUnaryOperation::PLUS: + return "+" + toString(expression->getExpression()); + case ExpressionUnaryOperation::MINUS: + return "-" + toString(expression->getExpression()); + case ExpressionUnaryOperation::INVALID: + return "{INVALID}"; + } +} + string Logger::toString(shared_ptr expression) { string text; @@ -396,10 +443,16 @@ string Logger::toString(shared_ptr expression) { return "NONE"; case ValueTypeKind::BOOL: return expression->getBoolValue() ? "true" : "false"; - case ValueTypeKind::SINT32: - return to_string(expression->getSint32Value()); - case ValueTypeKind::REAL32: - return to_string(expression->getReal32Value()); + case ValueTypeKind::U8: + return to_string(expression->getU8Value()); + case ValueTypeKind::U32: + return to_string(expression->getU32Value()); + case ValueTypeKind::S8: + return to_string(expression->getS8Value()); + case ValueTypeKind::S32: + return to_string(expression->getS32Value()); + case ValueTypeKind::R32: + return to_string(expression->getR32Value()); default: return "?"; } @@ -458,7 +511,7 @@ void Logger::print(shared_ptr error) { switch (error->getKind()) { case ErrorKind::LEXER_ERROR: { string lexme = error->getLexme() ? 
*(error->getLexme()) : ""; - message = format("Unexpected token \"{}\" at line: {}, column: {}", lexme, error->getLine() + 1, error->getColumn() + 1); + message = format("At line {}, column {}: Unexpected token \"{}\"", error->getLine() + 1, error->getColumn() + 1, lexme); break; } case ErrorKind::PARSER_ERROR: { @@ -468,13 +521,13 @@ void Logger::print(shared_ptr error) { if (expectedTokenKind) { message = format( - "Expected token {} but instead found \"{}\" at line: {}, column: {}", - toString(*expectedTokenKind), token->getLexme(), token->getLine() + 1, token->getColumn() + 1 + "At line {}, column {}: Expected token {} but found {} instead", + token->getLine() + 1, token->getColumn() + 1, toString(*expectedTokenKind), toString(token) ); } else { message = format( - "Unexpected token \"{}\" found at line: {}, column: {}", - token->getLexme(), token->getLine() + 1, token->getColumn() + 1 + "At line {}, column {}: Unexpected token \"{}\" found", + token->getLine() + 1, token->getColumn() + 1, toString(token) ); } if (errorMessage) @@ -483,7 +536,7 @@ void Logger::print(shared_ptr error) { } case ErrorKind::BUILDER_ERROR: { string errorMessage = error->getMessage() ? 
*(error->getMessage()) : ""; - message = format("Error at line {}, column {}: {}", error->getLine(), error->getColumn(), errorMessage); + message = format("At line {}, column {}: {}", error->getLine(), error->getColumn(), errorMessage); break; } } diff --git a/src/Logger.h b/src/Logger.h index d5f1eba..72e6746 100644 --- a/src/Logger.h +++ b/src/Logger.h @@ -11,6 +11,7 @@ class Statement; class StatementMetaExternFunction; class StatementVariable; class StatementFunction; +class StatementRawFunction; class StatementBlock; class StatementAssignment; class StatementReturn; @@ -19,6 +20,7 @@ class StatementExpression; class Expression; class ExpressionBinary; +class ExpressionUnary; class ExpressionIfElse; class ExpressionVariable; class ExpressionGrouping; @@ -41,6 +43,7 @@ private: static string toString(shared_ptr statement); static string toString(shared_ptr statement); static string toString(shared_ptr statement); + static string toString(shared_ptr statement); static string toString(shared_ptr statement); static string toString(shared_ptr statement); static string toString(shared_ptr statement); @@ -49,6 +52,7 @@ private: static string toString(shared_ptr expression); static string toString(shared_ptr expression); + static string toString(shared_ptr expression); static string toString(shared_ptr expression); static string toString(shared_ptr expression); static string toString(shared_ptr expression); diff --git a/src/Parser/Expression/Expression.h b/src/Parser/Expression/Expression.h index 66bd170..a335052 100644 --- a/src/Parser/Expression/Expression.h +++ b/src/Parser/Expression/Expression.h @@ -12,6 +12,7 @@ enum class ExpressionKind { LITERAL, ARRAY_LITERAL, GROUPING, + UNARY, BINARY, IF_ELSE, VAR, diff --git a/src/Parser/Expression/ExpressionBinary.h b/src/Parser/Expression/ExpressionBinary.h index 0c5bb4c..56b31fa 100644 --- a/src/Parser/Expression/ExpressionBinary.h +++ b/src/Parser/Expression/ExpressionBinary.h @@ -1,4 +1,7 @@ -#include 
"Parser/Expression/Expression.h" +#ifndef EXPRESSION_BINARY_H +#define EXPRESSION_BINARY_H + +#include "Expression.h" enum class ExpressionBinaryOperation { EQUAL, @@ -26,4 +29,6 @@ public: ExpressionBinaryOperation getOperation(); shared_ptr getLeft(); shared_ptr getRight(); -}; \ No newline at end of file +}; + +#endif \ No newline at end of file diff --git a/src/Parser/Expression/ExpressionLiteral.cpp b/src/Parser/Expression/ExpressionLiteral.cpp index 648046e..4675d40 100644 --- a/src/Parser/Expression/ExpressionLiteral.cpp +++ b/src/Parser/Expression/ExpressionLiteral.cpp @@ -15,14 +15,14 @@ shared_ptr ExpressionLiteral::expressionLiteralForToken(share case TokenKind::INTEGER_DEC: { string numString = token->getLexme(); erase(numString, '_'); - expression->sint32Value = stoi(numString, nullptr, 10); + expression->s32Value = stoi(numString, nullptr, 10); expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; } case TokenKind::INTEGER_HEX: { string numString = token->getLexme(); erase(numString, '_'); - expression->sint32Value = stoi(numString, nullptr, 16); + expression->u32Value = stoul(numString, nullptr, 16); expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; } @@ -30,7 +30,7 @@ shared_ptr ExpressionLiteral::expressionLiteralForToken(share string numString = token->getLexme(); erase(numString, '_'); numString = numString.substr(2, numString.size()-1); - expression->sint32Value = stoi(numString, nullptr, 2); + expression->u32Value = stoul(numString, nullptr, 2); expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; } @@ -41,13 +41,14 @@ shared_ptr ExpressionLiteral::expressionLiteralForToken(share return nullptr; expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); - expression->sint32Value = *charValue; + expression->u32Value = *charValue; return expression; } - case TokenKind::REAL: - expression->real32Value = stof(token->getLexme()); + case TokenKind::REAL: 
{ + expression->r32Value = stof(token->getLexme()); expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); break; + } default: return nullptr; } @@ -62,10 +63,22 @@ bool ExpressionLiteral::getBoolValue() { return boolValue; } -int32_t ExpressionLiteral::getSint32Value() { - return sint32Value; +uint8_t ExpressionLiteral::getU8Value() { + return u8Value; } -float ExpressionLiteral::getReal32Value() { - return real32Value; +uint32_t ExpressionLiteral::getU32Value() { + return u32Value; +} + +int8_t ExpressionLiteral::getS8Value() { + return s8Value; +} + +int32_t ExpressionLiteral::getS32Value() { + return s32Value; +} + +float ExpressionLiteral::getR32Value() { + return r32Value; } diff --git a/src/Parser/Expression/ExpressionLiteral.h b/src/Parser/Expression/ExpressionLiteral.h index 89b545e..89b8bf4 100644 --- a/src/Parser/Expression/ExpressionLiteral.h +++ b/src/Parser/Expression/ExpressionLiteral.h @@ -6,16 +6,22 @@ class ExpressionLiteral: public Expression { private: bool boolValue; - int32_t sint32Value; - float real32Value; + uint8_t u8Value; + uint32_t u32Value; + int8_t s8Value; + int32_t s32Value; + float r32Value; public: static shared_ptr expressionLiteralForToken(shared_ptr token); ExpressionLiteral(); bool getBoolValue(); - int32_t getSint32Value(); - float getReal32Value(); + uint8_t getU8Value(); + uint32_t getU32Value(); + int8_t getS8Value(); + int32_t getS32Value(); + float getR32Value(); }; #endif \ No newline at end of file diff --git a/src/Parser/Expression/ExpressionUnary.cpp b/src/Parser/Expression/ExpressionUnary.cpp new file mode 100644 index 0000000..ddc570d --- /dev/null +++ b/src/Parser/Expression/ExpressionUnary.cpp @@ -0,0 +1,29 @@ +#include "ExpressionUnary.h" + +#include "Lexer/Token.h" + +ExpressionUnary::ExpressionUnary(shared_ptr token, shared_ptr expression): +Expression(ExpressionKind::UNARY, nullptr), expression(expression) { + switch (token->getKind()) { + case TokenKind::PLUS: + operation = 
ExpressionUnaryOperation::PLUS; + valueType = expression->getValueType(); + break; + case TokenKind::MINUS: + operation = ExpressionUnaryOperation::MINUS; + valueType = expression->getValueType(); + break; + default: + operation = ExpressionUnaryOperation::INVALID; + valueType = nullptr; + break; + } +} + +ExpressionUnaryOperation ExpressionUnary::getOperation() { + return operation; +} + +shared_ptr ExpressionUnary::getExpression() { + return expression; +} \ No newline at end of file diff --git a/src/Parser/Expression/ExpressionUnary.h b/src/Parser/Expression/ExpressionUnary.h new file mode 100644 index 0000000..2ba56fd --- /dev/null +++ b/src/Parser/Expression/ExpressionUnary.h @@ -0,0 +1,23 @@ +#ifndef EXPRESSION_UNARY_H +#define EXPRESSION_UNARY_H + +#include "Expression.h" + +enum class ExpressionUnaryOperation { + PLUS, + MINUS, + INVALID +}; + +class ExpressionUnary: public Expression { +private: + ExpressionUnaryOperation operation; + shared_ptr expression; + +public: + ExpressionUnary(shared_ptr token, shared_ptr expression); + ExpressionUnaryOperation getOperation(); + shared_ptr getExpression(); +}; + +#endif \ No newline at end of file diff --git a/src/Parser/Parsee/Parsee.cpp b/src/Parser/Parsee/Parsee.cpp new file mode 100644 index 0000000..3ec80d9 --- /dev/null +++ b/src/Parser/Parsee/Parsee.cpp @@ -0,0 +1,44 @@ +#include "Parsee.h" + +Parsee Parsee::tokenParsee(TokenKind tokenKind, bool isRequired, bool shouldReturn) { + Parsee parsee; + parsee.kind = ParseeKind::TOKEN; + parsee.tokenKind = tokenKind; + parsee.isRequired = isRequired; + parsee.shouldReturn = shouldReturn; + return parsee; +} + +Parsee Parsee::valueTypeParsee(bool isRequired) { + Parsee parsee; + parsee.kind = ParseeKind::VALUE_TYPE; + parsee.isRequired = isRequired; + parsee.shouldReturn = true; + return parsee; +} + +Parsee Parsee::expressionParsee(bool isRequired) { + Parsee parsee; + parsee.kind = ParseeKind::EXPRESSION; + parsee.isRequired = isRequired; + parsee.shouldReturn = 
true; + return parsee; +} + +Parsee::Parsee() { } + +ParseeKind Parsee::getKind() { + return kind; +} + +TokenKind Parsee::getTokenKind() { + return tokenKind; +} + +bool Parsee::getIsRequired() { + return isRequired; +} + +bool Parsee::getShouldReturn() { + return shouldReturn; +} \ No newline at end of file diff --git a/src/Parser/Parsee/Parsee.h b/src/Parser/Parsee/Parsee.h new file mode 100644 index 0000000..6dfdf0e --- /dev/null +++ b/src/Parser/Parsee/Parsee.h @@ -0,0 +1,33 @@ +#ifndef PARSEE_H +#define PARSEE_H + +#include + +enum class TokenKind; + +enum class ParseeKind { + TOKEN, + VALUE_TYPE, + EXPRESSION +}; + +class Parsee { +private: + ParseeKind kind; + TokenKind tokenKind; + bool isRequired; + bool shouldReturn; + Parsee(); + +public: + static Parsee tokenParsee(TokenKind tokenKind, bool isRequired, bool shouldReturn); + static Parsee valueTypeParsee(bool isRequired); + static Parsee expressionParsee(bool isRequired); + + ParseeKind getKind(); + TokenKind getTokenKind(); + bool getIsRequired(); + bool getShouldReturn(); +}; + +#endif \ No newline at end of file diff --git a/src/Parser/Parsee/ParseeGroup.cpp b/src/Parser/Parsee/ParseeGroup.cpp new file mode 100644 index 0000000..ead5352 --- /dev/null +++ b/src/Parser/Parsee/ParseeGroup.cpp @@ -0,0 +1,20 @@ +#include "ParseeGroup.h" + +#include "Parsee.h" + +ParseeGroup::ParseeGroup(vector parsees, optional repeatedGroup): +parsees(parsees) { + if (repeatedGroup) { + this->repeatedGroup = *repeatedGroup; + } else { + this->repeatedGroup = {}; + } + } + +vector ParseeGroup::getParsees() { + return parsees; +} + +optional> ParseeGroup::getRepeatedGroup() { + return repeatedGroup; +} \ No newline at end of file diff --git a/src/Parser/Parsee/ParseeGroup.h b/src/Parser/Parsee/ParseeGroup.h new file mode 100644 index 0000000..4e4cc82 --- /dev/null +++ b/src/Parser/Parsee/ParseeGroup.h @@ -0,0 +1,22 @@ +#ifndef PARSEE_GROUP_H +#define PARSEE_GROUP_H + +#include +#include + +class Parsee; + +using namespace 
std; + +class ParseeGroup { +private: + vector parsees; + optional> repeatedGroup; + +public: + ParseeGroup(vector parsees, optional repeatedGroup); + vector getParsees(); + optional> getRepeatedGroup(); +}; + +#endif \ No newline at end of file diff --git a/src/Parser/Parsee/ParseeResult.cpp b/src/Parser/Parsee/ParseeResult.cpp new file mode 100644 index 0000000..5e09309 --- /dev/null +++ b/src/Parser/Parsee/ParseeResult.cpp @@ -0,0 +1,51 @@ +#include "ParseeResult.h" + +#include "Lexer/Token.h" +#include "Parser/ValueType.h" + +ParseeResult ParseeResult::tokenResult(shared_ptr token) { + ParseeResult parseeResult; + parseeResult.kind = ParseeResultKind::TOKEN; + parseeResult.token = token; + parseeResult.tokensCount = 1; + return parseeResult; +} + +ParseeResult ParseeResult::valueTypeResult(shared_ptr valueType, int tokensCount) { + ParseeResult parseeResult; + parseeResult.kind = ParseeResultKind::VALUE_TYPE; + parseeResult.valueType = valueType; + parseeResult.tokensCount = tokensCount; + return parseeResult; +} + +ParseeResult ParseeResult::expressionResult(shared_ptr expression, int tokensCount) { + ParseeResult result; + result.kind = ParseeResultKind::EXPRESSION; + result.expression = expression; + result.tokensCount = tokensCount; + return result; +} + +ParseeResult::ParseeResult() { } + + +ParseeResultKind ParseeResult::getKind() { + return kind; +} + +shared_ptr ParseeResult::getToken() { + return token; +} + +shared_ptr ParseeResult::getValueType() { + return valueType; +} + +shared_ptr ParseeResult::getExpression() { + return expression; +} + +int ParseeResult::getTokensCount() { + return tokensCount; +} \ No newline at end of file diff --git a/src/Parser/Parsee/ParseeResult.h b/src/Parser/Parsee/ParseeResult.h new file mode 100644 index 0000000..f59114e --- /dev/null +++ b/src/Parser/Parsee/ParseeResult.h @@ -0,0 +1,39 @@ +#ifndef PARSEE_RESULT_H +#define PARSEE_RESULT_H + +#include + +class Token; +class ValueType; +class Expression; + +using 
namespace std; + +enum class ParseeResultKind { + TOKEN, + VALUE_TYPE, + EXPRESSION +}; + +class ParseeResult { +private: + ParseeResultKind kind; + shared_ptr token; + shared_ptr valueType; + shared_ptr expression; + int tokensCount; + ParseeResult(); + +public: + static ParseeResult tokenResult(shared_ptr token); + static ParseeResult valueTypeResult(shared_ptr valueType, int tokensCount); + static ParseeResult expressionResult(shared_ptr expression, int tokensCount); + + ParseeResultKind getKind(); + shared_ptr getToken(); + shared_ptr getValueType(); + shared_ptr getExpression(); + int getTokensCount(); +}; + +#endif \ No newline at end of file diff --git a/src/Parser/Parsee/ParseeResultsGroup.cpp b/src/Parser/Parsee/ParseeResultsGroup.cpp new file mode 100644 index 0000000..ca3cc9c --- /dev/null +++ b/src/Parser/Parsee/ParseeResultsGroup.cpp @@ -0,0 +1,30 @@ +#include "ParseeResultsGroup.h" + +#include "ParseeResult.h" + +ParseeResultsGroup ParseeResultsGroup::success(vector results) { + ParseeResultsGroup resultsGroup; + resultsGroup.kind = ParseeResultsGroupKind::SUCCESS; + resultsGroup.results = results; + return resultsGroup; +} + +ParseeResultsGroup ParseeResultsGroup::noMatch() { + ParseeResultsGroup resultsGroup; + resultsGroup.kind = ParseeResultsGroupKind::NO_MATCH; + return resultsGroup; +} + +ParseeResultsGroup ParseeResultsGroup::failure() { + ParseeResultsGroup resultsGroup; + resultsGroup.kind = ParseeResultsGroupKind::FAILURE; + return resultsGroup; +} + +ParseeResultsGroupKind ParseeResultsGroup::getKind() { + return kind; +} + +vector ParseeResultsGroup::getResults() { + return results; +} \ No newline at end of file diff --git a/src/Parser/Parsee/ParseeResultsGroup.h b/src/Parser/Parsee/ParseeResultsGroup.h new file mode 100644 index 0000000..3ba23b4 --- /dev/null +++ b/src/Parser/Parsee/ParseeResultsGroup.h @@ -0,0 +1,30 @@ +#ifndef PARSEE_RESULTS_GROUP_H +#define PARSEE_RESULTS_GROUP_H + +#include + +class ParseeResult; + +using namespace 
std; + +enum class ParseeResultsGroupKind { + SUCCESS, + NO_MATCH, + FAILURE +}; + +class ParseeResultsGroup { +private: + ParseeResultsGroupKind kind; + vector results; + +public: + static ParseeResultsGroup success(vector results); + static ParseeResultsGroup noMatch(); + static ParseeResultsGroup failure(); + + ParseeResultsGroupKind getKind(); + vector getResults(); +}; + +#endif \ No newline at end of file diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index c8922ed..bdf291f 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -12,10 +12,12 @@ #include "Parser/Expression/ExpressionVariable.h" #include "Parser/Expression/ExpressionCall.h" #include "Parser/Expression/ExpressionIfElse.h" +#include "Parser/Expression/ExpressionUnary.h" #include "Parser/Expression/ExpressionBinary.h" #include "Parser/Expression/ExpressionBlock.h" #include "Parser/Statement/StatementFunction.h" +#include "Parser/Statement/StatementRawFunction.h" #include "Parser/Statement/StatementVariable.h" #include "Parser/Statement/StatementAssignment.h" #include "Parser/Statement/StatementReturn.h" @@ -24,6 +26,11 @@ #include "Parser/Statement/StatementBlock.h" #include "Parser/Statement/StatementRepeat.h" +#include "Parsee/Parsee.h" +#include "Parsee/ParseeGroup.h" +#include "Parsee/ParseeResult.h" +#include "Parsee/ParseeResultsGroup.h" + Parser::Parser(vector> tokens) : tokens(tokens) { } @@ -61,6 +68,10 @@ shared_ptr Parser::nextStatement() { if (statement != nullptr || errors.size() > errorsCount) return statement; + statement = matchStatementRawFunction(); + if (statement != nullptr || errors.size() > errorsCount) + return statement; + statement = matchStatementVariable(); if (statement != nullptr || errors.size() > errorsCount) return statement; @@ -102,112 +113,207 @@ shared_ptr Parser::nextInBlockStatement() { } shared_ptr Parser::matchStatementMetaExternFunction() { - if (!tryMatchingTokenKinds({TokenKind::M_EXTERN, TokenKind::IDENTIFIER, TokenKind::FUNCTION}, 
true, false)) - return nullptr; + ParseeResultsGroup resultsGroup; - string name; + string identifier; vector>> arguments; shared_ptr returnType = ValueType::NONE; - currentIndex++; // skip meta - shared_ptr identifierToken = tokens.at(currentIndex++); - currentIndex++; // skip fun + // identifier + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::M_EXTERN, true, false), + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::tokenParsee(TokenKind::FUNCTION, true, false) + }, + {} + ) + ); + + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + identifier = resultsGroup.getResults().at(0).getToken()->getLexme(); + break; + case ParseeResultsGroupKind::NO_MATCH: + case ParseeResultsGroupKind::FAILURE: + return nullptr; + } // arguments - if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { - do { - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) { - markError({}, "Expected function argument"); - return nullptr; + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::COLON, true, false), + Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::valueTypeParsee(true) + }, + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::COMMA, true, false), + Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::valueTypeParsee(true) + }, + {} + ) + ) + ); + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + for (int i=0; i> arg; + arg.first = resultsGroup.getResults().at(i).getToken()->getLexme(); + arg.second = resultsGroup.getResults().at(i+1).getValueType(); + arguments.push_back(arg); } - shared_ptr identifierToken = tokens.at(currentIndex++); - //shared_ptr 
argumentTypeToken = tokens.at(currentIndex++); - shared_ptr argumentType = matchValueType(); - if (argumentType == nullptr) { - markError(TokenKind::TYPE, {}); - return nullptr; - } - - arguments.push_back(pair>(identifierToken->getLexme(), argumentType)); - } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); - } - - // Return type - if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - - //shared_ptr returnTypeToken = tokens.at(currentIndex); - returnType = matchValueType(); - if (returnType == nullptr) { - markError(TokenKind::TYPE, {}); + break; + case ParseeResultsGroupKind::NO_MATCH: + break; + case ParseeResultsGroupKind::FAILURE: return nullptr; - } } - return make_shared(identifierToken->getLexme(), arguments, returnType); + // return type + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::RIGHT_ARROW, true, false), + Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), + Parsee::valueTypeParsee(true) + }, + {} + ) + ); + + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + returnType = resultsGroup.getResults().at(0).getValueType(); + break; + case ParseeResultsGroupKind::NO_MATCH: + break; + case ParseeResultsGroupKind::FAILURE: + return nullptr; + } + + return make_shared(identifier, arguments, returnType); } shared_ptr Parser::matchStatementVariable() { - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) + ParseeResultsGroup resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::valueTypeParsee(true), + Parsee::tokenParsee(TokenKind::LEFT_ARROW, true, false), + Parsee::expressionParsee(true) + }, + {} + ) + ); + + if (resultsGroup.getKind() != ParseeResultsGroupKind::SUCCESS) return nullptr; - shared_ptr identifierToken = tokens.at(currentIndex++); - 
shared_ptr valueType = matchValueType(); + string identifier = resultsGroup.getResults().at(0).getToken()->getLexme(); + shared_ptr valueType = resultsGroup.getResults().at(1).getValueType(); + shared_ptr expression = resultsGroup.getResults().at(2).getExpression(); - // Expect left arrow - if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true)) { - markError(TokenKind::LEFT_ARROW, {}); - return nullptr; - } - - shared_ptr expression = nextExpression(); - if (expression == nullptr) - return nullptr; - - return make_shared(identifierToken->getLexme(), valueType, expression); + return make_shared(identifier, valueType, expression); } shared_ptr Parser::matchStatementFunction() { - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::FUNCTION}, true, false)) - return nullptr; + bool hasError = false; + ParseeResultsGroup resultsGroup; string name; vector>> arguments; shared_ptr returnType = ValueType::NONE; shared_ptr statementBlock; - // name - name = tokens.at(currentIndex++)->getLexme(); - currentIndex++; // skip fun + // identifier + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::tokenParsee(TokenKind::FUNCTION, true, false) + }, + {} + ) + ); - // arguments - if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { - do { - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) { - markError({}, "Expected function argument"); - return nullptr; - } - shared_ptr identifierToken = tokens.at(currentIndex++); - shared_ptr argumentType = matchValueType(); - if (argumentType == nullptr) { - markError(TokenKind::TYPE, {}); - return nullptr; - } - - arguments.push_back(pair>(identifierToken->getLexme(), argumentType)); - } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: 
+ name = resultsGroup.getResults().at(0).getToken()->getLexme(); + break; + case ParseeResultsGroupKind::NO_MATCH: + return nullptr; + case ParseeResultsGroupKind::FAILURE: + hasError = true; + break; } - // return type - if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { - tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line + // arguments + if (!hasError) { + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::COLON, true, false), + Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::valueTypeParsee(true) + }, + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::COMMA, true, false), + Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::valueTypeParsee(true) + }, + {} + ) + ) + ); + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + for (int i=0; i> arg; + arg.first = resultsGroup.getResults().at(i).getToken()->getLexme(); + arg.second = resultsGroup.getResults().at(i+1).getValueType(); + arguments.push_back(arg); + } + break; + case ParseeResultsGroupKind::NO_MATCH: + break; + case ParseeResultsGroupKind::FAILURE: + hasError = true; + break; + } + } - returnType = matchValueType(); - if (returnType == nullptr) { - markError(TokenKind::TYPE, {}); - return nullptr; + // return type + if (!hasError) { + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::RIGHT_ARROW, true, false), + Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), + Parsee::valueTypeParsee(true) + }, + {} + ) + ); + + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + returnType = resultsGroup.getResults().at(0).getValueType(); + break; + case ParseeResultsGroupKind::NO_MATCH: + break; + case ParseeResultsGroupKind::FAILURE: + hasError = true; + break; } } @@ -218,10 
+324,11 @@ shared_ptr Parser::matchStatementFunction() { } // block - statementBlock = matchStatementBlock({TokenKind::SEMICOLON}); + statementBlock = matchStatementBlock({TokenKind::SEMICOLON, TokenKind::END}); if (statementBlock == nullptr) return nullptr; + // closing semicolon if(!tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true)) { markError(TokenKind::SEMICOLON, {}); return nullptr; @@ -230,6 +337,155 @@ shared_ptr Parser::matchStatementFunction() { return make_shared(name, arguments, returnType, dynamic_pointer_cast(statementBlock)); } +shared_ptr Parser::matchStatementRawFunction() { + bool hasError = false; + ParseeResultsGroup resultsGroup; + + string name; + string constraints; + vector>> arguments; + shared_ptr returnType = ValueType::NONE; + string rawSource; + + // identifier + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::tokenParsee(TokenKind::RAW_FUNCTION, true, false) + }, + {} + ) + ); + + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + name = resultsGroup.getResults().at(0).getToken()->getLexme(); + break; + case ParseeResultsGroupKind::NO_MATCH: + return nullptr; + break; + case ParseeResultsGroupKind::FAILURE: + hasError = true; + break; + } + + // constraints + if (!hasError) { + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::LESS, true, false), + Parsee::tokenParsee(TokenKind::STRING, true, true), + Parsee::tokenParsee(TokenKind::GREATER, true, false) + }, + {} + ) + ); + + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + constraints = resultsGroup.getResults().at(0).getToken()->getLexme(); + // remove enclosing quotes + if (constraints.length() >= 2) + constraints = constraints.substr(1, constraints.length() - 2); + break; + case ParseeResultsGroupKind::NO_MATCH: + return nullptr; + break; + case ParseeResultsGroupKind::FAILURE: + 
hasError = true; + break; + } + } + + // arguments + if (!hasError) { + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::COLON, true, false), + Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::valueTypeParsee(true) + }, + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::COMMA, true, false), + Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + Parsee::valueTypeParsee(true) + }, + {} + ) + ) + ); + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + for (int i=0; i> arg; + arg.first = resultsGroup.getResults().at(i).getToken()->getLexme(); + arg.second = resultsGroup.getResults().at(i+1).getValueType(); + arguments.push_back(arg); + } + break; + case ParseeResultsGroupKind::NO_MATCH: + break; + case ParseeResultsGroupKind::FAILURE: + hasError = true; + break; + } + } + + // return type + if (!hasError) { + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::RIGHT_ARROW, true, false), + Parsee::tokenParsee(TokenKind::NEW_LINE, false, false), + Parsee::valueTypeParsee(true) + }, + {} + ) + ); + + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + returnType = resultsGroup.getResults().at(0).getValueType(); + break; + case ParseeResultsGroupKind::NO_MATCH: + break; + case ParseeResultsGroupKind::FAILURE: + hasError = true; + break; + } + } + + // consume new line + if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) { + markError(TokenKind::NEW_LINE, {}); + return nullptr; + } + + // source + while (tryMatchingTokenKinds({TokenKind::RAW_SOURCE_LINE}, true, false)) { + if (!rawSource.empty()) + rawSource += "\n"; + rawSource += tokens.at(currentIndex++)->getLexme(); + + // Consume optional new line (for example because of a comment) + 
tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); + } + + if(!tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true)) { + markError(TokenKind::SEMICOLON, {}); + return nullptr; + } + + return make_shared(name, constraints, arguments, returnType, rawSource); +} + shared_ptr Parser::matchStatementBlock(vector terminalTokenKinds) { vector> statements; @@ -251,44 +507,90 @@ shared_ptr Parser::matchStatementBlock(vector terminalToke shared_ptr Parser::matchStatementAssignment() { int startIndex = currentIndex; + ParseeResultsGroup resultsGroup; - if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER}, true, false)) - return nullptr; - shared_ptr identifierToken = tokens.at(currentIndex++); + string identifier; shared_ptr indexExpression; + shared_ptr expression; - if (tryMatchingTokenKinds({TokenKind::LEFT_SQUARE_BRACKET}, true, true)) { - indexExpression = nextExpression(); - if (indexExpression == nullptr) - return nullptr; + // identifier + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::IDENTIFIER, true, true), + }, + {} + ) + ); - if (!tryMatchingTokenKinds({TokenKind::RIGHT_SQUARE_BRACKET}, true, true)) { - markError(TokenKind::RIGHT_SQUARE_BRACKET, {}); - return nullptr; - } - } - - // assignment requires left arrow, otherwise abort - if (!tryMatchingTokenKinds({TokenKind::LEFT_ARROW}, true, true)) { - currentIndex = startIndex; - return nullptr; - } - - shared_ptr expression = nextExpression(); - if (expression == nullptr) + if (resultsGroup.getKind() != ParseeResultsGroupKind::SUCCESS) return nullptr; - return make_shared(identifierToken->getLexme(), indexExpression, expression); + identifier = resultsGroup.getResults().at(0).getToken()->getLexme(); + + // index expression + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::LEFT_SQUARE_BRACKET, true, false), + Parsee::expressionParsee(true), + Parsee::tokenParsee(TokenKind::RIGHT_SQUARE_BRACKET, true, 
false), + }, + {} + ) + ); + + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + indexExpression = resultsGroup.getResults().at(0).getExpression(); + break; + case ParseeResultsGroupKind::NO_MATCH: + break; + case ParseeResultsGroupKind::FAILURE: + return nullptr; + } + + // expression + resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::LEFT_ARROW, true, false), + Parsee::expressionParsee(true) + }, + {} + ) + ); + + switch (resultsGroup.getKind()) { + case ParseeResultsGroupKind::SUCCESS: + expression = resultsGroup.getResults().at(0).getExpression(); + break; + case ParseeResultsGroupKind::NO_MATCH: + currentIndex = startIndex; + return nullptr; + case ParseeResultsGroupKind::FAILURE: + return nullptr; + } + + return make_shared(identifier, indexExpression, expression); } shared_ptr Parser::matchStatementReturn() { - if (!tryMatchingTokenKinds({TokenKind::RETURN}, true, true)) + ParseeResultsGroup resultsGroup = parseeResultsGroupForParseeGroup( + ParseeGroup( + { + Parsee::tokenParsee(TokenKind::RETURN, true, false), + Parsee::expressionParsee(false) + }, + {} + ) + ); + + if (resultsGroup.getKind() != ParseeResultsGroupKind::SUCCESS) return nullptr; - shared_ptr expression = nextExpression(); - if (expression == nullptr) - return nullptr; - + shared_ptr expression = !resultsGroup.getResults().empty() ? 
resultsGroup.getResults().at(0).getExpression() : nullptr; + return make_shared(expression); } @@ -385,7 +687,6 @@ shared_ptr Parser::nextExpression() { if (expression != nullptr || errors.size() > errorsCount) return expression; - markError({}, {}); return nullptr; } @@ -423,7 +724,7 @@ shared_ptr Parser::matchTerm() { } shared_ptr Parser::matchFactor() { - shared_ptr expression = matchPrimary(); + shared_ptr expression = matchUnary(); if (expression == nullptr) return nullptr; @@ -433,6 +734,19 @@ shared_ptr Parser::matchFactor() { return expression; } +shared_ptr Parser::matchUnary() { + shared_ptr token = tokens.at(currentIndex); + + if (tryMatchingTokenKinds(Token::tokensUnary, false, true)) { + shared_ptr expression = matchPrimary(); + if (expression == nullptr) + return nullptr; + return make_shared(token, expression); + } + + return matchPrimary(); +} + shared_ptr Parser::matchPrimary() { shared_ptr expression; int errorsCount = errors.size(); @@ -442,8 +756,8 @@ shared_ptr Parser::matchPrimary() { return expression; expression = matchExpressionArrayLiteral(); - if (expression != nullptr || errors.size() > errorsCount) - return expression; + if (expression != nullptr || errors.size() > errorsCount) + return expression; expression = matchExpressionLiteral(); if (expression != nullptr || errors.size() > errorsCount) @@ -621,6 +935,7 @@ shared_ptr Parser::matchExpressionBinary(shared_ptr left } if (right == nullptr) { + markError({}, "Expected expression"); return nullptr; } else { return make_shared(token, left, right); @@ -651,44 +966,130 @@ shared_ptr Parser::matchExpressionBlock(vector terminalTo return make_shared(statements); } -shared_ptr Parser::matchValueType() { - if (!tryMatchingTokenKinds({TokenKind::TYPE}, true, false)) - return nullptr; - shared_ptr typeToken = tokens.at(currentIndex++); - shared_ptr subType; - int valueArg = 0; +ParseeResultsGroup Parser::parseeResultsGroupForParseeGroup(ParseeGroup group) { + int errorsCount = errors.size(); + 
int startIndex = currentIndex; + vector results; + bool mustFulfill = false; - if (tryMatchingTokenKinds({TokenKind::LESS}, true, true)) { - if (!tryMatchingTokenKinds({TokenKind::TYPE}, true, false)) { - markError(TokenKind::TYPE, {}); - return nullptr; - } - subType = matchValueType(); - if (subType == nullptr) - return subType; - - if (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)) { - if (!tryMatchingTokenKinds({TokenKind::INTEGER_DEC, TokenKind::INTEGER_HEX, TokenKind::INTEGER_BIN, TokenKind::INTEGER_CHAR}, false, false)) { - markError({}, "Expected integer literal"); - return nullptr; - } - shared_ptr expressionValue = matchExpressionLiteral(); - if (expressionValue == nullptr) { - markError({}, "Expected integer literal"); - return nullptr; - } - - valueArg = dynamic_pointer_cast(expressionValue)->getSint32Value(); + for (Parsee &parsee : group.getParsees()) { + optional result; + switch (parsee.getKind()) { + case ParseeKind::TOKEN: + result = tokenParseeResult(currentIndex, parsee.getTokenKind()); + break; + case ParseeKind::VALUE_TYPE: + result = valueTypeParseeResult(currentIndex); + break; + case ParseeKind::EXPRESSION: + result = expressionParseeResult(currentIndex); + break; } + // generated an error? + if (errors.size() > errorsCount) + return ParseeResultsGroup::failure(); - if (!tryMatchingTokenKinds({TokenKind::GREATER}, true, true)) { - markError(TokenKind::GREATER, {}); - return nullptr; + // if doesn't match on optional group + if (!result && parsee.getIsRequired() && !mustFulfill) { + currentIndex = startIndex; + //return vector(); + return ParseeResultsGroup::noMatch(); } + + // return matching token? + if (result && parsee.getShouldReturn()) + results.push_back(*result); + + // decide if we're decoding the expected sequence + if (!parsee.getIsRequired() && currentIndex > startIndex) + mustFulfill = true; + + // invalid sequence detected? 
+ if (!result && parsee.getIsRequired() && mustFulfill) { + markError(parsee.getTokenKind(), {}); + //return {}; + return ParseeResultsGroup::failure(); + } + + // got to the next token if we got a match + if (result) + currentIndex += (*result).getTokensCount(); } - return ValueType::valueTypeForToken(typeToken, subType, valueArg); + if (group.getRepeatedGroup()) { + ParseeResultsGroup subResultsGroup; + do { + subResultsGroup = parseeResultsGroupForParseeGroup(*group.getRepeatedGroup()); + if (subResultsGroup.getKind() == ParseeResultsGroupKind::FAILURE) + return ParseeResultsGroup::failure(); + + for (ParseeResult &subResult : subResultsGroup.getResults()) + results.push_back(subResult); + } while (subResultsGroup.getKind() == ParseeResultsGroupKind::SUCCESS); + } + + return ParseeResultsGroup::success(results); +} + +optional Parser::tokenParseeResult(int index, TokenKind tokenKind) { + shared_ptr token = tokens.at(index); + if (token->isOfKind({tokenKind})) + return ParseeResult::tokenResult(token); + return {}; +} + +optional Parser::valueTypeParseeResult(int index) { + int startIndex = index; + + if (!tokens.at(index)->isOfKind({TokenKind::TYPE})) + return {}; + + shared_ptr typeToken = tokens.at(index++); + shared_ptr subType; + int typeArg = 0; + + if (tokens.at(index)->isOfKind({TokenKind::LESS})) { + index++; + optional subResult = valueTypeParseeResult(index); + if (!subResult) + return {}; + subType = (*subResult).getValueType(); + index += (*subResult).getTokensCount(); + + if (tokens.at(index)->isOfKind({TokenKind::COMMA})) { + index++; + + if (!tokens.at(index)->isOfKind({TokenKind::INTEGER_DEC, TokenKind::INTEGER_HEX, TokenKind::INTEGER_BIN, TokenKind::INTEGER_CHAR})) + return {}; + + int storedIndex = currentIndex; + currentIndex = index; + shared_ptr expressionValue = matchExpressionLiteral(); + typeArg = dynamic_pointer_cast(expressionValue)->getU32Value(); + currentIndex = storedIndex; + index++; + } + + if 
(!tokens.at(index)->isOfKind({TokenKind::GREATER})) + return {}; + index++; + } + + shared_ptr valueType = ValueType::valueTypeForToken(typeToken, subType, typeArg); + return ParseeResult::valueTypeResult(valueType, index - startIndex); +} + +optional Parser::expressionParseeResult(int index) { + int startIndex = currentIndex; + int errorsCount = errors.size(); + shared_ptr expression = nextExpression(); + if (errors.size() > errorsCount) + return {}; + + int tokensCount = currentIndex - startIndex; + currentIndex = startIndex; + return ParseeResult::expressionResult(expression, tokensCount); } bool Parser::tryMatchingTokenKinds(vector kinds, bool shouldMatchAll, bool shouldAdvance) { diff --git a/src/Parser/Parser.h b/src/Parser/Parser.h index 952a087..0e27960 100644 --- a/src/Parser/Parser.h +++ b/src/Parser/Parser.h @@ -12,6 +12,10 @@ class ValueType; class Expression; class Statement; +class ParseeGroup; +class ParseeResult; +class ParseeResultsGroup; + using namespace std; class Parser { @@ -26,6 +30,7 @@ private: shared_ptr matchStatementMetaExternFunction(); shared_ptr matchStatementVariable(); shared_ptr matchStatementFunction(); + shared_ptr matchStatementRawFunction(); shared_ptr matchStatementBlock(vector terminalTokenKinds); shared_ptr matchStatementAssignment(); @@ -38,7 +43,8 @@ private: shared_ptr matchComparison(); // <, <=, >, >= shared_ptr matchTerm(); // +, - shared_ptr matchFactor(); // *, /, % - shared_ptr matchPrimary(); // integer, () + shared_ptr matchUnary(); // +, - + shared_ptr matchPrimary(); // literal, () shared_ptr matchExpressionGrouping(); shared_ptr matchExpressionLiteral(); @@ -49,8 +55,10 @@ private: shared_ptr matchExpressionBinary(shared_ptr left); shared_ptr matchExpressionBlock(vector terminalTokenKinds); - shared_ptr matchValueType(); - + ParseeResultsGroup parseeResultsGroupForParseeGroup(ParseeGroup group); + optional tokenParseeResult(int index, TokenKind tokenKind); + optional valueTypeParseeResult(int index); + optional 
expressionParseeResult(int index); bool tryMatchingTokenKinds(vector kinds, bool shouldMatchAll, bool shouldAdvance); void markError(optional expectedTokenKind, optional message); diff --git a/src/Parser/Statement/Statement.h b/src/Parser/Statement/Statement.h index 727e480..963a9c5 100644 --- a/src/Parser/Statement/Statement.h +++ b/src/Parser/Statement/Statement.h @@ -10,6 +10,7 @@ enum class StatementKind { BLOCK, RETURN, FUNCTION, + RAW_FUNCTION, VARIABLE, ASSIGNMENT, REPEAT, diff --git a/src/Parser/Statement/StatementRawFunction.cpp b/src/Parser/Statement/StatementRawFunction.cpp new file mode 100644 index 0000000..6417c6a --- /dev/null +++ b/src/Parser/Statement/StatementRawFunction.cpp @@ -0,0 +1,24 @@ +#include "StatementRawFunction.h" + +StatementRawFunction::StatementRawFunction(string name, string constraints, vector>> arguments, shared_ptr returnValueType, string rawSource): +Statement(StatementKind::RAW_FUNCTION), name(name), constraints(constraints), arguments(arguments), returnValueType(returnValueType), rawSource(rawSource) { } + +string StatementRawFunction::getName() { + return name; +} + +string StatementRawFunction::getConstraints() { + return constraints; +} + +vector>> StatementRawFunction::getArguments() { + return arguments; +} + +shared_ptr StatementRawFunction::getReturnValueType() { + return returnValueType; +} + +string StatementRawFunction::getRawSource() { + return rawSource; +} \ No newline at end of file diff --git a/src/Parser/Statement/StatementRawFunction.h b/src/Parser/Statement/StatementRawFunction.h new file mode 100644 index 0000000..372592f --- /dev/null +++ b/src/Parser/Statement/StatementRawFunction.h @@ -0,0 +1,20 @@ +#include "Parser/Statement/Statement.h" + +class ValueType; + +class StatementRawFunction: public Statement { +private: + string name; + string constraints; + vector>> arguments; + shared_ptr returnValueType; + string rawSource; + +public: + StatementRawFunction(string name, string constraints, vector>> 
arguments, shared_ptr returnValueType, string rawSource); + string getName(); + string getConstraints(); + vector>> getArguments(); + shared_ptr getReturnValueType(); + string getRawSource(); +}; \ No newline at end of file diff --git a/src/Parser/ValueType.cpp b/src/Parser/ValueType.cpp index 6f5945d..566342b 100644 --- a/src/Parser/ValueType.cpp +++ b/src/Parser/ValueType.cpp @@ -4,8 +4,11 @@ shared_ptr ValueType::NONE = make_shared(ValueTypeKind::NONE, nullptr, 0); shared_ptr ValueType::BOOL = make_shared(ValueTypeKind::BOOL, nullptr, 0); -shared_ptr ValueType::SINT32 = make_shared(ValueTypeKind::SINT32, nullptr, 0); -shared_ptr ValueType::REAL32 = make_shared(ValueTypeKind::REAL32, nullptr, 0); +shared_ptr ValueType::U8 = make_shared(ValueTypeKind::U8, nullptr, 0); +shared_ptr ValueType::U32 = make_shared(ValueTypeKind::U32, nullptr, 0); +shared_ptr ValueType::S8 = make_shared(ValueTypeKind::S8, nullptr, 0); +shared_ptr ValueType::S32 = make_shared(ValueTypeKind::S32, nullptr, 0); +shared_ptr ValueType::R32 = make_shared(ValueTypeKind::R32, nullptr, 0); ValueType::ValueType(ValueTypeKind kind, shared_ptr subType, int valueArg): kind(kind), subType(subType), valueArg(valueArg) { } @@ -16,10 +19,16 @@ shared_ptr ValueType::valueTypeForToken(shared_ptr token, shar string lexme = token->getLexme(); if (lexme.compare("bool") == 0) return make_shared(ValueTypeKind::BOOL, subType, valueArg); - else if (lexme.compare("sint32") == 0) - return make_shared(ValueTypeKind::SINT32, subType, valueArg); - else if (lexme.compare("real32") == 0) - return make_shared(ValueTypeKind::REAL32, subType, valueArg); + else if (lexme.compare("u8") == 0) + return make_shared(ValueTypeKind::U8, subType, valueArg); + else if (lexme.compare("u32") == 0) + return make_shared(ValueTypeKind::U32, subType, valueArg); + else if (lexme.compare("s8") == 0) + return make_shared(ValueTypeKind::S8, subType, valueArg); + else if (lexme.compare("s32") == 0) + return make_shared(ValueTypeKind::S32, 
subType, valueArg); + else if (lexme.compare("r32") == 0) + return make_shared(ValueTypeKind::R32, subType, valueArg); else if (lexme.compare("data") == 0) return make_shared(ValueTypeKind::DATA, subType, valueArg); else @@ -28,12 +37,13 @@ shared_ptr ValueType::valueTypeForToken(shared_ptr token, shar case TokenKind::BOOL: return make_shared(ValueTypeKind::BOOL, nullptr, 0); case TokenKind::INTEGER_DEC: + return make_shared(ValueTypeKind::S32, nullptr, 0); case TokenKind::INTEGER_HEX: case TokenKind::INTEGER_BIN: case TokenKind::INTEGER_CHAR: - return make_shared(ValueTypeKind::SINT32, nullptr, 0); + return make_shared(ValueTypeKind::U32, nullptr, 0); case TokenKind::REAL: - return make_shared(ValueTypeKind::REAL32, nullptr, 0); + return make_shared(ValueTypeKind::R32, nullptr, 0); default: return nullptr; } diff --git a/src/Parser/ValueType.h b/src/Parser/ValueType.h index ce118eb..2b77f2f 100644 --- a/src/Parser/ValueType.h +++ b/src/Parser/ValueType.h @@ -10,8 +10,11 @@ using namespace std; enum class ValueTypeKind { NONE, BOOL, - SINT32, - REAL32, + U8, + U32, + S8, + S32, + R32, DATA }; @@ -24,8 +27,11 @@ private: public: static shared_ptr NONE; static shared_ptr BOOL; - static shared_ptr SINT32; - static shared_ptr REAL32; + static shared_ptr U8; + static shared_ptr U32; + static shared_ptr S8; + static shared_ptr S32; + static shared_ptr R32; static shared_ptr valueTypeForToken(shared_ptr token, shared_ptr subType, int valueArg); ValueType(ValueTypeKind kind, shared_ptr subType, int valueArg);