From c7812ccf43bbce83a15f6282b7d3903f19376bc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Sat, 12 Jul 2025 22:20:32 +0900 Subject: [PATCH 1/7] Adds statement raw function --- .vscode/launch.json | 2 +- samples/test.brc | 10 +++- src/Compiler/ModuleBuilder.cpp | 53 ++++++++++++++++++- src/Compiler/ModuleBuilder.h | 7 +++ src/Parser/Statement/Statement.h | 1 + src/Parser/Statement/StatementRawFunction.cpp | 12 +++++ src/Parser/Statement/StatementRawFunction.h | 14 +++++ 7 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 src/Parser/Statement/StatementRawFunction.cpp create mode 100644 src/Parser/Statement/StatementRawFunction.h diff --git a/.vscode/launch.json b/.vscode/launch.json index 5574805..4b91f3e 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -6,7 +6,7 @@ "type": "lldb-dap", "request": "launch", "program": "${command:cmake.launchTargetPath}", - "args": ["-v", "${workspaceFolder}/samples/hello.brc"], + "args": ["-v", "${workspaceFolder}/samples/test.brc"], "cwd": "${workspaceFolder}", "internalConsoleOptions": "openOnSessionStart", } diff --git a/samples/test.brc b/samples/test.brc index f71b061..b9222ca 100644 --- a/samples/test.brc +++ b/samples/test.brc @@ -1,5 +1,7 @@ @extern putchar fun: character sint32 -> sint32 +// ./build/brb samples/test.brc -S -x86-asm-syntax=intel + /* User type name data @@ -18,8 +20,14 @@ i u32 <- 0, rep text[i] != 0: // text data <- "Hello world!" 
+/*addStuff asm<+r, r>: num1 u32, num2 u32 -> u32 + add $1, $0 +;*/ + main fun -> sint32 - text data <- "Hello string!\n" + //text data <- "Hello string!\n" + abc sint32 <- 0 + //addStuff() ret 0 ; \ No newline at end of file diff --git a/src/Compiler/ModuleBuilder.cpp b/src/Compiler/ModuleBuilder.cpp index 4e1aaa1..dcb14c2 100644 --- a/src/Compiler/ModuleBuilder.cpp +++ b/src/Compiler/ModuleBuilder.cpp @@ -14,6 +14,7 @@ #include "Parser/Expression/ExpressionBlock.h" #include "Parser/Statement/StatementFunction.h" +#include "Parser/Statement/StatementRawFunction.h" #include "Parser/Statement/StatementVariable.h" #include "Parser/Statement/StatementAssignment.h" #include "Parser/Statement/StatementReturn.h" @@ -54,6 +55,9 @@ void ModuleBuilder::buildStatement(shared_ptr statement) { case StatementKind::FUNCTION: buildFunctionDeclaration(dynamic_pointer_cast(statement)); break; + case StatementKind::RAW_FUNCTION: + buildRawFunction(dynamic_pointer_cast(statement)); + break; case StatementKind::VARIABLE: buildVarDeclaration(dynamic_pointer_cast(statement)); break; @@ -126,6 +130,29 @@ void ModuleBuilder::buildFunctionDeclaration(shared_ptr state markError(0, 0, errorMessage); } +void ModuleBuilder::buildRawFunction(shared_ptr statement) { + llvm::FunctionType *funType = llvm::FunctionType::get(llvm::Type::getVoidTy(*context), nullptr, false); + llvm::InlineAsm *rawFun = llvm::InlineAsm::get(funType, statement->getRawSource(), "", false, false, llvm::InlineAsm::AsmDialect::AD_Intel); + if (!setRawFun(statement->getName(), rawFun)) + return; + + /*int res; + int a = 42; + int b = 13; + + vector types; + types.push_back(typeSint32); + types.push_back(typeSint32); + llvm::FunctionType *asmType = llvm::FunctionType::get(typeSint32, types, false); + llvm::InlineAsm *asmm = llvm::InlineAsm::get(asmType, "add $0, $1", "+{ebx},i", false, false, llvm::InlineAsm::AsmDialect::AD_Intel); + + vectorargValues; + argValues.push_back(llvm::ConstantInt::get(typeSint32, 5, true)); + 
argValues.push_back(llvm::ConstantInt::get(typeSint32, 4, true)); + + llvm::Value *valu = builder->CreateCall(asmm, llvm::ArrayRef(argValues));*/ +} + void ModuleBuilder::buildVarDeclaration(shared_ptr statement) { if (statement->getValueType()->getKind() == ValueTypeKind::DATA) { vector values = valuesForExpression(statement->getExpression()); @@ -518,7 +545,7 @@ llvm::AllocaInst* ModuleBuilder::getAlloca(string name) { bool ModuleBuilder::setFun(string name, llvm::Function *fun) { if (scopes.top().funMap[name] != nullptr) { - markError(0, 0, format("Function \"{}\" already defined", name)); + markError(0, 0, format("Function \"{}\" already defined in scope", name)); return false; } @@ -540,6 +567,30 @@ llvm::Function* ModuleBuilder::getFun(string name) { return nullptr; } +bool ModuleBuilder::setRawFun(string name, llvm::Value *rawFun) { + if (scopes.top().rawFunMap[name] != nullptr) { + markError(0, 0, format("Raw function \"{}\" already defined in scope", name)); + return false; + } + + scopes.top().rawFunMap[name] = rawFun; + return true; +} + +llvm::Value *ModuleBuilder::getRawFun(string name) { + stack scopes = this->scopes; + + while (!scopes.empty()) { + llvm::Value *rawFun = scopes.top().rawFunMap[name]; + if (rawFun != nullptr) + return rawFun; + scopes.pop(); + } + + markError(0, 0, format("Raw function \"{}\" not defined in scope", name)); + return nullptr; +} + llvm::Type *ModuleBuilder::typeForValueType(shared_ptr valueType, int count) { switch (valueType->getKind()) { case ValueTypeKind::NONE: diff --git a/src/Compiler/ModuleBuilder.h b/src/Compiler/ModuleBuilder.h index 783e984..2631e54 100644 --- a/src/Compiler/ModuleBuilder.h +++ b/src/Compiler/ModuleBuilder.h @@ -10,6 +10,7 @@ #include #include #include +#include class Error; class ValueType; @@ -26,6 +27,7 @@ enum class ExpressionBinaryOperation; class Statement; class StatementFunction; +class StatementRawFunction; class StatementVariable; class StatementAssignment; class StatementReturn; @@ 
-39,6 +41,7 @@ using namespace std; typedef struct { map allocaMap; map funMap; + map rawFunMap; } Scope; class ModuleBuilder { @@ -61,6 +64,7 @@ private: void buildStatement(shared_ptr statement); void buildFunctionDeclaration(shared_ptr statement); + void buildRawFunction(shared_ptr statement); void buildVarDeclaration(shared_ptr statement); void buildAssignment(shared_ptr statement); void buildBlock(shared_ptr statement); @@ -88,6 +92,9 @@ private: bool setFun(string name, llvm::Function *fun); llvm::Function *getFun(string name); + bool setRawFun(string name, llvm::Value *rawFun); + llvm::Value *getRawFun(string name); + llvm::Type *typeForValueType(shared_ptr valueType, int count = 0); void markError(int line, int column, string message); diff --git a/src/Parser/Statement/Statement.h b/src/Parser/Statement/Statement.h index 727e480..963a9c5 100644 --- a/src/Parser/Statement/Statement.h +++ b/src/Parser/Statement/Statement.h @@ -10,6 +10,7 @@ enum class StatementKind { BLOCK, RETURN, FUNCTION, + RAW_FUNCTION, VARIABLE, ASSIGNMENT, REPEAT, diff --git a/src/Parser/Statement/StatementRawFunction.cpp b/src/Parser/Statement/StatementRawFunction.cpp new file mode 100644 index 0000000..e40d9da --- /dev/null +++ b/src/Parser/Statement/StatementRawFunction.cpp @@ -0,0 +1,12 @@ +#include "StatementRawFunction.h" + +StatementRawFunction::StatementRawFunction(string name, string rawSource): +Statement(StatementKind::RAW_FUNCTION), name(name), rawSource(rawSource) { } + +string StatementRawFunction::getName() { + return name; +} + +string StatementRawFunction::getRawSource() { + return rawSource; +} \ No newline at end of file diff --git a/src/Parser/Statement/StatementRawFunction.h b/src/Parser/Statement/StatementRawFunction.h new file mode 100644 index 0000000..1a51566 --- /dev/null +++ b/src/Parser/Statement/StatementRawFunction.h @@ -0,0 +1,14 @@ +#include "Parser/Statement/Statement.h" + +class Expression; + +class StatementRawFunction: public Statement { +private: + 
string name; + string rawSource; + +public: + StatementRawFunction(string name, string rawSource); + string getName(); + string getRawSource(); +}; \ No newline at end of file From 26c566f4f6ba99805d895298bf62562621ccf507 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Sun, 13 Jul 2025 11:12:35 +0900 Subject: [PATCH 2/7] Parse raw function --- samples/test.brc | 10 +++-- src/Lexer/Lexer.cpp | 92 +++++++++++++++++++++++++++++++++---------- src/Lexer/Lexer.h | 4 ++ src/Lexer/Token.h | 2 + src/Logger.cpp | 24 +++++++++-- src/Logger.h | 2 + src/Parser/Parser.cpp | 37 +++++++++++++++++ src/Parser/Parser.h | 1 + 8 files changed, 145 insertions(+), 27 deletions(-) diff --git a/samples/test.brc b/samples/test.brc index b9222ca..e6a4100 100644 --- a/samples/test.brc +++ b/samples/test.brc @@ -24,10 +24,14 @@ i u32 <- 0, rep text[i] != 0: add $1, $0 ;*/ +rawAdd raw + mov eax, 5 + mov ebx, 42 + add eax, ebx +; + main fun -> sint32 - //text data <- "Hello string!\n" - abc sint32 <- 0 - //addStuff() + rawAdd() ret 0 ; \ No newline at end of file diff --git a/src/Lexer/Lexer.cpp b/src/Lexer/Lexer.cpp index c00f19c..2985d2f 100644 --- a/src/Lexer/Lexer.cpp +++ b/src/Lexer/Lexer.cpp @@ -12,6 +12,8 @@ vector> Lexer::getTokens() { currentIndex = 0; currentLine = 0; currentColumn = 0; + foundRawSourceStart = false; + isParsingRawSource = false; tokens.clear(); errors.clear(); @@ -117,6 +119,11 @@ shared_ptr Lexer::nextToken() { return nextToken(); // gets rid of remaining white spaces without repeating the code } + // raw source + token = matchRawSourceLine(); + if (token != nullptr) + return token; + // structural token = match(TokenKind::LEFT_PAREN, "(", false); if (token != nullptr) @@ -212,6 +219,12 @@ shared_ptr Lexer::nextToken() { token = match(TokenKind::FUNCTION, "fun", true); if (token != nullptr) return token; + + token = match(TokenKind::RAW_FUNCTION, "raw", true); + if (token != nullptr) { + foundRawSourceStart = true; + return token; + } 
token = match(TokenKind::RETURN, "ret", true); if (token != nullptr) @@ -271,8 +284,10 @@ shared_ptr Lexer::nextToken() { // new line token = match(TokenKind::NEW_LINE, "\n", false); - if (token != nullptr) + if (token != nullptr) { + tryStartingRawSourceParsing(); return token; + } // eof token = matchEnd(); @@ -430,21 +445,6 @@ shared_ptr Lexer::matchString() { return token; } -shared_ptr Lexer::matchIdentifier() { - int nextIndex = currentIndex; - - while (nextIndex < source.length() && isIdentifier(nextIndex)) - nextIndex++; - - if (nextIndex == currentIndex || !isSeparator(nextIndex)) - return nullptr; - - string lexme = source.substr(currentIndex, nextIndex - currentIndex); - shared_ptr token = make_shared(TokenKind::IDENTIFIER, lexme, currentLine, currentColumn); - advanceWithToken(token); - return token; -} - shared_ptr Lexer::matchType() { int nextIndex = currentIndex; @@ -463,6 +463,52 @@ shared_ptr Lexer::matchType() { return token; } +shared_ptr Lexer::matchIdentifier() { + int nextIndex = currentIndex; + + while (nextIndex < source.length() && isIdentifier(nextIndex)) + nextIndex++; + + if (nextIndex == currentIndex || !isSeparator(nextIndex)) + return nullptr; + + string lexme = source.substr(currentIndex, nextIndex - currentIndex); + shared_ptr token = make_shared(TokenKind::IDENTIFIER, lexme, currentLine, currentColumn); + advanceWithToken(token); + return token; +} + +void Lexer::tryStartingRawSourceParsing() { + if (!foundRawSourceStart) + return; + + if (!tokens.at(tokens.size() - 2)->isOfKind({TokenKind::COLON, TokenKind::COMMA, TokenKind::RIGHT_ARROW})) { + foundRawSourceStart = false; + isParsingRawSource = true; + } +} + +shared_ptr Lexer::matchRawSourceLine() { + int nextIndex = currentIndex; + + if (!isParsingRawSource) + return nullptr; + + if (source.at(nextIndex) == ';') { + isParsingRawSource = false; + return nullptr; + } + + while (source.at(nextIndex) != '\n') + nextIndex++; + + string lexme = source.substr(currentIndex, nextIndex - 
currentIndex); + shared_ptr token = make_shared(TokenKind::RAW_SOURCE_LINE, lexme, currentLine, currentColumn); + advanceWithToken(token); + currentIndex++; // skip newline + return token; +} + shared_ptr Lexer::matchEnd() { if (currentIndex >= source.length()) return make_shared(TokenKind::END, "", currentLine, currentColumn); @@ -530,11 +576,15 @@ bool Lexer::isSeparator(int index) { } void Lexer::advanceWithToken(shared_ptr token) { - if (token->getKind() == TokenKind::NEW_LINE) { - currentLine++; - currentColumn = 0; - } else { - currentColumn += token->getLexme().length(); + switch (token->getKind()) { + case TokenKind::NEW_LINE: + case TokenKind::RAW_SOURCE_LINE: + currentLine++; + currentColumn = 0; + break; + default: + currentColumn += token->getLexme().length(); + break; } currentIndex += token->getLexme().length(); } diff --git a/src/Lexer/Lexer.h b/src/Lexer/Lexer.h index 9cc8cc6..805ad50 100644 --- a/src/Lexer/Lexer.h +++ b/src/Lexer/Lexer.h @@ -17,6 +17,8 @@ private: int currentColumn; vector> tokens; vector> errors; + bool foundRawSourceStart; + bool isParsingRawSource; shared_ptr nextToken(); shared_ptr match(TokenKind kind, string lexme, bool needsSeparator); @@ -28,6 +30,8 @@ private: shared_ptr matchString(); shared_ptr matchType(); shared_ptr matchIdentifier(); + void tryStartingRawSourceParsing(); + shared_ptr matchRawSourceLine(); shared_ptr matchEnd(); bool isWhiteSpace(int index); diff --git a/src/Lexer/Token.h b/src/Lexer/Token.h index 2c4ce2e..c6ee86a 100644 --- a/src/Lexer/Token.h +++ b/src/Lexer/Token.h @@ -30,6 +30,8 @@ enum class TokenKind { RIGHT_ARROW, FUNCTION, + RAW_FUNCTION, + RAW_SOURCE_LINE, RETURN, REPEAT, IF, diff --git a/src/Logger.cpp b/src/Logger.cpp index 9055750..8e22542 100644 --- a/src/Logger.cpp +++ b/src/Logger.cpp @@ -11,6 +11,7 @@ #include "Parser/Statement/StatementMetaExternFunction.h" #include "Parser/Statement/StatementVariable.h" #include "Parser/Statement/StatementFunction.h" +#include 
"Parser/Statement/StatementRawFunction.h" #include "Parser/Statement/StatementBlock.h" #include "Parser/Statement/StatementAssignment.h" #include "Parser/Statement/StatementReturn.h" @@ -97,6 +98,10 @@ string Logger::toString(shared_ptr token) { return "ELSE"; case TokenKind::FUNCTION: return "FUN"; + case TokenKind::RAW_FUNCTION: + return "RAW"; + case TokenKind::RAW_SOURCE_LINE: + return format("RAW_SOURCE_LINE({})", token->getLexme()); case TokenKind::RETURN: return "RET"; case TokenKind::REPEAT: @@ -179,6 +184,8 @@ string Logger::toString(TokenKind tokenKind) { return "ELSE"; case TokenKind::FUNCTION: return "FUN"; + case TokenKind::RAW_FUNCTION: + return "RAW"; case TokenKind::RETURN: return "RET"; case TokenKind::REPEAT: @@ -217,6 +224,8 @@ string Logger::toString(shared_ptr statement) { return toString(dynamic_pointer_cast(statement)); case StatementKind::FUNCTION: return toString(dynamic_pointer_cast(statement)); + case StatementKind::RAW_FUNCTION: + return toString(dynamic_pointer_cast(statement)); case StatementKind::BLOCK: return toString(dynamic_pointer_cast(statement)); case StatementKind::ASSIGNMENT: @@ -262,6 +271,15 @@ string Logger::toString(shared_ptr statement) { return text; } +string Logger::toString(shared_ptr statement) { + string text; + + text += format("RAW(\"{}\"):\n", statement->getName()); + text += statement->getRawSource(); + + return text; +} + string Logger::toString(shared_ptr statement) { string text; @@ -468,13 +486,13 @@ void Logger::print(shared_ptr error) { if (expectedTokenKind) { message = format( - "Expected token {} but instead found \"{}\" at line: {}, column: {}", - toString(*expectedTokenKind), token->getLexme(), token->getLine() + 1, token->getColumn() + 1 + "Expected token {} but instead found {} at line: {}, column: {}", + toString(*expectedTokenKind), toString(token), token->getLine() + 1, token->getColumn() + 1 ); } else { message = format( "Unexpected token \"{}\" found at line: {}, column: {}", - 
token->getLexme(), token->getLine() + 1, token->getColumn() + 1 + toString(token), token->getLine() + 1, token->getColumn() + 1 ); } if (errorMessage) diff --git a/src/Logger.h b/src/Logger.h index d5f1eba..3101911 100644 --- a/src/Logger.h +++ b/src/Logger.h @@ -11,6 +11,7 @@ class Statement; class StatementMetaExternFunction; class StatementVariable; class StatementFunction; +class StatementRawFunction; class StatementBlock; class StatementAssignment; class StatementReturn; @@ -41,6 +42,7 @@ private: static string toString(shared_ptr statement); static string toString(shared_ptr statement); static string toString(shared_ptr statement); + static string toString(shared_ptr statement); static string toString(shared_ptr statement); static string toString(shared_ptr statement); static string toString(shared_ptr statement); diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index c8922ed..62bb8f7 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -16,6 +16,7 @@ #include "Parser/Expression/ExpressionBlock.h" #include "Parser/Statement/StatementFunction.h" +#include "Parser/Statement/StatementRawFunction.h" #include "Parser/Statement/StatementVariable.h" #include "Parser/Statement/StatementAssignment.h" #include "Parser/Statement/StatementReturn.h" @@ -61,6 +62,10 @@ shared_ptr Parser::nextStatement() { if (statement != nullptr || errors.size() > errorsCount) return statement; + statement = matchStatementRawFunction(); + if (statement != nullptr || errors.size() > errorsCount) + return statement; + statement = matchStatementVariable(); if (statement != nullptr || errors.size() > errorsCount) return statement; @@ -230,6 +235,38 @@ shared_ptr Parser::matchStatementFunction() { return make_shared(name, arguments, returnType, dynamic_pointer_cast(statementBlock)); } +shared_ptr Parser::matchStatementRawFunction() { + if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::RAW_FUNCTION}, true, false)) + return nullptr; + + string name; + string 
rawSource; + + // name + name = tokens.at(currentIndex++)->getLexme(); + currentIndex++; // skip raw + + // consume new line + if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) { + markError(TokenKind::NEW_LINE, {}); + return nullptr; + } + + // source + while (tryMatchingTokenKinds({TokenKind::RAW_SOURCE_LINE}, true, false)) { + if (!rawSource.empty()) + rawSource += "\n"; + rawSource += tokens.at(currentIndex++)->getLexme(); + } + + if(!tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true)) { + markError(TokenKind::SEMICOLON, {}); + return nullptr; + } + + return make_shared(name, rawSource); +} + shared_ptr Parser::matchStatementBlock(vector terminalTokenKinds) { vector> statements; diff --git a/src/Parser/Parser.h b/src/Parser/Parser.h index 952a087..fdfe4a2 100644 --- a/src/Parser/Parser.h +++ b/src/Parser/Parser.h @@ -26,6 +26,7 @@ private: shared_ptr matchStatementMetaExternFunction(); shared_ptr matchStatementVariable(); shared_ptr matchStatementFunction(); + shared_ptr matchStatementRawFunction(); shared_ptr matchStatementBlock(vector terminalTokenKinds); shared_ptr matchStatementAssignment(); From 228dd804236192769b7b94b0389dfa225669ac5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Sun, 13 Jul 2025 15:06:52 +0900 Subject: [PATCH 3/7] Build raw function --- samples/test.brc | 6 +++-- src/Compiler/ModuleBuilder.cpp | 40 +++++++++++++++++++++------------- src/Compiler/ModuleBuilder.h | 6 ++--- src/Parser/Parser.cpp | 3 +++ 4 files changed, 35 insertions(+), 20 deletions(-) diff --git a/samples/test.brc b/samples/test.brc index e6a4100..ec9d515 100644 --- a/samples/test.brc +++ b/samples/test.brc @@ -25,8 +25,10 @@ i u32 <- 0, rep text[i] != 0: ;*/ rawAdd raw - mov eax, 5 - mov ebx, 42 + //push rbx + mov ebx, 5 + //pop rbx + mov eax, 42 add eax, ebx ; diff --git a/src/Compiler/ModuleBuilder.cpp b/src/Compiler/ModuleBuilder.cpp index dcb14c2..979efa3 100644 --- a/src/Compiler/ModuleBuilder.cpp +++ 
b/src/Compiler/ModuleBuilder.cpp @@ -131,8 +131,11 @@ void ModuleBuilder::buildFunctionDeclaration(shared_ptr state } void ModuleBuilder::buildRawFunction(shared_ptr statement) { - llvm::FunctionType *funType = llvm::FunctionType::get(llvm::Type::getVoidTy(*context), nullptr, false); - llvm::InlineAsm *rawFun = llvm::InlineAsm::get(funType, statement->getRawSource(), "", false, false, llvm::InlineAsm::AsmDialect::AD_Intel); + // get argument types + vector types; + + llvm::FunctionType *funType = llvm::FunctionType::get(llvm::Type::getVoidTy(*context), types, false); + llvm::InlineAsm *rawFun = llvm::InlineAsm::get(funType, statement->getRawSource(), "~{ebx}", false, false, llvm::InlineAsm::AsmDialect::AD_Intel); if (!setRawFun(statement->getName(), rawFun)) return; @@ -508,15 +511,24 @@ llvm::Value *ModuleBuilder::valueForVar(shared_ptr expressio llvm::Value *ModuleBuilder::valueForCall(shared_ptr expression) { llvm::Function *fun = getFun(expression->getName()); - if (fun == nullptr) - return nullptr; - llvm::FunctionType *funType = fun->getFunctionType(); - vector argValues; - for (shared_ptr &argumentExpression : expression->getArgumentExpressions()) { - llvm::Value *argValue = valueForExpression(argumentExpression); - argValues.push_back(argValue); + if (fun != nullptr) { + llvm::FunctionType *funType = fun->getFunctionType(); + vector argValues; + for (shared_ptr &argumentExpression : expression->getArgumentExpressions()) { + llvm::Value *argValue = valueForExpression(argumentExpression); + argValues.push_back(argValue); + } + return builder->CreateCall(funType, fun, llvm::ArrayRef(argValues)); } - return builder->CreateCall(funType, fun, llvm::ArrayRef(argValues)); + + llvm::InlineAsm *rawFun = getRawFun(expression->getName()); + if (rawFun != nullptr) { + vectorargValues; + return builder->CreateCall(rawFun, llvm::ArrayRef(argValues)); + } + + markError(0, 0, format("Function \"{}\" not defined in scope", expression->getName())); + return nullptr; } bool 
ModuleBuilder::setAlloca(string name, llvm::AllocaInst *alloca) { @@ -563,11 +575,10 @@ llvm::Function* ModuleBuilder::getFun(string name) { scopes.pop(); } - markError(0, 0, format("Function \"{}\" not defined in scope", name)); return nullptr; } -bool ModuleBuilder::setRawFun(string name, llvm::Value *rawFun) { +bool ModuleBuilder::setRawFun(string name, llvm::InlineAsm *rawFun) { if (scopes.top().rawFunMap[name] != nullptr) { markError(0, 0, format("Raw function \"{}\" already defined in scope", name)); return false; @@ -577,17 +588,16 @@ bool ModuleBuilder::setRawFun(string name, llvm::Value *rawFun) { return true; } -llvm::Value *ModuleBuilder::getRawFun(string name) { +llvm::InlineAsm *ModuleBuilder::getRawFun(string name) { stack scopes = this->scopes; while (!scopes.empty()) { - llvm::Value *rawFun = scopes.top().rawFunMap[name]; + llvm::InlineAsm *rawFun = scopes.top().rawFunMap[name]; if (rawFun != nullptr) return rawFun; scopes.pop(); } - markError(0, 0, format("Raw function \"{}\" not defined in scope", name)); return nullptr; } diff --git a/src/Compiler/ModuleBuilder.h b/src/Compiler/ModuleBuilder.h index 2631e54..6518f6e 100644 --- a/src/Compiler/ModuleBuilder.h +++ b/src/Compiler/ModuleBuilder.h @@ -41,7 +41,7 @@ using namespace std; typedef struct { map allocaMap; map funMap; - map rawFunMap; + map rawFunMap; } Scope; class ModuleBuilder { @@ -92,8 +92,8 @@ private: bool setFun(string name, llvm::Function *fun); llvm::Function *getFun(string name); - bool setRawFun(string name, llvm::Value *rawFun); - llvm::Value *getRawFun(string name); + bool setRawFun(string name, llvm::InlineAsm *rawFun); + llvm::InlineAsm *getRawFun(string name); llvm::Type *typeForValueType(shared_ptr valueType, int count = 0); diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 62bb8f7..6759a14 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -257,6 +257,9 @@ shared_ptr Parser::matchStatementRawFunction() { if (!rawSource.empty()) rawSource += 
"\n"; rawSource += tokens.at(currentIndex++)->getLexme(); + + // Consume optional new line (for example because of a comment) + tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); } if(!tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true)) { From 5616036c17507df465e30032369b522a79fc8579 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Mon, 14 Jul 2025 21:40:18 +0900 Subject: [PATCH 4/7] Pass in constraints --- samples/test.brc | 4 ++-- src/Compiler/ModuleBuilder.cpp | 2 +- src/Parser/Parser.cpp | 16 +++++++++++++++- src/Parser/Statement/StatementRawFunction.cpp | 8 ++++++-- src/Parser/Statement/StatementRawFunction.h | 4 +++- 5 files changed, 27 insertions(+), 7 deletions(-) diff --git a/samples/test.brc b/samples/test.brc index ec9d515..ed136ac 100644 --- a/samples/test.brc +++ b/samples/test.brc @@ -20,11 +20,11 @@ i u32 <- 0, rep text[i] != 0: // text data <- "Hello world!" -/*addStuff asm<+r, r>: num1 u32, num2 u32 -> u32 +/*addStuff asm<"+r, r">: num1 u32, num2 u32 -> u32 add $1, $0 ;*/ -rawAdd raw +rawAdd raw<"~{ebx}"> //push rbx mov ebx, 5 //pop rbx diff --git a/src/Compiler/ModuleBuilder.cpp b/src/Compiler/ModuleBuilder.cpp index 979efa3..d9b5194 100644 --- a/src/Compiler/ModuleBuilder.cpp +++ b/src/Compiler/ModuleBuilder.cpp @@ -135,7 +135,7 @@ void ModuleBuilder::buildRawFunction(shared_ptr statement) vector types; llvm::FunctionType *funType = llvm::FunctionType::get(llvm::Type::getVoidTy(*context), types, false); - llvm::InlineAsm *rawFun = llvm::InlineAsm::get(funType, statement->getRawSource(), "~{ebx}", false, false, llvm::InlineAsm::AsmDialect::AD_Intel); + llvm::InlineAsm *rawFun = llvm::InlineAsm::get(funType, statement->getRawSource(), statement->getConstraints(), false, false, llvm::InlineAsm::AsmDialect::AD_Intel); if (!setRawFun(statement->getName(), rawFun)) return; diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 6759a14..54c4e1f 100644 --- a/src/Parser/Parser.cpp +++ 
b/src/Parser/Parser.cpp @@ -240,12 +240,26 @@ shared_ptr Parser::matchStatementRawFunction() { return nullptr; string name; + string constraints; string rawSource; // name name = tokens.at(currentIndex++)->getLexme(); currentIndex++; // skip raw + // constraints + + if (tryMatchingTokenKinds({TokenKind::LESS}, true, true)) { + if (tokens.at(currentIndex)->isOfKind({TokenKind::STRING})) { + constraints = tokens.at(currentIndex++)->getLexme(); + // remove enclosing quotes + if (constraints.length() >= 2) + constraints = constraints.substr(1, constraints.length() - 2); + } + if (!tryMatchingTokenKinds({TokenKind::GREATER}, true, true)) + markError({TokenKind::GREATER}, {}); + } + // consume new line if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) { markError(TokenKind::NEW_LINE, {}); @@ -267,7 +281,7 @@ shared_ptr Parser::matchStatementRawFunction() { return nullptr; } - return make_shared(name, rawSource); + return make_shared(name, constraints, rawSource); } shared_ptr Parser::matchStatementBlock(vector terminalTokenKinds) { diff --git a/src/Parser/Statement/StatementRawFunction.cpp b/src/Parser/Statement/StatementRawFunction.cpp index e40d9da..5369a42 100644 --- a/src/Parser/Statement/StatementRawFunction.cpp +++ b/src/Parser/Statement/StatementRawFunction.cpp @@ -1,12 +1,16 @@ #include "StatementRawFunction.h" -StatementRawFunction::StatementRawFunction(string name, string rawSource): -Statement(StatementKind::RAW_FUNCTION), name(name), rawSource(rawSource) { } +StatementRawFunction::StatementRawFunction(string name, string constraints, string rawSource): +Statement(StatementKind::RAW_FUNCTION), name(name), constraints(constraints), rawSource(rawSource) { } string StatementRawFunction::getName() { return name; } +string StatementRawFunction::getConstraints() { + return constraints; +} + string StatementRawFunction::getRawSource() { return rawSource; } \ No newline at end of file diff --git a/src/Parser/Statement/StatementRawFunction.h 
b/src/Parser/Statement/StatementRawFunction.h index 1a51566..989e5ea 100644 --- a/src/Parser/Statement/StatementRawFunction.h +++ b/src/Parser/Statement/StatementRawFunction.h @@ -5,10 +5,12 @@ class Expression; class StatementRawFunction: public Statement { private: string name; + string constraints; string rawSource; public: - StatementRawFunction(string name, string rawSource); + StatementRawFunction(string name, string constraints, string rawSource); string getName(); + string getConstraints(); string getRawSource(); }; \ No newline at end of file From 51115f5883c5b6d7fd5c8e676fdf30e1f00444a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Tue, 15 Jul 2025 10:23:32 +0900 Subject: [PATCH 5/7] Added args and return type --- src/Compiler/ModuleBuilder.cpp | 26 +++++-------------- src/Parser/Parser.cpp | 8 +++++- src/Parser/Statement/StatementRawFunction.cpp | 12 +++++++-- src/Parser/Statement/StatementRawFunction.h | 8 ++++-- 4 files changed, 30 insertions(+), 24 deletions(-) diff --git a/src/Compiler/ModuleBuilder.cpp b/src/Compiler/ModuleBuilder.cpp index d9b5194..e06d831 100644 --- a/src/Compiler/ModuleBuilder.cpp +++ b/src/Compiler/ModuleBuilder.cpp @@ -131,29 +131,17 @@ void ModuleBuilder::buildFunctionDeclaration(shared_ptr state } void ModuleBuilder::buildRawFunction(shared_ptr statement) { - // get argument types - vector types; + // function types + llvm::Type *returnType = typeForValueType(statement->getReturnValueType()); + vector argTypes; + for (pair> &arg : statement->getArguments()) + argTypes.push_back(typeForValueType(arg.second)); - llvm::FunctionType *funType = llvm::FunctionType::get(llvm::Type::getVoidTy(*context), types, false); + // function declaration & body + llvm::FunctionType *funType = llvm::FunctionType::get(returnType, argTypes, false); llvm::InlineAsm *rawFun = llvm::InlineAsm::get(funType, statement->getRawSource(), statement->getConstraints(), false, false, llvm::InlineAsm::AsmDialect::AD_Intel); if 
(!setRawFun(statement->getName(), rawFun)) return; - - /*int res; - int a = 42; - int b = 13; - - vector types; - types.push_back(typeSint32); - types.push_back(typeSint32); - llvm::FunctionType *asmType = llvm::FunctionType::get(typeSint32, types, false); - llvm::InlineAsm *asmm = llvm::InlineAsm::get(asmType, "add $0, $1", "+{ebx},i", false, false, llvm::InlineAsm::AsmDialect::AD_Intel); - - vectorargValues; - argValues.push_back(llvm::ConstantInt::get(typeSint32, 5, true)); - argValues.push_back(llvm::ConstantInt::get(typeSint32, 4, true)); - - llvm::Value *valu = builder->CreateCall(asmm, llvm::ArrayRef(argValues));*/ } void ModuleBuilder::buildVarDeclaration(shared_ptr statement) { diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 54c4e1f..c012bf6 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -241,6 +241,8 @@ shared_ptr Parser::matchStatementRawFunction() { string name; string constraints; + vector>> arguments; + shared_ptr returnType = ValueType::NONE; string rawSource; // name @@ -260,6 +262,10 @@ shared_ptr Parser::matchStatementRawFunction() { markError({TokenKind::GREATER}, {}); } + // arguments + + // return type + // consume new line if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) { markError(TokenKind::NEW_LINE, {}); @@ -281,7 +287,7 @@ shared_ptr Parser::matchStatementRawFunction() { return nullptr; } - return make_shared(name, constraints, rawSource); + return make_shared(name, constraints, arguments, returnType, rawSource); } shared_ptr Parser::matchStatementBlock(vector terminalTokenKinds) { diff --git a/src/Parser/Statement/StatementRawFunction.cpp b/src/Parser/Statement/StatementRawFunction.cpp index 5369a42..6417c6a 100644 --- a/src/Parser/Statement/StatementRawFunction.cpp +++ b/src/Parser/Statement/StatementRawFunction.cpp @@ -1,7 +1,7 @@ #include "StatementRawFunction.h" -StatementRawFunction::StatementRawFunction(string name, string constraints, string rawSource): 
-Statement(StatementKind::RAW_FUNCTION), name(name), constraints(constraints), rawSource(rawSource) { } +StatementRawFunction::StatementRawFunction(string name, string constraints, vector>> arguments, shared_ptr returnValueType, string rawSource): +Statement(StatementKind::RAW_FUNCTION), name(name), constraints(constraints), arguments(arguments), returnValueType(returnValueType), rawSource(rawSource) { } string StatementRawFunction::getName() { return name; @@ -11,6 +11,14 @@ string StatementRawFunction::getConstraints() { return constraints; } +vector>> StatementRawFunction::getArguments() { + return arguments; +} + +shared_ptr StatementRawFunction::getReturnValueType() { + return returnValueType; +} + string StatementRawFunction::getRawSource() { return rawSource; } \ No newline at end of file diff --git a/src/Parser/Statement/StatementRawFunction.h b/src/Parser/Statement/StatementRawFunction.h index 989e5ea..372592f 100644 --- a/src/Parser/Statement/StatementRawFunction.h +++ b/src/Parser/Statement/StatementRawFunction.h @@ -1,16 +1,20 @@ #include "Parser/Statement/Statement.h" -class Expression; +class ValueType; class StatementRawFunction: public Statement { private: string name; string constraints; + vector>> arguments; + shared_ptr returnValueType; string rawSource; public: - StatementRawFunction(string name, string constraints, string rawSource); + StatementRawFunction(string name, string constraints, vector>> arguments, shared_ptr returnValueType, string rawSource); string getName(); string getConstraints(); + vector>> getArguments(); + shared_ptr getReturnValueType(); string getRawSource(); }; \ No newline at end of file From 9d991f46a25544651cdc636e3e6823bb40d73bd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Tue, 15 Jul 2025 10:27:50 +0900 Subject: [PATCH 6/7] Renamed --- src/Compiler/ModuleBuilder.cpp | 20 ++++++++++---------- src/Compiler/ModuleBuilder.h | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff 
--git a/src/Compiler/ModuleBuilder.cpp b/src/Compiler/ModuleBuilder.cpp index e06d831..36adc31 100644 --- a/src/Compiler/ModuleBuilder.cpp +++ b/src/Compiler/ModuleBuilder.cpp @@ -53,7 +53,7 @@ shared_ptr ModuleBuilder::getModule() { void ModuleBuilder::buildStatement(shared_ptr statement) { switch (statement->getKind()) { case StatementKind::FUNCTION: - buildFunctionDeclaration(dynamic_pointer_cast(statement)); + buildFunction(dynamic_pointer_cast(statement)); break; case StatementKind::RAW_FUNCTION: buildRawFunction(dynamic_pointer_cast(statement)); @@ -84,15 +84,15 @@ void ModuleBuilder::buildStatement(shared_ptr statement) { } } -void ModuleBuilder::buildFunctionDeclaration(shared_ptr statement) { - // get argument types - vector types; - for (pair> &arg : statement->getArguments()) { - types.push_back(typeForValueType(arg.second)); - } +void ModuleBuilder::buildFunction(shared_ptr statement) { + // function types + llvm::Type *returnType = typeForValueType(statement->getReturnValueType()); + vector argTypes; + for (pair> &arg : statement->getArguments()) + argTypes.push_back(typeForValueType(arg.second)); // build function declaration - llvm::FunctionType *funType = llvm::FunctionType::get(typeForValueType(statement->getReturnValueType()), types, false); + llvm::FunctionType *funType = llvm::FunctionType::get(returnType, argTypes, false); llvm::Function *fun = llvm::Function::Create(funType, llvm::GlobalValue::ExternalLinkage, statement->getName(), module.get()); if (!setFun(statement->getName(), fun)) return; @@ -107,7 +107,7 @@ void ModuleBuilder::buildFunctionDeclaration(shared_ptr state int i=0; for (auto &arg : fun->args()) { string name = statement->getArguments()[i].first; - llvm::Type *type = types[i]; + llvm::Type *type = argTypes[i]; arg.setName(name); llvm::AllocaInst *alloca = builder->CreateAlloca(type, nullptr, name); @@ -137,7 +137,7 @@ void ModuleBuilder::buildRawFunction(shared_ptr statement) for (pair> &arg : statement->getArguments()) 
argTypes.push_back(typeForValueType(arg.second)); - // function declaration & body + // build function declaration & body llvm::FunctionType *funType = llvm::FunctionType::get(returnType, argTypes, false); llvm::InlineAsm *rawFun = llvm::InlineAsm::get(funType, statement->getRawSource(), statement->getConstraints(), false, false, llvm::InlineAsm::AsmDialect::AD_Intel); if (!setRawFun(statement->getName(), rawFun)) diff --git a/src/Compiler/ModuleBuilder.h b/src/Compiler/ModuleBuilder.h index 6518f6e..3378856 100644 --- a/src/Compiler/ModuleBuilder.h +++ b/src/Compiler/ModuleBuilder.h @@ -63,7 +63,7 @@ private: stack scopes; void buildStatement(shared_ptr statement); - void buildFunctionDeclaration(shared_ptr statement); + void buildFunction(shared_ptr statement); void buildRawFunction(shared_ptr statement); void buildVarDeclaration(shared_ptr statement); void buildAssignment(shared_ptr statement); From fbc71f4a31f34e3bc86b85992d3b9ed92ef9e1bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= Date: Tue, 15 Jul 2025 13:10:03 +0900 Subject: [PATCH 7/7] Arguments for raw calls --- samples/test.brc | 28 +++++++++++++++++++++------- src/Compiler/ModuleBuilder.cpp | 12 +++++++++++- src/Lexer/Lexer.cpp | 2 +- src/Logger.cpp | 8 +++++++- src/Parser/Parser.cpp | 26 ++++++++++++++++++++++++++ 5 files changed, 66 insertions(+), 10 deletions(-) diff --git a/samples/test.brc b/samples/test.brc index ed136ac..64e0a6e 100644 --- a/samples/test.brc +++ b/samples/test.brc @@ -24,16 +24,30 @@ i u32 <- 0, rep text[i] != 0: add $1, $0 ;*/ -rawAdd raw<"~{ebx}"> - //push rbx - mov ebx, 5 - //pop rbx - mov eax, 42 - add eax, ebx +normAdd fun: num1 sint32, num2 sint32 -> sint32 + ret num1 + num2 ; +rawAdd raw<"=r,r,r">: num1 sint32, num2 sint32 -> sint32 + add $1, $2 + mov $0, $1 +; + +/*printChar raw + .global REGISTER + .text + .REGISTER: + .byte "Hello", 0xa0 + .long RegisterTable + //push 0x21 + +;*/ + main fun -> sint32 - rawAdd() + //printChar() + + res1 sint32 
<- normAdd(4, 5) + res2 sint32 <- rawAdd(4, 5) ret 0 ; \ No newline at end of file diff --git a/src/Compiler/ModuleBuilder.cpp b/src/Compiler/ModuleBuilder.cpp index 36adc31..525652c 100644 --- a/src/Compiler/ModuleBuilder.cpp +++ b/src/Compiler/ModuleBuilder.cpp @@ -41,6 +41,12 @@ shared_ptr ModuleBuilder::getModule() { for (shared_ptr &statement : statements) buildStatement(statement); + // verify module + string errorMessage; + llvm::raw_string_ostream llvmErrorMessage(errorMessage); + if (llvm::verifyModule(*module, &llvmErrorMessage)) + markError(0, 0, errorMessage); + if (!errors.empty()) { for (shared_ptr &error : errors) Logger::print(error); @@ -139,7 +145,7 @@ void ModuleBuilder::buildRawFunction(shared_ptr statement) // build function declaration & body llvm::FunctionType *funType = llvm::FunctionType::get(returnType, argTypes, false); - llvm::InlineAsm *rawFun = llvm::InlineAsm::get(funType, statement->getRawSource(), statement->getConstraints(), false, false, llvm::InlineAsm::AsmDialect::AD_Intel); + llvm::InlineAsm *rawFun = llvm::InlineAsm::get(funType, statement->getRawSource(), statement->getConstraints(), true, false, llvm::InlineAsm::AsmDialect::AD_Intel); if (!setRawFun(statement->getName(), rawFun)) return; } @@ -512,6 +518,10 @@ llvm::Value *ModuleBuilder::valueForCall(shared_ptr expression) llvm::InlineAsm *rawFun = getRawFun(expression->getName()); if (rawFun != nullptr) { vectorargValues; + for (shared_ptr &argumentExpression : expression->getArgumentExpressions()) { + llvm::Value *argValue = valueForExpression(argumentExpression); + argValues.push_back(argValue); + } return builder->CreateCall(rawFun, llvm::ArrayRef(argValues)); } diff --git a/src/Lexer/Lexer.cpp b/src/Lexer/Lexer.cpp index 2985d2f..0706245 100644 --- a/src/Lexer/Lexer.cpp +++ b/src/Lexer/Lexer.cpp @@ -482,7 +482,7 @@ void Lexer::tryStartingRawSourceParsing() { if (!foundRawSourceStart) return; - if (!tokens.at(tokens.size() - 2)->isOfKind({TokenKind::COLON, 
TokenKind::COMMA, TokenKind::RIGHT_ARROW})) { + if (!tokens.at(tokens.size() - 1)->isOfKind({TokenKind::COLON, TokenKind::COMMA, TokenKind::RIGHT_ARROW})) { foundRawSourceStart = false; isParsingRawSource = true; } diff --git a/src/Logger.cpp b/src/Logger.cpp index 8e22542..b8f85e0 100644 --- a/src/Logger.cpp +++ b/src/Logger.cpp @@ -274,7 +274,13 @@ string Logger::toString(shared_ptr statement) { string Logger::toString(shared_ptr statement) { string text; - text += format("RAW(\"{}\"):\n", statement->getName()); + string argsString; + for (int i = 0; i < statement->getArguments().size(); i++) { + auto arg = statement->getArguments().at(i); + argsString += format("ARG({}, {})", arg.first, toString(arg.second)); + } + text += format("RAW(\"{}\"|{}|{}):\n", statement->getName(), argsString, toString(statement->getReturnValueType())); + text += statement->getRawSource(); return text; diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index c012bf6..9852fa2 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -263,8 +263,34 @@ shared_ptr Parser::matchStatementRawFunction() { } // arguments + if (tryMatchingTokenKinds({TokenKind::COLON}, true, true)) { + do { + tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line + if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::TYPE}, true, false)) { + markError({}, "Expected function argument"); + return nullptr; + } + shared_ptr identifierToken = tokens.at(currentIndex++); + shared_ptr argumentType = matchValueType(); + if (argumentType == nullptr) { + markError(TokenKind::TYPE, {}); + return nullptr; + } + + arguments.push_back(pair>(identifierToken->getLexme(), argumentType)); + } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); + } // return type + if (tryMatchingTokenKinds({TokenKind::RIGHT_ARROW}, true, true)) { + tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true); // skip new line + + returnType = matchValueType(); + if (returnType == nullptr) { + 
markError(TokenKind::TYPE, {}); + return nullptr; + } + } // consume new line if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) {