diff --git a/samples/test.brc b/samples/test.brc index b9222ca..e6a4100 100644 --- a/samples/test.brc +++ b/samples/test.brc @@ -24,10 +24,14 @@ i u32 <- 0, rep text[i] != 0: add $1, $0 ;*/ +rawAdd raw + mov eax, 5 + mov ebx, 42 + add eax, ebx +; + main fun -> sint32 - //text data <- "Hello string!\n" - abc sint32 <- 0 - //addStuff() + rawAdd() ret 0 ; \ No newline at end of file diff --git a/src/Lexer/Lexer.cpp b/src/Lexer/Lexer.cpp index c00f19c..2985d2f 100644 --- a/src/Lexer/Lexer.cpp +++ b/src/Lexer/Lexer.cpp @@ -12,6 +12,8 @@ vector> Lexer::getTokens() { currentIndex = 0; currentLine = 0; currentColumn = 0; + foundRawSourceStart = false; + isParsingRawSource = false; tokens.clear(); errors.clear(); @@ -117,6 +119,11 @@ shared_ptr Lexer::nextToken() { return nextToken(); // gets rid of remaining white spaces without repeating the code } + // raw source + token = matchRawSourceLine(); + if (token != nullptr) + return token; + // structural token = match(TokenKind::LEFT_PAREN, "(", false); if (token != nullptr) @@ -212,6 +219,12 @@ shared_ptr Lexer::nextToken() { token = match(TokenKind::FUNCTION, "fun", true); if (token != nullptr) return token; + + token = match(TokenKind::RAW_FUNCTION, "raw", true); + if (token != nullptr) { + foundRawSourceStart = true; + return token; + } token = match(TokenKind::RETURN, "ret", true); if (token != nullptr) @@ -271,8 +284,10 @@ shared_ptr Lexer::nextToken() { // new line token = match(TokenKind::NEW_LINE, "\n", false); - if (token != nullptr) + if (token != nullptr) { + tryStartingRawSourceParsing(); return token; + } // eof token = matchEnd(); @@ -430,21 +445,6 @@ shared_ptr Lexer::matchString() { return token; } -shared_ptr Lexer::matchIdentifier() { - int nextIndex = currentIndex; - - while (nextIndex < source.length() && isIdentifier(nextIndex)) - nextIndex++; - - if (nextIndex == currentIndex || !isSeparator(nextIndex)) - return nullptr; - - string lexme = source.substr(currentIndex, nextIndex - currentIndex); - shared_ptr token = make_shared(TokenKind::IDENTIFIER, lexme, currentLine, currentColumn); - advanceWithToken(token); - return token; -} - shared_ptr Lexer::matchType() { int nextIndex = currentIndex; @@ -463,6 +463,52 @@ shared_ptr Lexer::matchType() { return token; } +shared_ptr Lexer::matchIdentifier() { + int nextIndex = currentIndex; + + while (nextIndex < source.length() && isIdentifier(nextIndex)) + nextIndex++; + + if (nextIndex == currentIndex || !isSeparator(nextIndex)) + return nullptr; + + string lexme = source.substr(currentIndex, nextIndex - currentIndex); + shared_ptr token = make_shared(TokenKind::IDENTIFIER, lexme, currentLine, currentColumn); + advanceWithToken(token); + return token; +} + +void Lexer::tryStartingRawSourceParsing() { + if (!foundRawSourceStart) + return; + + if (!tokens.at(tokens.size() - 2)->isOfKind({TokenKind::COLON, TokenKind::COMMA, TokenKind::RIGHT_ARROW})) { + foundRawSourceStart = false; + isParsingRawSource = true; + } +} + +shared_ptr Lexer::matchRawSourceLine() { + int nextIndex = currentIndex; + + if (!isParsingRawSource) + return nullptr; + + if (source.at(nextIndex) == ';') { + isParsingRawSource = false; + return nullptr; + } + + while (source.at(nextIndex) != '\n') + nextIndex++; + + string lexme = source.substr(currentIndex, nextIndex - currentIndex); + shared_ptr token = make_shared(TokenKind::RAW_SOURCE_LINE, lexme, currentLine, currentColumn); + advanceWithToken(token); + currentIndex++; // skip newline + return token; +} + shared_ptr Lexer::matchEnd() { if (currentIndex >= source.length()) return make_shared(TokenKind::END, "", currentLine, currentColumn); @@ -530,11 +576,15 @@ bool Lexer::isSeparator(int index) { } void Lexer::advanceWithToken(shared_ptr token) { - if (token->getKind() == TokenKind::NEW_LINE) { - currentLine++; - currentColumn = 0; - } else { - currentColumn += token->getLexme().length(); + switch (token->getKind()) { + case TokenKind::NEW_LINE: + case TokenKind::RAW_SOURCE_LINE: + currentLine++; + currentColumn = 0; + break; + default: + currentColumn += token->getLexme().length(); + break; } currentIndex += token->getLexme().length(); } diff --git a/src/Lexer/Lexer.h b/src/Lexer/Lexer.h index 9cc8cc6..805ad50 100644 --- a/src/Lexer/Lexer.h +++ b/src/Lexer/Lexer.h @@ -17,6 +17,8 @@ private: int currentColumn; vector> tokens; vector> errors; + bool foundRawSourceStart; + bool isParsingRawSource; shared_ptr nextToken(); shared_ptr match(TokenKind kind, string lexme, bool needsSeparator); @@ -28,6 +30,8 @@ private: shared_ptr matchString(); shared_ptr matchType(); shared_ptr matchIdentifier(); + void tryStartingRawSourceParsing(); + shared_ptr matchRawSourceLine(); shared_ptr matchEnd(); bool isWhiteSpace(int index); diff --git a/src/Lexer/Token.h b/src/Lexer/Token.h index 2c4ce2e..c6ee86a 100644 --- a/src/Lexer/Token.h +++ b/src/Lexer/Token.h @@ -30,6 +30,8 @@ enum class TokenKind { RIGHT_ARROW, FUNCTION, + RAW_FUNCTION, + RAW_SOURCE_LINE, RETURN, REPEAT, IF, diff --git a/src/Logger.cpp b/src/Logger.cpp index 9055750..8e22542 100644 --- a/src/Logger.cpp +++ b/src/Logger.cpp @@ -11,6 +11,7 @@ #include "Parser/Statement/StatementMetaExternFunction.h" #include "Parser/Statement/StatementVariable.h" #include "Parser/Statement/StatementFunction.h" +#include "Parser/Statement/StatementRawFunction.h" #include "Parser/Statement/StatementBlock.h" #include "Parser/Statement/StatementAssignment.h" #include "Parser/Statement/StatementReturn.h" @@ -97,6 +98,10 @@ string Logger::toString(shared_ptr token) { return "ELSE"; case TokenKind::FUNCTION: return "FUN"; + case TokenKind::RAW_FUNCTION: + return "RAW"; + case TokenKind::RAW_SOURCE_LINE: + return format("RAW_SOURCE_LINE({})", token->getLexme()); case TokenKind::RETURN: return "RET"; case TokenKind::REPEAT: @@ -179,6 +184,8 @@ string Logger::toString(TokenKind tokenKind) { return "ELSE"; case TokenKind::FUNCTION: return "FUN"; + case TokenKind::RAW_FUNCTION: + return "RAW"; case TokenKind::RETURN: return "RET"; case TokenKind::REPEAT: @@ -217,6 +224,8 @@ string Logger::toString(shared_ptr statement) { return toString(dynamic_pointer_cast(statement)); case StatementKind::FUNCTION: return toString(dynamic_pointer_cast(statement)); + case StatementKind::RAW_FUNCTION: + return toString(dynamic_pointer_cast(statement)); case StatementKind::BLOCK: return toString(dynamic_pointer_cast(statement)); case StatementKind::ASSIGNMENT: @@ -262,6 +271,15 @@ string Logger::toString(shared_ptr statement) { return text; } +string Logger::toString(shared_ptr statement) { + string text; + + text += format("RAW(\"{}\"):\n", statement->getName()); + text += statement->getRawSource(); + + return text; +} + string Logger::toString(shared_ptr statement) { string text; @@ -468,13 +486,13 @@ void Logger::print(shared_ptr error) { if (expectedTokenKind) { message = format( - "Expected token {} but instead found \"{}\" at line: {}, column: {}", - toString(*expectedTokenKind), token->getLexme(), token->getLine() + 1, token->getColumn() + 1 + "Expected token {} but instead found {} at line: {}, column: {}", + toString(*expectedTokenKind), toString(token), token->getLine() + 1, token->getColumn() + 1 ); } else { message = format( "Unexpected token \"{}\" found at line: {}, column: {}", - token->getLexme(), token->getLine() + 1, token->getColumn() + 1 + toString(token), token->getLine() + 1, token->getColumn() + 1 ); } if (errorMessage) diff --git a/src/Logger.h b/src/Logger.h index d5f1eba..3101911 100644 --- a/src/Logger.h +++ b/src/Logger.h @@ -11,6 +11,7 @@ class Statement; class StatementMetaExternFunction; class StatementVariable; class StatementFunction; +class StatementRawFunction; class StatementBlock; class StatementAssignment; class StatementReturn; @@ -41,6 +42,7 @@ private: static string toString(shared_ptr statement); static string toString(shared_ptr statement); static string toString(shared_ptr statement); + static string toString(shared_ptr statement); static string toString(shared_ptr statement); static string toString(shared_ptr statement); static string toString(shared_ptr statement); diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index c8922ed..62bb8f7 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -16,6 +16,7 @@ #include "Parser/Expression/ExpressionBlock.h" #include "Parser/Statement/StatementFunction.h" +#include "Parser/Statement/StatementRawFunction.h" #include "Parser/Statement/StatementVariable.h" #include "Parser/Statement/StatementAssignment.h" #include "Parser/Statement/StatementReturn.h" @@ -61,6 +62,10 @@ shared_ptr Parser::nextStatement() { if (statement != nullptr || errors.size() > errorsCount) return statement; + statement = matchStatementRawFunction(); + if (statement != nullptr || errors.size() > errorsCount) + return statement; + statement = matchStatementVariable(); if (statement != nullptr || errors.size() > errorsCount) return statement; @@ -230,6 +235,38 @@ shared_ptr Parser::matchStatementFunction() { return make_shared(name, arguments, returnType, dynamic_pointer_cast(statementBlock)); } +shared_ptr Parser::matchStatementRawFunction() { + if (!tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::RAW_FUNCTION}, true, false)) + return nullptr; + + string name; + string rawSource; + + // name + name = tokens.at(currentIndex++)->getLexme(); + currentIndex++; // skip raw + + // consume new line + if (!tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true)) { + markError(TokenKind::NEW_LINE, {}); + return nullptr; + } + + // source + while (tryMatchingTokenKinds({TokenKind::RAW_SOURCE_LINE}, true, false)) { + if (!rawSource.empty()) + rawSource += "\n"; + rawSource += tokens.at(currentIndex++)->getLexme(); + } + + if(!tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true)) { + markError(TokenKind::SEMICOLON, {}); + return nullptr; + } + + return make_shared(name, rawSource); +} + shared_ptr Parser::matchStatementBlock(vector terminalTokenKinds) { vector> statements; diff --git a/src/Parser/Parser.h b/src/Parser/Parser.h index 952a087..fdfe4a2 100644 --- a/src/Parser/Parser.h +++ b/src/Parser/Parser.h @@ -26,6 +26,7 @@ private: shared_ptr matchStatementMetaExternFunction(); shared_ptr matchStatementVariable(); shared_ptr matchStatementFunction(); + shared_ptr matchStatementRawFunction(); shared_ptr matchStatementBlock(vector terminalTokenKinds); shared_ptr matchStatementAssignment();