Parse raw function

This commit is contained in:
Rafał Grodziński
2025-07-13 11:12:35 +09:00
parent c7812ccf43
commit 26c566f4f6
8 changed files with 145 additions and 27 deletions

View File

@@ -24,10 +24,14 @@ i u32 <- 0, rep text[i] != 0:
add $1, $0 add $1, $0
;*/ ;*/
rawAdd raw
mov eax, 5
mov ebx, 42
add eax, ebx
;
main fun -> sint32 main fun -> sint32
//text data<sint32> <- "Hello string!\n" rawAdd()
abc sint32 <- 0
//addStuff()
ret 0 ret 0
; ;

View File

@@ -12,6 +12,8 @@ vector<shared_ptr<Token>> Lexer::getTokens() {
currentIndex = 0; currentIndex = 0;
currentLine = 0; currentLine = 0;
currentColumn = 0; currentColumn = 0;
foundRawSourceStart = false;
isParsingRawSource = false;
tokens.clear(); tokens.clear();
errors.clear(); errors.clear();
@@ -117,6 +119,11 @@ shared_ptr<Token> Lexer::nextToken() {
return nextToken(); // gets rid of remaining white spaces without repeating the code return nextToken(); // gets rid of remaining white spaces without repeating the code
} }
// raw source
token = matchRawSourceLine();
if (token != nullptr)
return token;
// structural // structural
token = match(TokenKind::LEFT_PAREN, "(", false); token = match(TokenKind::LEFT_PAREN, "(", false);
if (token != nullptr) if (token != nullptr)
@@ -213,6 +220,12 @@ shared_ptr<Token> Lexer::nextToken() {
if (token != nullptr) if (token != nullptr)
return token; return token;
token = match(TokenKind::RAW_FUNCTION, "raw", true);
if (token != nullptr) {
foundRawSourceStart = true;
return token;
}
token = match(TokenKind::RETURN, "ret", true); token = match(TokenKind::RETURN, "ret", true);
if (token != nullptr) if (token != nullptr)
return token; return token;
@@ -271,8 +284,10 @@ shared_ptr<Token> Lexer::nextToken() {
// new line // new line
token = match(TokenKind::NEW_LINE, "\n", false); token = match(TokenKind::NEW_LINE, "\n", false);
if (token != nullptr) if (token != nullptr) {
tryStartingRawSourceParsing();
return token; return token;
}
// eof // eof
token = matchEnd(); token = matchEnd();
@@ -430,21 +445,6 @@ shared_ptr<Token> Lexer::matchString() {
return token; return token;
} }
// Tries to lex an identifier that starts at currentIndex.
// Returns an IDENTIFIER token and advances the lexer past it, or nullptr (nothing consumed)
// when no character is accepted by isIdentifier() or the matched run is not followed by
// a position accepted by isSeparator().
shared_ptr<Token> Lexer::matchIdentifier() {
int nextIndex = currentIndex;
// extend nextIndex over every consecutive character accepted by isIdentifier()
while (nextIndex < source.length() && isIdentifier(nextIndex))
nextIndex++;
// reject an empty match and a match that is not terminated by a separator
if (nextIndex == currentIndex || !isSeparator(nextIndex))
return nullptr;
// the lexeme is the half-open range [currentIndex, nextIndex)
string lexme = source.substr(currentIndex, nextIndex - currentIndex);
shared_ptr<Token> token = make_shared<Token>(TokenKind::IDENTIFIER, lexme, currentLine, currentColumn);
// moves currentIndex (and the column) forward by the lexeme's length
advanceWithToken(token);
return token;
}
shared_ptr<Token> Lexer::matchType() { shared_ptr<Token> Lexer::matchType() {
int nextIndex = currentIndex; int nextIndex = currentIndex;
@@ -463,6 +463,52 @@ shared_ptr<Token> Lexer::matchType() {
return token; return token;
} }
// Tries to lex an identifier that starts at the current position.
// On success an IDENTIFIER token is returned and the lexer is advanced past it.
// Returns nullptr, consuming nothing, when no identifier character is present
// or when the matched run is not followed by a separator.
shared_ptr<Token> Lexer::matchIdentifier() {
    // find the first position which is not accepted by isIdentifier()
    int endIndex = currentIndex;
    for (; endIndex < source.length(); endIndex++) {
        if (!isIdentifier(endIndex))
            break;
    }

    const bool isEmptyMatch = endIndex == currentIndex;
    if (isEmptyMatch || !isSeparator(endIndex))
        return nullptr;

    const int lexmeLength = endIndex - currentIndex;
    shared_ptr<Token> identifierToken = make_shared<Token>(
        TokenKind::IDENTIFIER,
        source.substr(currentIndex, lexmeLength),
        currentLine,
        currentColumn
    );
    advanceWithToken(identifierToken);

    return identifierToken;
}
// Switches the lexer into raw source mode once the line containing the `raw` keyword is finished.
// Does nothing unless a `raw` keyword has been seen (foundRawSourceStart).
// The token at tokens.size() - 2 is inspected: when it is a COLON, COMMA or RIGHT_ARROW
// the lexer keeps waiting and tries again on a later call.
// NOTE(review): presumably those kinds mean that the declaration continues on the next line - confirm.
void Lexer::tryStartingRawSourceParsing() {
    if (!foundRawSourceStart)
        return;

    // size() is unsigned, so with fewer than two tokens `size() - 2` wraps around and at() throws
    // std::out_of_range (e.g. a source which starts with `raw` followed by a new line).
    // There is no token to inspect in that case, so raw parsing starts right away.
    const bool hasTokenToInspect = tokens.size() >= 2;
    if (hasTokenToInspect && tokens.at(tokens.size() - 2)->isOfKind({TokenKind::COLON, TokenKind::COMMA, TokenKind::RIGHT_ARROW}))
        return;

    foundRawSourceStart = false;
    isParsingRawSource = true;
}
// Lexes one line of a raw function body while the lexer is in raw source mode.
// Returns a RAW_SOURCE_LINE token holding everything from currentIndex up to (not including)
// the next new line and moves the lexer past that new line.
// Returns nullptr when:
// - the lexer is not in raw source mode,
// - the line starts with ';', which ends raw source mode (the ';' is left for the regular matchers),
// - the end of the source has been reached, which also ends raw source mode.
shared_ptr<Token> Lexer::matchRawSourceLine() {
    if (!isParsingRawSource)
        return nullptr;

    // An unterminated raw block must not read past the end of the source (source.at() would throw
    // std::out_of_range). Leave raw mode so that the regular matchers can run on the remaining input.
    if (currentIndex >= source.length()) {
        isParsingRawSource = false;
        return nullptr;
    }

    if (source.at(currentIndex) == ';') {
        isParsingRawSource = false;
        return nullptr;
    }

    // the line ends at the next new line or at the end of the source, whichever comes first
    int nextIndex = currentIndex;
    while (nextIndex < source.length() && source.at(nextIndex) != '\n')
        nextIndex++;

    string lexme = source.substr(currentIndex, nextIndex - currentIndex);
    shared_ptr<Token> token = make_shared<Token>(TokenKind::RAW_SOURCE_LINE, lexme, currentLine, currentColumn);
    advanceWithToken(token);

    // skip the new line, but only if there is one (the last line may end with the source itself)
    if (currentIndex < source.length())
        currentIndex++;

    return token;
}
shared_ptr<Token> Lexer::matchEnd() { shared_ptr<Token> Lexer::matchEnd() {
if (currentIndex >= source.length()) if (currentIndex >= source.length())
return make_shared<Token>(TokenKind::END, "", currentLine, currentColumn); return make_shared<Token>(TokenKind::END, "", currentLine, currentColumn);
@@ -530,11 +576,15 @@ bool Lexer::isSeparator(int index) {
} }
void Lexer::advanceWithToken(shared_ptr<Token> token) { void Lexer::advanceWithToken(shared_ptr<Token> token) {
if (token->getKind() == TokenKind::NEW_LINE) { switch (token->getKind()) {
currentLine++; case TokenKind::NEW_LINE:
currentColumn = 0; case TokenKind::RAW_SOURCE_LINE:
} else { currentLine++;
currentColumn += token->getLexme().length(); currentColumn = 0;
break;
default:
currentColumn += token->getLexme().length();
break;
} }
currentIndex += token->getLexme().length(); currentIndex += token->getLexme().length();
} }

View File

@@ -17,6 +17,8 @@ private:
int currentColumn; int currentColumn;
vector<shared_ptr<Token>> tokens; vector<shared_ptr<Token>> tokens;
vector<shared_ptr<Error>> errors; vector<shared_ptr<Error>> errors;
bool foundRawSourceStart;
bool isParsingRawSource;
shared_ptr<Token> nextToken(); shared_ptr<Token> nextToken();
shared_ptr<Token> match(TokenKind kind, string lexme, bool needsSeparator); shared_ptr<Token> match(TokenKind kind, string lexme, bool needsSeparator);
@@ -28,6 +30,8 @@ private:
shared_ptr<Token> matchString(); shared_ptr<Token> matchString();
shared_ptr<Token> matchType(); shared_ptr<Token> matchType();
shared_ptr<Token> matchIdentifier(); shared_ptr<Token> matchIdentifier();
void tryStartingRawSourceParsing();
shared_ptr<Token> matchRawSourceLine();
shared_ptr<Token> matchEnd(); shared_ptr<Token> matchEnd();
bool isWhiteSpace(int index); bool isWhiteSpace(int index);

View File

@@ -30,6 +30,8 @@ enum class TokenKind {
RIGHT_ARROW, RIGHT_ARROW,
FUNCTION, FUNCTION,
RAW_FUNCTION,
RAW_SOURCE_LINE,
RETURN, RETURN,
REPEAT, REPEAT,
IF, IF,

View File

@@ -11,6 +11,7 @@
#include "Parser/Statement/StatementMetaExternFunction.h" #include "Parser/Statement/StatementMetaExternFunction.h"
#include "Parser/Statement/StatementVariable.h" #include "Parser/Statement/StatementVariable.h"
#include "Parser/Statement/StatementFunction.h" #include "Parser/Statement/StatementFunction.h"
#include "Parser/Statement/StatementRawFunction.h"
#include "Parser/Statement/StatementBlock.h" #include "Parser/Statement/StatementBlock.h"
#include "Parser/Statement/StatementAssignment.h" #include "Parser/Statement/StatementAssignment.h"
#include "Parser/Statement/StatementReturn.h" #include "Parser/Statement/StatementReturn.h"
@@ -97,6 +98,10 @@ string Logger::toString(shared_ptr<Token> token) {
return "ELSE"; return "ELSE";
case TokenKind::FUNCTION: case TokenKind::FUNCTION:
return "FUN"; return "FUN";
case TokenKind::RAW_FUNCTION:
return "RAW";
case TokenKind::RAW_SOURCE_LINE:
return format("RAW_SOURCE_LINE({})", token->getLexme());
case TokenKind::RETURN: case TokenKind::RETURN:
return "RET"; return "RET";
case TokenKind::REPEAT: case TokenKind::REPEAT:
@@ -179,6 +184,8 @@ string Logger::toString(TokenKind tokenKind) {
return "ELSE"; return "ELSE";
case TokenKind::FUNCTION: case TokenKind::FUNCTION:
return "FUN"; return "FUN";
case TokenKind::RAW_FUNCTION:
return "RAW";
case TokenKind::RETURN: case TokenKind::RETURN:
return "RET"; return "RET";
case TokenKind::REPEAT: case TokenKind::REPEAT:
@@ -217,6 +224,8 @@ string Logger::toString(shared_ptr<Statement> statement) {
return toString(dynamic_pointer_cast<StatementVariable>(statement)); return toString(dynamic_pointer_cast<StatementVariable>(statement));
case StatementKind::FUNCTION: case StatementKind::FUNCTION:
return toString(dynamic_pointer_cast<StatementFunction>(statement)); return toString(dynamic_pointer_cast<StatementFunction>(statement));
case StatementKind::RAW_FUNCTION:
return toString(dynamic_pointer_cast<StatementRawFunction>(statement));
case StatementKind::BLOCK: case StatementKind::BLOCK:
return toString(dynamic_pointer_cast<StatementBlock>(statement)); return toString(dynamic_pointer_cast<StatementBlock>(statement));
case StatementKind::ASSIGNMENT: case StatementKind::ASSIGNMENT:
@@ -262,6 +271,15 @@ string Logger::toString(shared_ptr<StatementFunction> statement) {
return text; return text;
} }
// Describes a raw function statement: a `RAW("<name>"):` header line
// followed by the statement's raw source, appended unchanged.
string Logger::toString(shared_ptr<StatementRawFunction> statement) {
    string description = format("RAW(\"{}\"):\n", statement->getName());
    description.append(statement->getRawSource());

    return description;
}
string Logger::toString(shared_ptr<StatementBlock> statement) { string Logger::toString(shared_ptr<StatementBlock> statement) {
string text; string text;
@@ -468,13 +486,13 @@ void Logger::print(shared_ptr<Error> error) {
if (expectedTokenKind) { if (expectedTokenKind) {
message = format( message = format(
"Expected token {} but instead found \"{}\" at line: {}, column: {}", "Expected token {} but instead found {} at line: {}, column: {}",
toString(*expectedTokenKind), token->getLexme(), token->getLine() + 1, token->getColumn() + 1 toString(*expectedTokenKind), toString(token), token->getLine() + 1, token->getColumn() + 1
); );
} else { } else {
message = format( message = format(
"Unexpected token \"{}\" found at line: {}, column: {}", "Unexpected token \"{}\" found at line: {}, column: {}",
token->getLexme(), token->getLine() + 1, token->getColumn() + 1 toString(token), token->getLine() + 1, token->getColumn() + 1
); );
} }
if (errorMessage) if (errorMessage)

View File

@@ -11,6 +11,7 @@ class Statement;
class StatementMetaExternFunction; class StatementMetaExternFunction;
class StatementVariable; class StatementVariable;
class StatementFunction; class StatementFunction;
class StatementRawFunction;
class StatementBlock; class StatementBlock;
class StatementAssignment; class StatementAssignment;
class StatementReturn; class StatementReturn;
@@ -41,6 +42,7 @@ private:
static string toString(shared_ptr<StatementMetaExternFunction> statement); static string toString(shared_ptr<StatementMetaExternFunction> statement);
static string toString(shared_ptr<StatementVariable> statement); static string toString(shared_ptr<StatementVariable> statement);
static string toString(shared_ptr<StatementFunction> statement); static string toString(shared_ptr<StatementFunction> statement);
static string toString(shared_ptr<StatementRawFunction> statement);
static string toString(shared_ptr<StatementBlock> statement); static string toString(shared_ptr<StatementBlock> statement);
static string toString(shared_ptr<StatementAssignment> statement); static string toString(shared_ptr<StatementAssignment> statement);
static string toString(shared_ptr<StatementReturn> statement); static string toString(shared_ptr<StatementReturn> statement);

View File

@@ -16,6 +16,7 @@
#include "Parser/Expression/ExpressionBlock.h" #include "Parser/Expression/ExpressionBlock.h"
#include "Parser/Statement/StatementFunction.h" #include "Parser/Statement/StatementFunction.h"
#include "Parser/Statement/StatementRawFunction.h"
#include "Parser/Statement/StatementVariable.h" #include "Parser/Statement/StatementVariable.h"
#include "Parser/Statement/StatementAssignment.h" #include "Parser/Statement/StatementAssignment.h"
#include "Parser/Statement/StatementReturn.h" #include "Parser/Statement/StatementReturn.h"
@@ -61,6 +62,10 @@ shared_ptr<Statement> Parser::nextStatement() {
if (statement != nullptr || errors.size() > errorsCount) if (statement != nullptr || errors.size() > errorsCount)
return statement; return statement;
statement = matchStatementRawFunction();
if (statement != nullptr || errors.size() > errorsCount)
return statement;
statement = matchStatementVariable(); statement = matchStatementVariable();
if (statement != nullptr || errors.size() > errorsCount) if (statement != nullptr || errors.size() > errorsCount)
return statement; return statement;
@@ -230,6 +235,38 @@ shared_ptr<Statement> Parser::matchStatementFunction() {
return make_shared<StatementFunction>(name, arguments, returnType, dynamic_pointer_cast<StatementBlock>(statementBlock)); return make_shared<StatementFunction>(name, arguments, returnType, dynamic_pointer_cast<StatementBlock>(statementBlock));
} }
// Parses a raw function statement:
//   IDENTIFIER RAW_FUNCTION NEW_LINE RAW_SOURCE_LINE* SEMICOLON
// Returns nullptr without consuming anything when the tokens at the current position
// are not IDENTIFIER followed by RAW_FUNCTION.
// A missing NEW_LINE or SEMICOLON is reported through markError() and nullptr is returned.
// The raw source lines are joined with "\n" into a single string.
shared_ptr<Statement> Parser::matchStatementRawFunction() {
    const bool isRawFunction = tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::RAW_FUNCTION}, true, false);
    if (!isRawFunction)
        return nullptr;

    // the identifier carries the name, the `raw` keyword which follows it carries no data
    string name = tokens.at(currentIndex)->getLexme();
    currentIndex += 2;

    // the declaration has to be finished with a new line
    const bool hasNewLine = tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);
    if (!hasNewLine) {
        markError(TokenKind::NEW_LINE, {});
        return nullptr;
    }

    // collect the body, one token per source line
    string rawSource;
    while (tryMatchingTokenKinds({TokenKind::RAW_SOURCE_LINE}, true, false)) {
        string line = tokens.at(currentIndex)->getLexme();
        currentIndex++;

        if (!rawSource.empty())
            rawSource += "\n";
        rawSource += line;
    }

    // the body has to be terminated with a semicolon
    const bool hasSemicolon = tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true);
    if (!hasSemicolon) {
        markError(TokenKind::SEMICOLON, {});
        return nullptr;
    }

    return make_shared<StatementRawFunction>(name, rawSource);
}
shared_ptr<Statement> Parser::matchStatementBlock(vector<TokenKind> terminalTokenKinds) { shared_ptr<Statement> Parser::matchStatementBlock(vector<TokenKind> terminalTokenKinds) {
vector<shared_ptr<Statement>> statements; vector<shared_ptr<Statement>> statements;

View File

@@ -26,6 +26,7 @@ private:
shared_ptr<Statement> matchStatementMetaExternFunction(); shared_ptr<Statement> matchStatementMetaExternFunction();
shared_ptr<Statement> matchStatementVariable(); shared_ptr<Statement> matchStatementVariable();
shared_ptr<Statement> matchStatementFunction(); shared_ptr<Statement> matchStatementFunction();
shared_ptr<Statement> matchStatementRawFunction();
shared_ptr<Statement> matchStatementBlock(vector<TokenKind> terminalTokenKinds); shared_ptr<Statement> matchStatementBlock(vector<TokenKind> terminalTokenKinds);
shared_ptr<Statement> matchStatementAssignment(); shared_ptr<Statement> matchStatementAssignment();