Parse raw function
This commit is contained in:
@@ -24,10 +24,14 @@ i u32 <- 0, rep text[i] != 0:
|
|||||||
add $1, $0
|
add $1, $0
|
||||||
;*/
|
;*/
|
||||||
|
|
||||||
|
rawAdd raw
|
||||||
|
mov eax, 5
|
||||||
|
mov ebx, 42
|
||||||
|
add eax, ebx
|
||||||
|
;
|
||||||
|
|
||||||
main fun -> sint32
|
main fun -> sint32
|
||||||
//text data<sint32> <- "Hello string!\n"
|
rawAdd()
|
||||||
abc sint32 <- 0
|
|
||||||
//addStuff()
|
|
||||||
|
|
||||||
ret 0
|
ret 0
|
||||||
;
|
;
|
||||||
@@ -12,6 +12,8 @@ vector<shared_ptr<Token>> Lexer::getTokens() {
|
|||||||
currentIndex = 0;
|
currentIndex = 0;
|
||||||
currentLine = 0;
|
currentLine = 0;
|
||||||
currentColumn = 0;
|
currentColumn = 0;
|
||||||
|
foundRawSourceStart = false;
|
||||||
|
isParsingRawSource = false;
|
||||||
|
|
||||||
tokens.clear();
|
tokens.clear();
|
||||||
errors.clear();
|
errors.clear();
|
||||||
@@ -117,6 +119,11 @@ shared_ptr<Token> Lexer::nextToken() {
|
|||||||
return nextToken(); // gets rid of remaining white spaces without repeating the code
|
return nextToken(); // gets rid of remaining white spaces without repeating the code
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// raw source
|
||||||
|
token = matchRawSourceLine();
|
||||||
|
if (token != nullptr)
|
||||||
|
return token;
|
||||||
|
|
||||||
// structural
|
// structural
|
||||||
token = match(TokenKind::LEFT_PAREN, "(", false);
|
token = match(TokenKind::LEFT_PAREN, "(", false);
|
||||||
if (token != nullptr)
|
if (token != nullptr)
|
||||||
@@ -212,6 +219,12 @@ shared_ptr<Token> Lexer::nextToken() {
|
|||||||
token = match(TokenKind::FUNCTION, "fun", true);
|
token = match(TokenKind::FUNCTION, "fun", true);
|
||||||
if (token != nullptr)
|
if (token != nullptr)
|
||||||
return token;
|
return token;
|
||||||
|
|
||||||
|
token = match(TokenKind::RAW_FUNCTION, "raw", true);
|
||||||
|
if (token != nullptr) {
|
||||||
|
foundRawSourceStart = true;
|
||||||
|
return token;
|
||||||
|
}
|
||||||
|
|
||||||
token = match(TokenKind::RETURN, "ret", true);
|
token = match(TokenKind::RETURN, "ret", true);
|
||||||
if (token != nullptr)
|
if (token != nullptr)
|
||||||
@@ -271,8 +284,10 @@ shared_ptr<Token> Lexer::nextToken() {
|
|||||||
|
|
||||||
// new line
|
// new line
|
||||||
token = match(TokenKind::NEW_LINE, "\n", false);
|
token = match(TokenKind::NEW_LINE, "\n", false);
|
||||||
if (token != nullptr)
|
if (token != nullptr) {
|
||||||
|
tryStartingRawSourceParsing();
|
||||||
return token;
|
return token;
|
||||||
|
}
|
||||||
|
|
||||||
// eof
|
// eof
|
||||||
token = matchEnd();
|
token = matchEnd();
|
||||||
@@ -430,21 +445,6 @@ shared_ptr<Token> Lexer::matchString() {
|
|||||||
return token;
|
return token;
|
||||||
}
|
}
|
||||||
|
|
||||||
shared_ptr<Token> Lexer::matchIdentifier() {
|
|
||||||
int nextIndex = currentIndex;
|
|
||||||
|
|
||||||
while (nextIndex < source.length() && isIdentifier(nextIndex))
|
|
||||||
nextIndex++;
|
|
||||||
|
|
||||||
if (nextIndex == currentIndex || !isSeparator(nextIndex))
|
|
||||||
return nullptr;
|
|
||||||
|
|
||||||
string lexme = source.substr(currentIndex, nextIndex - currentIndex);
|
|
||||||
shared_ptr<Token> token = make_shared<Token>(TokenKind::IDENTIFIER, lexme, currentLine, currentColumn);
|
|
||||||
advanceWithToken(token);
|
|
||||||
return token;
|
|
||||||
}
|
|
||||||
|
|
||||||
shared_ptr<Token> Lexer::matchType() {
|
shared_ptr<Token> Lexer::matchType() {
|
||||||
int nextIndex = currentIndex;
|
int nextIndex = currentIndex;
|
||||||
|
|
||||||
@@ -463,6 +463,52 @@ shared_ptr<Token> Lexer::matchType() {
|
|||||||
return token;
|
return token;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tries to match an identifier starting at the current position.
/// Returns the IDENTIFIER token (after advancing past it), or nullptr when no
/// identifier characters are found or the run is not followed by a separator.
shared_ptr<Token> Lexer::matchIdentifier() {
    // Walk forward over the longest run of identifier characters.
    int endIndex = currentIndex;
    for (; endIndex < source.length(); endIndex++) {
        if (!isIdentifier(endIndex))
            break;
    }

    // An empty run is not an identifier; the separator is only checked for a non-empty run.
    bool isEmptyRun = endIndex == currentIndex;
    if (isEmptyRun || !isSeparator(endIndex))
        return nullptr;

    int lexmeLength = endIndex - currentIndex;
    string lexme = source.substr(currentIndex, lexmeLength);

    shared_ptr<Token> token = make_shared<Token>(TokenKind::IDENTIFIER, lexme, currentLine, currentColumn);
    advanceWithToken(token);
    return token;
}
|
||||||
|
|
||||||
|
void Lexer::tryStartingRawSourceParsing() {
|
||||||
|
if (!foundRawSourceStart)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (!tokens.at(tokens.size() - 2)->isOfKind({TokenKind::COLON, TokenKind::COMMA, TokenKind::RIGHT_ARROW})) {
|
||||||
|
foundRawSourceStart = false;
|
||||||
|
isParsingRawSource = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Matches one line of raw (pass-through) source while isParsingRawSource is set.
///
/// Returns nullptr when raw mode is off, when the input is exhausted, or when
/// the current character is ';' (which also switches raw mode off so the
/// regular matchers can handle the ';'). Otherwise returns a RAW_SOURCE_LINE
/// token holding everything from the current position up to, but excluding,
/// the next '\n' (or the end of input), and moves past that newline.
shared_ptr<Token> Lexer::matchRawSourceLine() {
    if (!isParsingRawSource)
        return nullptr;

    const size_t sourceLength = source.length();
    const size_t startIndex = static_cast<size_t>(currentIndex);

    // End of input inside a raw block (missing ';'): nothing to match here.
    // The original indexed source.at() unconditionally and threw std::out_of_range.
    if (startIndex >= sourceLength)
        return nullptr;

    // ';' ends the raw block; leave it in place for the regular matchers.
    if (source[startIndex] == ';') {
        isParsingRawSource = false;
        return nullptr;
    }

    // Scan to the end of the line, stopping at end of input when the last
    // line has no trailing newline.
    size_t nextIndex = startIndex;
    while (nextIndex < sourceLength && source[nextIndex] != '\n')
        nextIndex++;

    string lexme = source.substr(startIndex, nextIndex - startIndex);
    shared_ptr<Token> token = make_shared<Token>(TokenKind::RAW_SOURCE_LINE, lexme, currentLine, currentColumn);
    advanceWithToken(token);

    // Skip the newline only when one is actually there.
    if (nextIndex < sourceLength)
        currentIndex++;

    return token;
}
|
||||||
|
|
||||||
shared_ptr<Token> Lexer::matchEnd() {
|
shared_ptr<Token> Lexer::matchEnd() {
|
||||||
if (currentIndex >= source.length())
|
if (currentIndex >= source.length())
|
||||||
return make_shared<Token>(TokenKind::END, "", currentLine, currentColumn);
|
return make_shared<Token>(TokenKind::END, "", currentLine, currentColumn);
|
||||||
@@ -530,11 +576,15 @@ bool Lexer::isSeparator(int index) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Lexer::advanceWithToken(shared_ptr<Token> token) {
|
void Lexer::advanceWithToken(shared_ptr<Token> token) {
|
||||||
if (token->getKind() == TokenKind::NEW_LINE) {
|
switch (token->getKind()) {
|
||||||
currentLine++;
|
case TokenKind::NEW_LINE:
|
||||||
currentColumn = 0;
|
case TokenKind::RAW_SOURCE_LINE:
|
||||||
} else {
|
currentLine++;
|
||||||
currentColumn += token->getLexme().length();
|
currentColumn = 0;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
currentColumn += token->getLexme().length();
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
currentIndex += token->getLexme().length();
|
currentIndex += token->getLexme().length();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,6 +17,8 @@ private:
|
|||||||
int currentColumn;
|
int currentColumn;
|
||||||
vector<shared_ptr<Token>> tokens;
|
vector<shared_ptr<Token>> tokens;
|
||||||
vector<shared_ptr<Error>> errors;
|
vector<shared_ptr<Error>> errors;
|
||||||
|
bool foundRawSourceStart;
|
||||||
|
bool isParsingRawSource;
|
||||||
|
|
||||||
shared_ptr<Token> nextToken();
|
shared_ptr<Token> nextToken();
|
||||||
shared_ptr<Token> match(TokenKind kind, string lexme, bool needsSeparator);
|
shared_ptr<Token> match(TokenKind kind, string lexme, bool needsSeparator);
|
||||||
@@ -28,6 +30,8 @@ private:
|
|||||||
shared_ptr<Token> matchString();
|
shared_ptr<Token> matchString();
|
||||||
shared_ptr<Token> matchType();
|
shared_ptr<Token> matchType();
|
||||||
shared_ptr<Token> matchIdentifier();
|
shared_ptr<Token> matchIdentifier();
|
||||||
|
void tryStartingRawSourceParsing();
|
||||||
|
shared_ptr<Token> matchRawSourceLine();
|
||||||
shared_ptr<Token> matchEnd();
|
shared_ptr<Token> matchEnd();
|
||||||
|
|
||||||
bool isWhiteSpace(int index);
|
bool isWhiteSpace(int index);
|
||||||
|
|||||||
@@ -30,6 +30,8 @@ enum class TokenKind {
|
|||||||
RIGHT_ARROW,
|
RIGHT_ARROW,
|
||||||
|
|
||||||
FUNCTION,
|
FUNCTION,
|
||||||
|
RAW_FUNCTION,
|
||||||
|
RAW_SOURCE_LINE,
|
||||||
RETURN,
|
RETURN,
|
||||||
REPEAT,
|
REPEAT,
|
||||||
IF,
|
IF,
|
||||||
|
|||||||
@@ -11,6 +11,7 @@
|
|||||||
#include "Parser/Statement/StatementMetaExternFunction.h"
|
#include "Parser/Statement/StatementMetaExternFunction.h"
|
||||||
#include "Parser/Statement/StatementVariable.h"
|
#include "Parser/Statement/StatementVariable.h"
|
||||||
#include "Parser/Statement/StatementFunction.h"
|
#include "Parser/Statement/StatementFunction.h"
|
||||||
|
#include "Parser/Statement/StatementRawFunction.h"
|
||||||
#include "Parser/Statement/StatementBlock.h"
|
#include "Parser/Statement/StatementBlock.h"
|
||||||
#include "Parser/Statement/StatementAssignment.h"
|
#include "Parser/Statement/StatementAssignment.h"
|
||||||
#include "Parser/Statement/StatementReturn.h"
|
#include "Parser/Statement/StatementReturn.h"
|
||||||
@@ -97,6 +98,10 @@ string Logger::toString(shared_ptr<Token> token) {
|
|||||||
return "ELSE";
|
return "ELSE";
|
||||||
case TokenKind::FUNCTION:
|
case TokenKind::FUNCTION:
|
||||||
return "FUN";
|
return "FUN";
|
||||||
|
case TokenKind::RAW_FUNCTION:
|
||||||
|
return "RAW";
|
||||||
|
case TokenKind::RAW_SOURCE_LINE:
|
||||||
|
return format("RAW_SOURCE_LINE({})", token->getLexme());
|
||||||
case TokenKind::RETURN:
|
case TokenKind::RETURN:
|
||||||
return "RET";
|
return "RET";
|
||||||
case TokenKind::REPEAT:
|
case TokenKind::REPEAT:
|
||||||
@@ -179,6 +184,8 @@ string Logger::toString(TokenKind tokenKind) {
|
|||||||
return "ELSE";
|
return "ELSE";
|
||||||
case TokenKind::FUNCTION:
|
case TokenKind::FUNCTION:
|
||||||
return "FUN";
|
return "FUN";
|
||||||
|
case TokenKind::RAW_FUNCTION:
|
||||||
|
return "RAW";
|
||||||
case TokenKind::RETURN:
|
case TokenKind::RETURN:
|
||||||
return "RET";
|
return "RET";
|
||||||
case TokenKind::REPEAT:
|
case TokenKind::REPEAT:
|
||||||
@@ -217,6 +224,8 @@ string Logger::toString(shared_ptr<Statement> statement) {
|
|||||||
return toString(dynamic_pointer_cast<StatementVariable>(statement));
|
return toString(dynamic_pointer_cast<StatementVariable>(statement));
|
||||||
case StatementKind::FUNCTION:
|
case StatementKind::FUNCTION:
|
||||||
return toString(dynamic_pointer_cast<StatementFunction>(statement));
|
return toString(dynamic_pointer_cast<StatementFunction>(statement));
|
||||||
|
case StatementKind::RAW_FUNCTION:
|
||||||
|
return toString(dynamic_pointer_cast<StatementRawFunction>(statement));
|
||||||
case StatementKind::BLOCK:
|
case StatementKind::BLOCK:
|
||||||
return toString(dynamic_pointer_cast<StatementBlock>(statement));
|
return toString(dynamic_pointer_cast<StatementBlock>(statement));
|
||||||
case StatementKind::ASSIGNMENT:
|
case StatementKind::ASSIGNMENT:
|
||||||
@@ -262,6 +271,15 @@ string Logger::toString(shared_ptr<StatementFunction> statement) {
|
|||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Renders a raw function statement as a `RAW("<name>"):` header line followed
/// by the statement's raw source text, unchanged.
string Logger::toString(shared_ptr<StatementRawFunction> statement) {
    string header = format("RAW(\"{}\"):\n", statement->getName());
    return header + statement->getRawSource();
}
|
||||||
|
|
||||||
string Logger::toString(shared_ptr<StatementBlock> statement) {
|
string Logger::toString(shared_ptr<StatementBlock> statement) {
|
||||||
string text;
|
string text;
|
||||||
|
|
||||||
@@ -468,13 +486,13 @@ void Logger::print(shared_ptr<Error> error) {
|
|||||||
|
|
||||||
if (expectedTokenKind) {
|
if (expectedTokenKind) {
|
||||||
message = format(
|
message = format(
|
||||||
"Expected token {} but instead found \"{}\" at line: {}, column: {}",
|
"Expected token {} but instead found {} at line: {}, column: {}",
|
||||||
toString(*expectedTokenKind), token->getLexme(), token->getLine() + 1, token->getColumn() + 1
|
toString(*expectedTokenKind), toString(token), token->getLine() + 1, token->getColumn() + 1
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
message = format(
|
message = format(
|
||||||
"Unexpected token \"{}\" found at line: {}, column: {}",
|
"Unexpected token \"{}\" found at line: {}, column: {}",
|
||||||
token->getLexme(), token->getLine() + 1, token->getColumn() + 1
|
toString(token), token->getLine() + 1, token->getColumn() + 1
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
if (errorMessage)
|
if (errorMessage)
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ class Statement;
|
|||||||
class StatementMetaExternFunction;
|
class StatementMetaExternFunction;
|
||||||
class StatementVariable;
|
class StatementVariable;
|
||||||
class StatementFunction;
|
class StatementFunction;
|
||||||
|
class StatementRawFunction;
|
||||||
class StatementBlock;
|
class StatementBlock;
|
||||||
class StatementAssignment;
|
class StatementAssignment;
|
||||||
class StatementReturn;
|
class StatementReturn;
|
||||||
@@ -41,6 +42,7 @@ private:
|
|||||||
static string toString(shared_ptr<StatementMetaExternFunction> statement);
|
static string toString(shared_ptr<StatementMetaExternFunction> statement);
|
||||||
static string toString(shared_ptr<StatementVariable> statement);
|
static string toString(shared_ptr<StatementVariable> statement);
|
||||||
static string toString(shared_ptr<StatementFunction> statement);
|
static string toString(shared_ptr<StatementFunction> statement);
|
||||||
|
static string toString(shared_ptr<StatementRawFunction> statement);
|
||||||
static string toString(shared_ptr<StatementBlock> statement);
|
static string toString(shared_ptr<StatementBlock> statement);
|
||||||
static string toString(shared_ptr<StatementAssignment> statement);
|
static string toString(shared_ptr<StatementAssignment> statement);
|
||||||
static string toString(shared_ptr<StatementReturn> statement);
|
static string toString(shared_ptr<StatementReturn> statement);
|
||||||
|
|||||||
@@ -16,6 +16,7 @@
|
|||||||
#include "Parser/Expression/ExpressionBlock.h"
|
#include "Parser/Expression/ExpressionBlock.h"
|
||||||
|
|
||||||
#include "Parser/Statement/StatementFunction.h"
|
#include "Parser/Statement/StatementFunction.h"
|
||||||
|
#include "Parser/Statement/StatementRawFunction.h"
|
||||||
#include "Parser/Statement/StatementVariable.h"
|
#include "Parser/Statement/StatementVariable.h"
|
||||||
#include "Parser/Statement/StatementAssignment.h"
|
#include "Parser/Statement/StatementAssignment.h"
|
||||||
#include "Parser/Statement/StatementReturn.h"
|
#include "Parser/Statement/StatementReturn.h"
|
||||||
@@ -61,6 +62,10 @@ shared_ptr<Statement> Parser::nextStatement() {
|
|||||||
if (statement != nullptr || errors.size() > errorsCount)
|
if (statement != nullptr || errors.size() > errorsCount)
|
||||||
return statement;
|
return statement;
|
||||||
|
|
||||||
|
statement = matchStatementRawFunction();
|
||||||
|
if (statement != nullptr || errors.size() > errorsCount)
|
||||||
|
return statement;
|
||||||
|
|
||||||
statement = matchStatementVariable();
|
statement = matchStatementVariable();
|
||||||
if (statement != nullptr || errors.size() > errorsCount)
|
if (statement != nullptr || errors.size() > errorsCount)
|
||||||
return statement;
|
return statement;
|
||||||
@@ -230,6 +235,38 @@ shared_ptr<Statement> Parser::matchStatementFunction() {
|
|||||||
return make_shared<StatementFunction>(name, arguments, returnType, dynamic_pointer_cast<StatementBlock>(statementBlock));
|
return make_shared<StatementFunction>(name, arguments, returnType, dynamic_pointer_cast<StatementBlock>(statementBlock));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tries to parse a raw function: `<identifier> raw`, a NEW_LINE, zero or more
/// RAW_SOURCE_LINE tokens, then a SEMICOLON.
///
/// Returns nullptr without reporting an error when the upcoming tokens are not
/// IDENTIFIER followed by RAW_FUNCTION. Returns nullptr after markError() when
/// the NEW_LINE or the SEMICOLON is missing. On success returns a
/// StatementRawFunction holding the name and the raw lines joined with "\n".
shared_ptr<Statement> Parser::matchStatementRawFunction() {
    // Check for the `<identifier> raw` header without consuming it
    // (the last argument appears to control advancing - TODO confirm).
    bool hasRawHeader = tryMatchingTokenKinds({TokenKind::IDENTIFIER, TokenKind::RAW_FUNCTION}, true, false);
    if (!hasRawHeader)
        return nullptr;

    // name, then step over both the identifier and the `raw` keyword
    string name = tokens.at(currentIndex)->getLexme();
    currentIndex += 2;

    // consume new line
    bool hasNewLine = tryMatchingTokenKinds({TokenKind::NEW_LINE}, true, true);
    if (!hasNewLine) {
        markError(TokenKind::NEW_LINE, {});
        return nullptr;
    }

    // source: collect every raw line, separated by "\n" once something has been collected
    string rawSource;
    while (tryMatchingTokenKinds({TokenKind::RAW_SOURCE_LINE}, true, false)) {
        string line = tokens.at(currentIndex)->getLexme();
        currentIndex++;

        if (!rawSource.empty())
            rawSource += "\n";
        rawSource += line;
    }

    // the raw block must be closed by a semicolon
    bool hasTerminator = tryMatchingTokenKinds({TokenKind::SEMICOLON}, false, true);
    if (!hasTerminator) {
        markError(TokenKind::SEMICOLON, {});
        return nullptr;
    }

    return make_shared<StatementRawFunction>(name, rawSource);
}
|
||||||
|
|
||||||
shared_ptr<Statement> Parser::matchStatementBlock(vector<TokenKind> terminalTokenKinds) {
|
shared_ptr<Statement> Parser::matchStatementBlock(vector<TokenKind> terminalTokenKinds) {
|
||||||
vector<shared_ptr<Statement>> statements;
|
vector<shared_ptr<Statement>> statements;
|
||||||
|
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ private:
|
|||||||
shared_ptr<Statement> matchStatementMetaExternFunction();
|
shared_ptr<Statement> matchStatementMetaExternFunction();
|
||||||
shared_ptr<Statement> matchStatementVariable();
|
shared_ptr<Statement> matchStatementVariable();
|
||||||
shared_ptr<Statement> matchStatementFunction();
|
shared_ptr<Statement> matchStatementFunction();
|
||||||
|
shared_ptr<Statement> matchStatementRawFunction();
|
||||||
|
|
||||||
shared_ptr<Statement> matchStatementBlock(vector<TokenKind> terminalTokenKinds);
|
shared_ptr<Statement> matchStatementBlock(vector<TokenKind> terminalTokenKinds);
|
||||||
shared_ptr<Statement> matchStatementAssignment();
|
shared_ptr<Statement> matchStatementAssignment();
|
||||||
|
|||||||
Reference in New Issue
Block a user