diff --git a/.gitignore b/.gitignore index 6ec6bb1..94ff068 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,9 @@ -# ignore files without extensions -* -!*.* -# brb build artifiacts -*.o - .DS_Store .vscode/settings.json # project build artifacts *.dSYM build/ + +# brb build artifiacts +*.o diff --git a/samples/test.brc b/samples/test.brc new file mode 100644 index 0000000..724f225 --- /dev/null +++ b/samples/test.brc @@ -0,0 +1,7 @@ +@extern putchar fun: character sint32 -> sint32 + +main fun -> sint32 + text data <- "Hello string!\n" + + ret 0 +; \ No newline at end of file diff --git a/src/Lexer/Lexer.cpp b/src/Lexer/Lexer.cpp index b2a1b71..c00f19c 100644 --- a/src/Lexer/Lexer.cpp +++ b/src/Lexer/Lexer.cpp @@ -250,6 +250,10 @@ shared_ptr Lexer::nextToken() { if (token != nullptr) return token; + token = matchString(); + if (token != nullptr) + return token; + // type token = matchType(); if (token != nullptr) @@ -405,6 +409,27 @@ shared_ptr Lexer::matchReal() { return token; } +shared_ptr Lexer::matchString() { + int nextIndex = currentIndex; + + if (currentIndex >= source.size() || source.at(nextIndex) != '\"') + return nullptr; + + bool isClosing = false; + do { + nextIndex++; + isClosing = source.at(nextIndex) == '\"' && source.at(nextIndex - 1) != '\\'; + } while (nextIndex < source.length() && !isClosing); + + if (!isClosing) + return nullptr; + + string lexme = source.substr(currentIndex, nextIndex - currentIndex + 1); + shared_ptr token = make_shared(TokenKind::STRING, lexme, currentLine, currentColumn); + advanceWithToken(token); + return token; +} + shared_ptr Lexer::matchIdentifier() { int nextIndex = currentIndex; diff --git a/src/Lexer/Lexer.h b/src/Lexer/Lexer.h index 41393c8..9cc8cc6 100644 --- a/src/Lexer/Lexer.h +++ b/src/Lexer/Lexer.h @@ -25,6 +25,7 @@ private: shared_ptr matchIntegerBin(); shared_ptr matchIntegerChar(); shared_ptr matchReal(); + shared_ptr matchString(); shared_ptr matchType(); shared_ptr matchIdentifier(); shared_ptr matchEnd(); diff --git a/src/Lexer/Token.cpp b/src/Lexer/Token.cpp index 2d4817d..5fcc1df 100644 --- a/src/Lexer/Token.cpp +++ b/src/Lexer/Token.cpp @@ -41,7 +41,8 @@ vector Token::tokensLiteral = { TokenKind::INTEGER_HEX, TokenKind::INTEGER_BIN, TokenKind::INTEGER_CHAR, - TokenKind::REAL + TokenKind::REAL, + TokenKind::STRING }; Token::Token(TokenKind kind, string lexme, int line, int column): diff --git a/src/Lexer/Token.h b/src/Lexer/Token.h index 7c15be1..2c4ce2e 100644 --- a/src/Lexer/Token.h +++ b/src/Lexer/Token.h @@ -41,6 +41,7 @@ enum class TokenKind { INTEGER_BIN, INTEGER_CHAR, REAL, + STRING, IDENTIFIER, TYPE, diff --git a/src/Logger.cpp b/src/Logger.cpp index cc48ef9..ef0883f 100644 --- a/src/Logger.cpp +++ b/src/Logger.cpp @@ -84,6 +84,8 @@ string Logger::toString(shared_ptr token) { return "INT_CHAR(" + token->getLexme() + ")"; case TokenKind::REAL: return "REAL(" + token->getLexme() + ")"; + case TokenKind::STRING: + return "STRING(" + token->getLexme() + ")"; case TokenKind::IDENTIFIER: return "ID(" + token->getLexme() + ")"; case TokenKind::TYPE: @@ -164,6 +166,8 @@ string Logger::toString(TokenKind tokenKind) { return "LITERAL(INTEGER)"; case TokenKind::REAL: return "LITERAL(REAL)"; + case TokenKind::STRING: + return "LITERAL(STRING)"; case TokenKind::IDENTIFIER: return "LITERAL(ID)"; case TokenKind::TYPE: diff --git a/src/Parser/Expression/ExpressionArrayLiteral.cpp b/src/Parser/Expression/ExpressionArrayLiteral.cpp index 2e6a7e5..73a7598 100644 --- a/src/Parser/Expression/ExpressionArrayLiteral.cpp +++ b/src/Parser/Expression/ExpressionArrayLiteral.cpp @@ -1,8 +1,34 @@ #include "ExpressionArrayLiteral.h" +#include "Lexer/Token.h" +#include "Parser/Expression/ExpressionLiteral.h" + ExpressionArrayLiteral::ExpressionArrayLiteral(vector> expressions): Expression(ExpressionKind::ARRAY_LITERAL, nullptr), expressions(expressions) { } +shared_ptr ExpressionArrayLiteral::expressionArrayLiteralForExpressions(vector> expressions) { + return make_shared(expressions); +} + +shared_ptr ExpressionArrayLiteral::expressionArrayLiteralForTokenString(shared_ptr tokenString) { + if (tokenString->getKind() != TokenKind::STRING) + return nullptr; + + vector> expressions; + string stringValue = tokenString->getLexme(); + for (int i=1; i token = make_shared(TokenKind::INTEGER_CHAR, lexme, tokenString->getLine(), tokenString->getColumn() + i); + shared_ptr expression = ExpressionLiteral::expressionLiteralForToken(token); + expressions.push_back(expression); + } + return make_shared(expressions); +} + vector> ExpressionArrayLiteral::getExpressions() { return expressions; -} \ No newline at end of file +} \ No newline at end of file diff --git a/src/Parser/Expression/ExpressionArrayLiteral.h b/src/Parser/Expression/ExpressionArrayLiteral.h index ae56585..d2a2faf 100644 --- a/src/Parser/Expression/ExpressionArrayLiteral.h +++ b/src/Parser/Expression/ExpressionArrayLiteral.h @@ -9,6 +9,9 @@ private: public: ExpressionArrayLiteral(vector> expressions); + //ExpressionArrayLiteral(shared_ptr tokenString); + static shared_ptr expressionArrayLiteralForExpressions(vector> expressions); + static shared_ptr expressionArrayLiteralForTokenString(shared_ptr tokenString); vector> getExpressions(); }; diff --git a/src/Parser/Expression/ExpressionLiteral.cpp b/src/Parser/Expression/ExpressionLiteral.cpp index de518d5..a66913c 100644 --- a/src/Parser/Expression/ExpressionLiteral.cpp +++ b/src/Parser/Expression/ExpressionLiteral.cpp @@ -1,8 +1,26 @@ #include "ExpressionLiteral.h" +#include "Utils.h" #include "Lexer/Token.h" #include "Parser/ValueType.h" +shared_ptr ExpressionLiteral::expressionLiteralForToken(shared_ptr token) { + switch (token->getKind()) { + case TokenKind::INTEGER_CHAR: { + string charString = token->getLexme(); + optional charValue = Utils::charStringToInt(charString); + if (!charValue) + return nullptr; + shared_ptr expression = make_shared(); + expression->valueType = ValueType::valueTypeForToken(token, nullptr, 0); + expression->sint32Value = *charValue; + return expression; + } + default: + return nullptr; + } +} + ExpressionLiteral::ExpressionLiteral(): Expression(ExpressionKind::LITERAL, nullptr) { } diff --git a/src/Parser/Expression/ExpressionLiteral.h b/src/Parser/Expression/ExpressionLiteral.h index a693cd0..6e61279 100644 --- a/src/Parser/Expression/ExpressionLiteral.h +++ b/src/Parser/Expression/ExpressionLiteral.h @@ -10,6 +10,8 @@ private: float real32Value; public: + static shared_ptr expressionLiteralForToken(shared_ptr token); + ExpressionLiteral(shared_ptr token); ExpressionLiteral(); bool getBoolValue(); diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 177203d..c715d08 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -421,11 +421,11 @@ shared_ptr Parser::matchPrimary() { if (expression != nullptr) return expression; - expression = matchExpressionLiteral(); - if (expression != nullptr) - return expression; - expression = matchExpressionArrayLiteral(); + if (expression != nullptr) + return expression; + + expression = matchExpressionLiteral(); if (expression != nullptr) return expression; @@ -466,25 +466,28 @@ shared_ptr Parser::matchExpressionLiteral() { } shared_ptr Parser::matchExpressionArrayLiteral() { - if (!tryMatchingTokenKinds({TokenKind::LEFT_SQUARE_BRACKET}, true, true)) - return nullptr; - - vector> expressions; - if (!tryMatchingTokenKinds({TokenKind::RIGHT_SQUARE_BRACKET}, true, true)) { - do { - shared_ptr expression = nextExpression(); - if (expression != nullptr) - expressions.push_back(expression); - } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); - + if (tryMatchingTokenKinds({TokenKind::STRING}, true, false)) { + return ExpressionArrayLiteral::expressionArrayLiteralForTokenString(tokens.at(currentIndex++)); + } else if (tryMatchingTokenKinds({TokenKind::LEFT_SQUARE_BRACKET}, true, true)) { + vector> expressions; if (!tryMatchingTokenKinds({TokenKind::RIGHT_SQUARE_BRACKET}, true, true)) { - markError(TokenKind::RIGHT_SQUARE_BRACKET, {}); - return nullptr; + do { + shared_ptr expression = nextExpression(); + if (expression != nullptr) + expressions.push_back(expression); + } while (tryMatchingTokenKinds({TokenKind::COMMA}, true, true)); + + if (!tryMatchingTokenKinds({TokenKind::RIGHT_SQUARE_BRACKET}, true, true)) { + markError(TokenKind::RIGHT_SQUARE_BRACKET, {}); + return nullptr; + } } + + + return make_shared(expressions); } - - return make_shared(expressions); + return nullptr; } shared_ptr Parser::matchExpressionVariable() { diff --git a/src/Utils.cpp b/src/Utils.cpp new file mode 100644 index 0000000..ab42c4c --- /dev/null +++ b/src/Utils.cpp @@ -0,0 +1,34 @@ +#include "Utils.h" + +optional Utils::charStringToInt(string charString) { + switch (charString.length()) { + case 1: + return charString[0]; + case 3: + return charString[1]; + case 4: + charString[0] = charString[1]; + charString[1] = charString[2]; + case 2: + if (charString[0] != '\\') + return {}; + switch (charString[1]) { + case 'b': + return '\b'; + case 'n': + return '\n'; + case 't': + return '\t'; + case '\\': + return '\\'; + case '\'': + return '\''; + case '\"': + return '\"'; + default: + return {}; + } + default: + return {}; + } +} \ No newline at end of file diff --git a/src/Utils.h b/src/Utils.h new file mode 100644 index 0000000..076808d --- /dev/null +++ b/src/Utils.h @@ -0,0 +1,13 @@ +#ifndef UTILS_H +#define UTILS_H + +#include + +using namespace std; + +class Utils { +public: + static optional charStringToInt(string charString); +}; + +#endif \ No newline at end of file