diff --git a/src/Lexer.cpp b/src/Lexer.cpp index 642a98e..f09fb19 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -4,8 +4,8 @@ Lexer::Lexer(string source): source(source) { } vector> Lexer::getTokens() { - vector> tokens; shared_ptr token = nullptr; + tokens.clear(); do { token = nextToken(); // Got a nullptr, shouldn't have happened @@ -99,6 +99,39 @@ shared_ptr Lexer::nextToken() { return nextToken(); // gets rid of remaining white spaces without repeating the code } + // structural + token = match(TokenKind::LEFT_PAREN, "(", false); + if (token != nullptr) + return token; + + token = match(TokenKind::RIGHT_PAREN, ")", false); + if (token != nullptr) + return token; + + token = match(TokenKind::COLON, ":", false); + if (token != nullptr) + return token; + + token = match(TokenKind::SEMICOLON, ";", false); + if (token != nullptr) + return token; + + token = match(TokenKind::QUESTION_QUESTION, "??", false); + if (token != nullptr) + return token; + + token = match(TokenKind::QUESTION, "?", false); + if (token != nullptr) + return token; + + token = match(TokenKind::LEFT_ARROW, "<-", false); + if (token != nullptr) + return token; + + token = match(TokenKind::RIGHT_ARROW, "->", false); + if (token != nullptr) + return token; + // arithmetic token = match(TokenKind::PLUS, "+", false); if (token != nullptr) @@ -145,31 +178,6 @@ shared_ptr Lexer::nextToken() { if (token != nullptr) return token; - // structural - token = match(TokenKind::LEFT_PAREN, "(", false); - if (token != nullptr) - return token; - - token = match(TokenKind::RIGHT_PAREN, ")", false); - if (token != nullptr) - return token; - - token = match(TokenKind::COLON, ":", false); - if (token != nullptr) - return token; - - token = match(TokenKind::SEMICOLON, ";", false); - if (token != nullptr) - return token; - - token = match(TokenKind::QUESTION_QUESTION, "??", false); - if (token != nullptr) - return token; - - token = match(TokenKind::QUESTION, "?", false); - if (token != nullptr) - return token; - // keywords token = match(TokenKind::FUNCTION, "fun", true); if (token != nullptr) @@ -197,6 +205,10 @@ shared_ptr Lexer::nextToken() { return token; // identifier + token = matchType(); + if (token != nullptr) + return token; + token = matchIdentifier(); if (token != nullptr) return token; @@ -264,6 +276,30 @@ shared_ptr Lexer::matchReal() { return token; } +shared_ptr Lexer::matchType() { + bool isVarDec = tokens.size() >= 2 && + tokens.at(tokens.size() - 1)->getKind() == TokenKind::COLON && + tokens.at(tokens.size() - 2)->getKind() == TokenKind::IDENTIFIER; + + bool isFunDec = tokens.size() >= 1 && + tokens.at(tokens.size() - 1)->getKind() == TokenKind::RIGHT_ARROW; + + if (!isVarDec && !isFunDec) + return nullptr; + + int nextIndex = currentIndex; + while (nextIndex < source.length() && isIdentifier(nextIndex)) + nextIndex++; + + if (nextIndex == currentIndex || !isSeparator(nextIndex)) + return nullptr; + + string lexme = source.substr(currentIndex, nextIndex - currentIndex); + shared_ptr token = make_shared(TokenKind::TYPE, lexme, currentLine, currentColumn); + advanceWithToken(token); + return token; +} + shared_ptr Lexer::matchIdentifier() { int nextIndex = currentIndex; diff --git a/src/Lexer.h b/src/Lexer.h index a7b8784..6dfe653 100644 --- a/src/Lexer.h +++ b/src/Lexer.h @@ -14,10 +14,13 @@ private: int currentLine = 0; int currentColumn = 0; + vector> tokens; + shared_ptr nextToken(); shared_ptr match(TokenKind kind, string lexme, bool needsSeparator); shared_ptr matchInteger(); shared_ptr matchReal(); + shared_ptr matchType(); shared_ptr matchIdentifier(); shared_ptr matchEnd(); shared_ptr matchInvalid(); diff --git a/src/Token.cpp b/src/Token.cpp index 0ae6294..56f7bc0 100644 --- a/src/Token.cpp +++ b/src/Token.cpp @@ -72,41 +72,45 @@ bool Token::isOfKind(vector kinds) { string Token::toString() { switch (kind) { case TokenKind::PLUS: - return "PLUS"; + return "+"; case TokenKind::MINUS: - return "MINUS"; + return "-"; case TokenKind::STAR: - return "STAR"; + return "*"; case TokenKind::SLASH: - return "SLASH"; + return "/"; case TokenKind::PERCENT: - return "PERCENT"; + return "%"; case TokenKind::EQUAL: - return "EQUAL"; + return "="; case TokenKind::NOT_EQUAL: - return "NOT_EQUAL"; + return "≠"; case TokenKind::LESS: - return "LESS"; + return "<"; case TokenKind::LESS_EQUAL: - return "LESS_EQUAL"; + return "≤"; case TokenKind::GREATER: - return "GREATER"; + return ">"; case TokenKind::GREATER_EQUAL: - return "GREATER_EQUAL"; + return "≥"; case TokenKind::LEFT_PAREN: - return "LEFT_PAREN"; + return "("; case TokenKind::RIGHT_PAREN: - return "RIGHT_PAREN"; + return ")"; case TokenKind::COLON: - return "COLON"; + return ":"; case TokenKind::SEMICOLON: - return "SEMICOLON"; + return ";"; case TokenKind::QUESTION_QUESTION: - return "QUESTION_QUESTION"; + return "??"; case TokenKind::QUESTION: - return "QUESTION"; + return "?"; + case TokenKind::LEFT_ARROW: + return "←"; + case TokenKind::RIGHT_ARROW: + return "→"; case TokenKind::BOOL: return "BOOL(" + lexme + ")"; @@ -116,6 +120,8 @@ string Token::toString() { return "REAL(" + lexme + ")"; case TokenKind::IDENTIFIER: return "IDENTIFIER(" + lexme + ")"; + case TokenKind::TYPE: + return "TYPE(" + lexme + ")"; case TokenKind::FUNCTION: return "FUNCTION"; @@ -123,7 +129,7 @@ string Token::toString() { return "RETURN"; case TokenKind::NEW_LINE: - return "NEW_LINE"; + return "↲"; case TokenKind::END: return "END"; case TokenKind::INVALID: diff --git a/src/Types.h b/src/Types.h index 7dd52c1..7cd06d9 100644 --- a/src/Types.h +++ b/src/Types.h @@ -21,6 +21,8 @@ enum class TokenKind { SEMICOLON, QUESTION, QUESTION_QUESTION, + LEFT_ARROW, + RIGHT_ARROW, FUNCTION, RETURN, @@ -29,6 +31,7 @@ enum class TokenKind { INTEGER, REAL, IDENTIFIER, + TYPE, NEW_LINE, END, diff --git a/src/main.cpp b/src/main.cpp index 02c65ea..a4aad53 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -37,9 +37,9 @@ int main(int argc, char **argv) { Lexer lexer(source); vector> tokens = lexer.getTokens(); for (int i=0; itoString(); + cout << i << "|" << tokens.at(i)->toString(); if (i < tokens.size() - 1) - cout << " "; + cout << ", "; } cout << endl << endl;