Scan type and better debug logging
This commit is contained in:
@@ -4,8 +4,8 @@ Lexer::Lexer(string source): source(source) {
|
||||
}
|
||||
|
||||
vector<shared_ptr<Token>> Lexer::getTokens() {
|
||||
vector<shared_ptr<Token>> tokens;
|
||||
shared_ptr<Token> token = nullptr;
|
||||
tokens.clear();
|
||||
do {
|
||||
token = nextToken();
|
||||
// Got a nullptr, shouldn't have happened
|
||||
@@ -99,6 +99,39 @@ shared_ptr<Token> Lexer::nextToken() {
|
||||
return nextToken(); // gets rid of remaining white spaces without repeating the code
|
||||
}
|
||||
|
||||
// structural
|
||||
token = match(TokenKind::LEFT_PAREN, "(", false);
|
||||
if (token != nullptr)
|
||||
return token;
|
||||
|
||||
token = match(TokenKind::RIGHT_PAREN, ")", false);
|
||||
if (token != nullptr)
|
||||
return token;
|
||||
|
||||
token = match(TokenKind::COLON, ":", false);
|
||||
if (token != nullptr)
|
||||
return token;
|
||||
|
||||
token = match(TokenKind::SEMICOLON, ";", false);
|
||||
if (token != nullptr)
|
||||
return token;
|
||||
|
||||
token = match(TokenKind::QUESTION_QUESTION, "??", false);
|
||||
if (token != nullptr)
|
||||
return token;
|
||||
|
||||
token = match(TokenKind::QUESTION, "?", false);
|
||||
if (token != nullptr)
|
||||
return token;
|
||||
|
||||
token = match(TokenKind::LEFT_ARROW, "<-", false);
|
||||
if (token != nullptr)
|
||||
return token;
|
||||
|
||||
token = match(TokenKind::RIGHT_ARROW, "->", false);
|
||||
if (token != nullptr)
|
||||
return token;
|
||||
|
||||
// arithmetic
|
||||
token = match(TokenKind::PLUS, "+", false);
|
||||
if (token != nullptr)
|
||||
@@ -145,31 +178,6 @@ shared_ptr<Token> Lexer::nextToken() {
|
||||
if (token != nullptr)
|
||||
return token;
|
||||
|
||||
// structural
|
||||
token = match(TokenKind::LEFT_PAREN, "(", false);
|
||||
if (token != nullptr)
|
||||
return token;
|
||||
|
||||
token = match(TokenKind::RIGHT_PAREN, ")", false);
|
||||
if (token != nullptr)
|
||||
return token;
|
||||
|
||||
token = match(TokenKind::COLON, ":", false);
|
||||
if (token != nullptr)
|
||||
return token;
|
||||
|
||||
token = match(TokenKind::SEMICOLON, ";", false);
|
||||
if (token != nullptr)
|
||||
return token;
|
||||
|
||||
token = match(TokenKind::QUESTION_QUESTION, "??", false);
|
||||
if (token != nullptr)
|
||||
return token;
|
||||
|
||||
token = match(TokenKind::QUESTION, "?", false);
|
||||
if (token != nullptr)
|
||||
return token;
|
||||
|
||||
// keywords
|
||||
token = match(TokenKind::FUNCTION, "fun", true);
|
||||
if (token != nullptr)
|
||||
@@ -197,6 +205,10 @@ shared_ptr<Token> Lexer::nextToken() {
|
||||
return token;
|
||||
|
||||
// identifier
|
||||
token = matchType();
|
||||
if (token != nullptr)
|
||||
return token;
|
||||
|
||||
token = matchIdentifier();
|
||||
if (token != nullptr)
|
||||
return token;
|
||||
@@ -264,6 +276,30 @@ shared_ptr<Token> Lexer::matchReal() {
|
||||
return token;
|
||||
}
|
||||
|
||||
// Attempts to lex a TYPE token starting at currentIndex.
//
// A type is only accepted when the tokens collected so far end with either
//   IDENTIFIER COLON   (checked first), or
//   RIGHT_ARROW.
// In any other context, or when no identifier characters are found, or when
// the run of identifier characters is not followed by a separator, nullptr
// is returned.
//
// On success the new token is handed to advanceWithToken() and returned.
// NOTE(review): advanceWithToken presumably moves the cursor past the lexeme —
// confirm against its definition.
shared_ptr<Token> Lexer::matchType() {
    const auto tokenCount = tokens.size();

    // Context 1: the two most recent tokens are "<identifier> :".
    const bool followsTypedName = tokenCount >= 2
        && tokens[tokenCount - 1]->getKind() == TokenKind::COLON
        && tokens[tokenCount - 2]->getKind() == TokenKind::IDENTIFIER;

    // Context 2: the most recent token is "->".
    const bool followsArrow = tokenCount >= 1
        && tokens.back()->getKind() == TokenKind::RIGHT_ARROW;

    if (!(followsTypedName || followsArrow))
        return nullptr;

    // Walk forward over every character that counts as part of an identifier.
    int endIndex = currentIndex;
    for (; endIndex < source.length() && isIdentifier(endIndex); ++endIndex) {
    }

    // Nothing consumed: there is no type name here.
    if (endIndex == currentIndex)
        return nullptr;

    // The name must be terminated by a separator.
    if (!isSeparator(endIndex))
        return nullptr;

    string lexme = source.substr(currentIndex, endIndex - currentIndex);
    shared_ptr<Token> typeToken = make_shared<Token>(TokenKind::TYPE, lexme, currentLine, currentColumn);
    advanceWithToken(typeToken);
    return typeToken;
}
|
||||
|
||||
shared_ptr<Token> Lexer::matchIdentifier() {
|
||||
int nextIndex = currentIndex;
|
||||
|
||||
|
||||
@@ -14,10 +14,13 @@ private:
|
||||
int currentLine = 0;
|
||||
int currentColumn = 0;
|
||||
|
||||
vector<shared_ptr<Token>> tokens;
|
||||
|
||||
shared_ptr<Token> nextToken();
|
||||
shared_ptr<Token> match(TokenKind kind, string lexme, bool needsSeparator);
|
||||
shared_ptr<Token> matchInteger();
|
||||
shared_ptr<Token> matchReal();
|
||||
shared_ptr<Token> matchType();
|
||||
shared_ptr<Token> matchIdentifier();
|
||||
shared_ptr<Token> matchEnd();
|
||||
shared_ptr<Token> matchInvalid();
|
||||
|
||||
@@ -72,41 +72,45 @@ bool Token::isOfKind(vector<TokenKind> kinds) {
|
||||
string Token::toString() {
|
||||
switch (kind) {
|
||||
case TokenKind::PLUS:
|
||||
return "PLUS";
|
||||
return "+";
|
||||
case TokenKind::MINUS:
|
||||
return "MINUS";
|
||||
return "-";
|
||||
case TokenKind::STAR:
|
||||
return "STAR";
|
||||
return "*";
|
||||
case TokenKind::SLASH:
|
||||
return "SLASH";
|
||||
return "/";
|
||||
case TokenKind::PERCENT:
|
||||
return "PERCENT";
|
||||
return "%";
|
||||
|
||||
case TokenKind::EQUAL:
|
||||
return "EQUAL";
|
||||
return "=";
|
||||
case TokenKind::NOT_EQUAL:
|
||||
return "NOT_EQUAL";
|
||||
return "≠";
|
||||
case TokenKind::LESS:
|
||||
return "LESS";
|
||||
return "<";
|
||||
case TokenKind::LESS_EQUAL:
|
||||
return "LESS_EQUAL";
|
||||
return "≤";
|
||||
case TokenKind::GREATER:
|
||||
return "GREATER";
|
||||
return ">";
|
||||
case TokenKind::GREATER_EQUAL:
|
||||
return "GREATER_EQUAL";
|
||||
return "≥";
|
||||
|
||||
case TokenKind::LEFT_PAREN:
|
||||
return "LEFT_PAREN";
|
||||
return "(";
|
||||
case TokenKind::RIGHT_PAREN:
|
||||
return "RIGHT_PAREN";
|
||||
return ")";
|
||||
case TokenKind::COLON:
|
||||
return "COLON";
|
||||
return ":";
|
||||
case TokenKind::SEMICOLON:
|
||||
return "SEMICOLON";
|
||||
return ";";
|
||||
case TokenKind::QUESTION_QUESTION:
|
||||
return "QUESTION_QUESTION";
|
||||
return "??";
|
||||
case TokenKind::QUESTION:
|
||||
return "QUESTION";
|
||||
return "?";
|
||||
case TokenKind::LEFT_ARROW:
|
||||
return "←";
|
||||
case TokenKind::RIGHT_ARROW:
|
||||
return "→";
|
||||
|
||||
case TokenKind::BOOL:
|
||||
return "BOOL(" + lexme + ")";
|
||||
@@ -116,6 +120,8 @@ string Token::toString() {
|
||||
return "REAL(" + lexme + ")";
|
||||
case TokenKind::IDENTIFIER:
|
||||
return "IDENTIFIER(" + lexme + ")";
|
||||
case TokenKind::TYPE:
|
||||
return "TYPE(" + lexme + ")";
|
||||
|
||||
case TokenKind::FUNCTION:
|
||||
return "FUNCTION";
|
||||
@@ -123,7 +129,7 @@ string Token::toString() {
|
||||
return "RETURN";
|
||||
|
||||
case TokenKind::NEW_LINE:
|
||||
return "NEW_LINE";
|
||||
return "↲";
|
||||
case TokenKind::END:
|
||||
return "END";
|
||||
case TokenKind::INVALID:
|
||||
|
||||
@@ -21,6 +21,8 @@ enum class TokenKind {
|
||||
SEMICOLON,
|
||||
QUESTION,
|
||||
QUESTION_QUESTION,
|
||||
LEFT_ARROW,
|
||||
RIGHT_ARROW,
|
||||
|
||||
FUNCTION,
|
||||
RETURN,
|
||||
@@ -29,6 +31,7 @@ enum class TokenKind {
|
||||
INTEGER,
|
||||
REAL,
|
||||
IDENTIFIER,
|
||||
TYPE,
|
||||
|
||||
NEW_LINE,
|
||||
END,
|
||||
|
||||
@@ -37,9 +37,9 @@ int main(int argc, char **argv) {
|
||||
Lexer lexer(source);
|
||||
vector<shared_ptr<Token>> tokens = lexer.getTokens();
|
||||
for (int i=0; i<tokens.size(); i++) {
|
||||
cout << tokens.at(i)->toString();
|
||||
cout << i << "|" << tokens.at(i)->toString();
|
||||
if (i < tokens.size() - 1)
|
||||
cout << " ";
|
||||
cout << ", ";
|
||||
}
|
||||
cout << endl << endl;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user