Scan type and better debug logging

This commit is contained in:
Rafał Grodziński
2025-06-08 17:35:07 +09:00
parent 5102637068
commit a28ddf7d87
5 changed files with 94 additions and 46 deletions

View File

@@ -4,8 +4,8 @@ Lexer::Lexer(string source): source(source) {
}
vector<shared_ptr<Token>> Lexer::getTokens() {
vector<shared_ptr<Token>> tokens;
shared_ptr<Token> token = nullptr;
tokens.clear();
do {
token = nextToken();
// Got a nullptr, shouldn't have happened
@@ -99,6 +99,39 @@ shared_ptr<Token> Lexer::nextToken() {
return nextToken(); // gets rid of remaining white spaces without repeating the code
}
// structural
token = match(TokenKind::LEFT_PAREN, "(", false);
if (token != nullptr)
return token;
token = match(TokenKind::RIGHT_PAREN, ")", false);
if (token != nullptr)
return token;
token = match(TokenKind::COLON, ":", false);
if (token != nullptr)
return token;
token = match(TokenKind::SEMICOLON, ";", false);
if (token != nullptr)
return token;
token = match(TokenKind::QUESTION_QUESTION, "??", false);
if (token != nullptr)
return token;
token = match(TokenKind::QUESTION, "?", false);
if (token != nullptr)
return token;
token = match(TokenKind::LEFT_ARROW, "<-", false);
if (token != nullptr)
return token;
token = match(TokenKind::RIGHT_ARROW, "->", false);
if (token != nullptr)
return token;
// arithmetic
token = match(TokenKind::PLUS, "+", false);
if (token != nullptr)
@@ -145,31 +178,6 @@ shared_ptr<Token> Lexer::nextToken() {
if (token != nullptr)
return token;
// structural
token = match(TokenKind::LEFT_PAREN, "(", false);
if (token != nullptr)
return token;
token = match(TokenKind::RIGHT_PAREN, ")", false);
if (token != nullptr)
return token;
token = match(TokenKind::COLON, ":", false);
if (token != nullptr)
return token;
token = match(TokenKind::SEMICOLON, ";", false);
if (token != nullptr)
return token;
token = match(TokenKind::QUESTION_QUESTION, "??", false);
if (token != nullptr)
return token;
token = match(TokenKind::QUESTION, "?", false);
if (token != nullptr)
return token;
// keywords
token = match(TokenKind::FUNCTION, "fun", true);
if (token != nullptr)
@@ -197,6 +205,10 @@ shared_ptr<Token> Lexer::nextToken() {
return token;
// identifier
token = matchType();
if (token != nullptr)
return token;
token = matchIdentifier();
if (token != nullptr)
return token;
@@ -264,6 +276,30 @@ shared_ptr<Token> Lexer::matchReal() {
return token;
}
// Attempts to lex a TYPE token starting at currentIndex.
//
// A type is only considered when the tokens collected so far end with either
//   IDENTIFIER COLON   (presumably a variable declaration), or
//   RIGHT_ARROW        (presumably a function's return type).
//
// Returns nullptr when the preceding tokens do not match either pattern, when
// no identifier characters are found at the current position, or when the run
// of identifier characters is not followed by a separator.
shared_ptr<Token> Lexer::matchType() {
    const auto count = tokens.size();

    // Inspect the tail of the already produced tokens to decide whether a
    // type name may appear here at all.
    bool afterArrow = false;
    bool afterNamedColon = false;
    if (count >= 1) {
        const auto lastKind = tokens.at(count - 1)->getKind();
        afterArrow = lastKind == TokenKind::RIGHT_ARROW;
        if (count >= 2 && lastKind == TokenKind::COLON)
            afterNamedColon = tokens.at(count - 2)->getKind() == TokenKind::IDENTIFIER;
    }
    if (!(afterNamedColon || afterArrow))
        return nullptr;

    // Walk forward over every character accepted by isIdentifier().
    int endIndex = currentIndex;
    for (; endIndex < source.length() && isIdentifier(endIndex); ++endIndex) {
    }

    // Nothing was consumed: there is no type name here.
    if (endIndex == currentIndex)
        return nullptr;
    // The name has to be terminated by a separator.
    if (!isSeparator(endIndex))
        return nullptr;

    string text = source.substr(currentIndex, endIndex - currentIndex);
    shared_ptr<Token> typeToken = make_shared<Token>(TokenKind::TYPE, text, currentLine, currentColumn);
    // advanceWithToken() is defined elsewhere; it is handed the new token
    // before the token is returned to the caller.
    advanceWithToken(typeToken);
    return typeToken;
}
shared_ptr<Token> Lexer::matchIdentifier() {
int nextIndex = currentIndex;

View File

@@ -14,10 +14,13 @@ private:
int currentLine = 0;
int currentColumn = 0;
vector<shared_ptr<Token>> tokens;
shared_ptr<Token> nextToken();
shared_ptr<Token> match(TokenKind kind, string lexme, bool needsSeparator);
shared_ptr<Token> matchInteger();
shared_ptr<Token> matchReal();
shared_ptr<Token> matchType();
shared_ptr<Token> matchIdentifier();
shared_ptr<Token> matchEnd();
shared_ptr<Token> matchInvalid();

View File

@@ -72,41 +72,45 @@ bool Token::isOfKind(vector<TokenKind> kinds) {
string Token::toString() {
switch (kind) {
case TokenKind::PLUS:
return "PLUS";
return "+";
case TokenKind::MINUS:
return "MINUS";
return "-";
case TokenKind::STAR:
return "STAR";
return "*";
case TokenKind::SLASH:
return "SLASH";
return "/";
case TokenKind::PERCENT:
return "PERCENT";
return "%";
case TokenKind::EQUAL:
return "EQUAL";
return "=";
case TokenKind::NOT_EQUAL:
return "NOT_EQUAL";
return "";
case TokenKind::LESS:
return "LESS";
return "<";
case TokenKind::LESS_EQUAL:
return "LESS_EQUAL";
return "";
case TokenKind::GREATER:
return "GREATER";
return ">";
case TokenKind::GREATER_EQUAL:
return "GREATER_EQUAL";
return "";
case TokenKind::LEFT_PAREN:
return "LEFT_PAREN";
return "(";
case TokenKind::RIGHT_PAREN:
return "RIGHT_PAREN";
return ")";
case TokenKind::COLON:
return "COLON";
return ":";
case TokenKind::SEMICOLON:
return "SEMICOLON";
return ";";
case TokenKind::QUESTION_QUESTION:
return "QUESTION_QUESTION";
return "??";
case TokenKind::QUESTION:
return "QUESTION";
return "?";
case TokenKind::LEFT_ARROW:
return "";
case TokenKind::RIGHT_ARROW:
return "";
case TokenKind::BOOL:
return "BOOL(" + lexme + ")";
@@ -116,6 +120,8 @@ string Token::toString() {
return "REAL(" + lexme + ")";
case TokenKind::IDENTIFIER:
return "IDENTIFIER(" + lexme + ")";
case TokenKind::TYPE:
return "TYPE(" + lexme + ")";
case TokenKind::FUNCTION:
return "FUNCTION";
@@ -123,7 +129,7 @@ string Token::toString() {
return "RETURN";
case TokenKind::NEW_LINE:
return "NEW_LINE";
return "";
case TokenKind::END:
return "END";
case TokenKind::INVALID:

View File

@@ -21,6 +21,8 @@ enum class TokenKind {
SEMICOLON,
QUESTION,
QUESTION_QUESTION,
LEFT_ARROW,
RIGHT_ARROW,
FUNCTION,
RETURN,
@@ -29,6 +31,7 @@ enum class TokenKind {
INTEGER,
REAL,
IDENTIFIER,
TYPE,
NEW_LINE,
END,

View File

@@ -37,9 +37,9 @@ int main(int argc, char **argv) {
Lexer lexer(source);
vector<shared_ptr<Token>> tokens = lexer.getTokens();
for (int i=0; i<tokens.size(); i++) {
cout << tokens.at(i)->toString();
cout << i << "|" << tokens.at(i)->toString();
if (i < tokens.size() - 1)
cout << " ";
cout << ", ";
}
cout << endl << endl;