Scan type and better debug logging

This commit is contained in:
Rafał Grodziński
2025-06-08 17:35:07 +09:00
parent 5102637068
commit a28ddf7d87
5 changed files with 94 additions and 46 deletions

View File

@@ -4,8 +4,8 @@ Lexer::Lexer(string source): source(source) {
} }
vector<shared_ptr<Token>> Lexer::getTokens() { vector<shared_ptr<Token>> Lexer::getTokens() {
vector<shared_ptr<Token>> tokens;
shared_ptr<Token> token = nullptr; shared_ptr<Token> token = nullptr;
tokens.clear();
do { do {
token = nextToken(); token = nextToken();
// Got a nullptr, shouldn't have happened // Got a nullptr, shouldn't have happened
@@ -99,6 +99,39 @@ shared_ptr<Token> Lexer::nextToken() {
return nextToken(); // gets rid of remaining white spaces without repeating the code return nextToken(); // gets rid of remaining white spaces without repeating the code
} }
// structural
token = match(TokenKind::LEFT_PAREN, "(", false);
if (token != nullptr)
return token;
token = match(TokenKind::RIGHT_PAREN, ")", false);
if (token != nullptr)
return token;
token = match(TokenKind::COLON, ":", false);
if (token != nullptr)
return token;
token = match(TokenKind::SEMICOLON, ";", false);
if (token != nullptr)
return token;
token = match(TokenKind::QUESTION_QUESTION, "??", false);
if (token != nullptr)
return token;
token = match(TokenKind::QUESTION, "?", false);
if (token != nullptr)
return token;
token = match(TokenKind::LEFT_ARROW, "<-", false);
if (token != nullptr)
return token;
token = match(TokenKind::RIGHT_ARROW, "->", false);
if (token != nullptr)
return token;
// arithmetic // arithmetic
token = match(TokenKind::PLUS, "+", false); token = match(TokenKind::PLUS, "+", false);
if (token != nullptr) if (token != nullptr)
@@ -145,31 +178,6 @@ shared_ptr<Token> Lexer::nextToken() {
if (token != nullptr) if (token != nullptr)
return token; return token;
// structural
token = match(TokenKind::LEFT_PAREN, "(", false);
if (token != nullptr)
return token;
token = match(TokenKind::RIGHT_PAREN, ")", false);
if (token != nullptr)
return token;
token = match(TokenKind::COLON, ":", false);
if (token != nullptr)
return token;
token = match(TokenKind::SEMICOLON, ";", false);
if (token != nullptr)
return token;
token = match(TokenKind::QUESTION_QUESTION, "??", false);
if (token != nullptr)
return token;
token = match(TokenKind::QUESTION, "?", false);
if (token != nullptr)
return token;
// keywords // keywords
token = match(TokenKind::FUNCTION, "fun", true); token = match(TokenKind::FUNCTION, "fun", true);
if (token != nullptr) if (token != nullptr)
@@ -197,6 +205,10 @@ shared_ptr<Token> Lexer::nextToken() {
return token; return token;
// identifier // identifier
token = matchType();
if (token != nullptr)
return token;
token = matchIdentifier(); token = matchIdentifier();
if (token != nullptr) if (token != nullptr)
return token; return token;
@@ -264,6 +276,30 @@ shared_ptr<Token> Lexer::matchReal() {
return token; return token;
} }
// Tries to scan a TYPE token at the current position.
//
// A type is only attempted in two contexts, decided by the tokens already
// collected in `tokens`:
//   - the last two tokens are IDENTIFIER followed by COLON, or
//   - the last token is RIGHT_ARROW.
//
// Returns the new TYPE token after passing it to advanceWithToken(), or
// nullptr (without calling advanceWithToken) when the context does not match,
// when no identifier characters are found at currentIndex, or when the run of
// identifier characters is not followed by a separator.
shared_ptr<Token> Lexer::matchType() {
    const auto scanned = tokens.size();

    // Context 1: IDENTIFIER COLON immediately precede the current position
    bool afterNameAndColon = false;
    if (scanned >= 2) {
        afterNameAndColon = tokens.back()->getKind() == TokenKind::COLON
            && tokens[scanned - 2]->getKind() == TokenKind::IDENTIFIER;
    }

    // Context 2: RIGHT_ARROW immediately precedes the current position
    bool afterArrow = false;
    if (scanned >= 1) {
        afterArrow = tokens.back()->getKind() == TokenKind::RIGHT_ARROW;
    }

    if (!(afterNameAndColon || afterArrow)) {
        return nullptr;
    }

    // Walk forward over every character accepted by isIdentifier()
    int endIndex = currentIndex;
    for (; endIndex < source.length() && isIdentifier(endIndex); ++endIndex) {
    }

    // Nothing consumed -> not a type
    if (endIndex == currentIndex) {
        return nullptr;
    }
    // The name has to be terminated by a separator
    if (!isSeparator(endIndex)) {
        return nullptr;
    }

    string typeName = source.substr(currentIndex, endIndex - currentIndex);
    shared_ptr<Token> typeToken = make_shared<Token>(TokenKind::TYPE, typeName, currentLine, currentColumn);
    advanceWithToken(typeToken);
    return typeToken;
}
shared_ptr<Token> Lexer::matchIdentifier() { shared_ptr<Token> Lexer::matchIdentifier() {
int nextIndex = currentIndex; int nextIndex = currentIndex;

View File

@@ -14,10 +14,13 @@ private:
int currentLine = 0; int currentLine = 0;
int currentColumn = 0; int currentColumn = 0;
vector<shared_ptr<Token>> tokens;
shared_ptr<Token> nextToken(); shared_ptr<Token> nextToken();
shared_ptr<Token> match(TokenKind kind, string lexme, bool needsSeparator); shared_ptr<Token> match(TokenKind kind, string lexme, bool needsSeparator);
shared_ptr<Token> matchInteger(); shared_ptr<Token> matchInteger();
shared_ptr<Token> matchReal(); shared_ptr<Token> matchReal();
shared_ptr<Token> matchType();
shared_ptr<Token> matchIdentifier(); shared_ptr<Token> matchIdentifier();
shared_ptr<Token> matchEnd(); shared_ptr<Token> matchEnd();
shared_ptr<Token> matchInvalid(); shared_ptr<Token> matchInvalid();

View File

@@ -72,41 +72,45 @@ bool Token::isOfKind(vector<TokenKind> kinds) {
string Token::toString() { string Token::toString() {
switch (kind) { switch (kind) {
case TokenKind::PLUS: case TokenKind::PLUS:
return "PLUS"; return "+";
case TokenKind::MINUS: case TokenKind::MINUS:
return "MINUS"; return "-";
case TokenKind::STAR: case TokenKind::STAR:
return "STAR"; return "*";
case TokenKind::SLASH: case TokenKind::SLASH:
return "SLASH"; return "/";
case TokenKind::PERCENT: case TokenKind::PERCENT:
return "PERCENT"; return "%";
case TokenKind::EQUAL: case TokenKind::EQUAL:
return "EQUAL"; return "=";
case TokenKind::NOT_EQUAL: case TokenKind::NOT_EQUAL:
return "NOT_EQUAL"; return "";
case TokenKind::LESS: case TokenKind::LESS:
return "LESS"; return "<";
case TokenKind::LESS_EQUAL: case TokenKind::LESS_EQUAL:
return "LESS_EQUAL"; return "";
case TokenKind::GREATER: case TokenKind::GREATER:
return "GREATER"; return ">";
case TokenKind::GREATER_EQUAL: case TokenKind::GREATER_EQUAL:
return "GREATER_EQUAL"; return "";
case TokenKind::LEFT_PAREN: case TokenKind::LEFT_PAREN:
return "LEFT_PAREN"; return "(";
case TokenKind::RIGHT_PAREN: case TokenKind::RIGHT_PAREN:
return "RIGHT_PAREN"; return ")";
case TokenKind::COLON: case TokenKind::COLON:
return "COLON"; return ":";
case TokenKind::SEMICOLON: case TokenKind::SEMICOLON:
return "SEMICOLON"; return ";";
case TokenKind::QUESTION_QUESTION: case TokenKind::QUESTION_QUESTION:
return "QUESTION_QUESTION"; return "??";
case TokenKind::QUESTION: case TokenKind::QUESTION:
return "QUESTION"; return "?";
case TokenKind::LEFT_ARROW:
return "";
case TokenKind::RIGHT_ARROW:
return "";
case TokenKind::BOOL: case TokenKind::BOOL:
return "BOOL(" + lexme + ")"; return "BOOL(" + lexme + ")";
@@ -116,6 +120,8 @@ string Token::toString() {
return "REAL(" + lexme + ")"; return "REAL(" + lexme + ")";
case TokenKind::IDENTIFIER: case TokenKind::IDENTIFIER:
return "IDENTIFIER(" + lexme + ")"; return "IDENTIFIER(" + lexme + ")";
case TokenKind::TYPE:
return "TYPE(" + lexme + ")";
case TokenKind::FUNCTION: case TokenKind::FUNCTION:
return "FUNCTION"; return "FUNCTION";
@@ -123,7 +129,7 @@ string Token::toString() {
return "RETURN"; return "RETURN";
case TokenKind::NEW_LINE: case TokenKind::NEW_LINE:
return "NEW_LINE"; return "";
case TokenKind::END: case TokenKind::END:
return "END"; return "END";
case TokenKind::INVALID: case TokenKind::INVALID:

View File

@@ -21,6 +21,8 @@ enum class TokenKind {
SEMICOLON, SEMICOLON,
QUESTION, QUESTION,
QUESTION_QUESTION, QUESTION_QUESTION,
LEFT_ARROW,
RIGHT_ARROW,
FUNCTION, FUNCTION,
RETURN, RETURN,
@@ -29,6 +31,7 @@ enum class TokenKind {
INTEGER, INTEGER,
REAL, REAL,
IDENTIFIER, IDENTIFIER,
TYPE,
NEW_LINE, NEW_LINE,
END, END,

View File

@@ -37,9 +37,9 @@ int main(int argc, char **argv) {
Lexer lexer(source); Lexer lexer(source);
vector<shared_ptr<Token>> tokens = lexer.getTokens(); vector<shared_ptr<Token>> tokens = lexer.getTokens();
for (int i=0; i<tokens.size(); i++) { for (int i=0; i<tokens.size(); i++) {
cout << tokens.at(i)->toString(); cout << i << "|" << tokens.at(i)->toString();
if (i < tokens.size() - 1) if (i < tokens.size() - 1)
cout << " "; cout << ", ";
} }
cout << endl << endl; cout << endl << endl;