Scan function
This commit is contained in:
@@ -84,13 +84,25 @@ Token Lexer::nextToken() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
Token token =matchSymbol('.', Token::Kind::DOT);
|
Token token = matchSymbol(':', Token::Kind::COLON);
|
||||||
if (token.isValid())
|
if (token.isValid())
|
||||||
return token;
|
return token;
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
Token token = matchSymbol(',', Token::Kind::COMMA);
|
Token token = matchSymbol(';', Token::Kind::SEMICOLON);
|
||||||
|
if (token.isValid())
|
||||||
|
return token;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
Token token = matchKeyword("fun", Token::Kind::FUNCTION);
|
||||||
|
if (token.isValid())
|
||||||
|
return token;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
Token token = matchKeyword("ret", Token::Kind::RETURN);
|
||||||
if (token.isValid())
|
if (token.isValid())
|
||||||
return token;
|
return token;
|
||||||
}
|
}
|
||||||
@@ -102,7 +114,7 @@ Token Lexer::nextToken() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
Token token = matchKeyword("fun", Token::Kind::FUNCTION);
|
Token token = matchIdentifier();
|
||||||
if (token.isValid())
|
if (token.isValid())
|
||||||
return token;
|
return token;
|
||||||
}
|
}
|
||||||
@@ -137,27 +149,39 @@ Token Lexer::matchSymbol(char symbol, Token::Kind kind) {
|
|||||||
return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
|
return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Tries to read `keyword` at the current position.
// Returns a token of the requested `kind` when the source text at currentIndex
// equals `keyword` and the position right after it is a separator (or the end
// of the source). Otherwise returns an INVALID token holding the single
// character at currentIndex. This function itself does not move currentIndex.
Token Lexer::matchKeyword(string keyword, Token::Kind kind) {
    const auto keywordEnd = currentIndex + keyword.length();

    // The separator check is only reached when the text matches, so that
    // "fun" does not match the start of a longer word such as "function".
    if (source.compare(currentIndex, keyword.length(), keyword) != 0 || !isSeparator(keywordEnd))
        return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);

    return Token(kind, keyword, currentLine, currentColumn);
}
|
||||||
|
|
||||||
// Tries to read an integer literal at the current position.
// Consumes the longest run of digits starting at currentIndex. The run must be
// non-empty and must be followed by a separator (or the end of the source);
// otherwise an INVALID token holding the single character at currentIndex is
// returned. This function itself does not move currentIndex.
Token Lexer::matchInteger() {
    int endIndex = currentIndex;
    for (; endIndex < source.length(); endIndex++) {
        if (!isDigit(endIndex))
            break;
    }

    const bool hasDigits = endIndex != currentIndex;
    if (hasDigits && isSeparator(endIndex)) {
        string lexme = source.substr(currentIndex, endIndex - currentIndex);
        return Token(Token::Kind::INTEGER, lexme, currentLine, currentColumn);
    }

    return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
}
|
||||||
|
|
||||||
// Tries to read an identifier at the current position.
// Consumes the longest run of characters accepted by isIdentifier() starting at
// currentIndex. The run must be non-empty and must be followed by a separator
// (or the end of the source); otherwise an INVALID token holding the single
// character at currentIndex is returned. This function itself does not move
// currentIndex.
Token Lexer::matchIdentifier() {
    int endIndex = currentIndex;
    for (; endIndex < source.length(); endIndex++) {
        if (!isIdentifier(endIndex))
            break;
    }

    const bool hasCharacters = endIndex != currentIndex;
    if (hasCharacters && isSeparator(endIndex)) {
        string lexme = source.substr(currentIndex, endIndex - currentIndex);
        return Token(Token::Kind::IDENTIFIER, lexme, currentLine, currentColumn);
    }

    return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
}
|
||||||
|
|
||||||
Token Lexer::matchInvalid() {
|
Token Lexer::matchInvalid() {
|
||||||
@@ -178,3 +202,35 @@ bool Lexer::isDigit(int index) {
|
|||||||
char character = source.at(index);
|
char character = source.at(index);
|
||||||
return character >= '0' && character <= '9';
|
return character >= '0' && character <= '9';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Lexer::isIdentifier(int index) {
|
||||||
|
char character = source.at(index);
|
||||||
|
bool isDigit = character >= '0' && character <= '9';
|
||||||
|
bool isAlpha = character >= 'a' && character <= 'z' || character >= 'A' && character <= 'Z';
|
||||||
|
bool isAlowedSymbol = character == '_';
|
||||||
|
|
||||||
|
return isDigit || isAlpha || isAlowedSymbol;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Lexer::isSeparator(int index) {
|
||||||
|
if (index >= source.length())
|
||||||
|
return true;
|
||||||
|
|
||||||
|
char character = source.at(index);
|
||||||
|
switch (character) {
|
||||||
|
case '+':
|
||||||
|
case '-':
|
||||||
|
case '*':
|
||||||
|
case '/':
|
||||||
|
case '%':
|
||||||
|
case '(':
|
||||||
|
case ')':
|
||||||
|
case ':':
|
||||||
|
case ' ':
|
||||||
|
case '\t':
|
||||||
|
case '\n':
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -19,12 +19,15 @@ private:
|
|||||||
Token matchNewLine();
|
Token matchNewLine();
|
||||||
Token matchInvalid();
|
Token matchInvalid();
|
||||||
Token matchSymbol(char symbol, Token::Kind kind);
|
Token matchSymbol(char symbol, Token::Kind kind);
|
||||||
Token matchInteger();
|
|
||||||
Token matchKeyword(string keyword, Token::Kind kind);
|
Token matchKeyword(string keyword, Token::Kind kind);
|
||||||
|
Token matchInteger();
|
||||||
|
Token matchIdentifier();
|
||||||
|
|
||||||
bool isWhiteSpace(int index);
|
bool isWhiteSpace(int index);
|
||||||
bool isNewLine(int index);
|
bool isNewLine(int index);
|
||||||
bool isDigit(int index);
|
bool isDigit(int index);
|
||||||
|
bool isIdentifier(int index);
|
||||||
|
bool isSeparator(int index);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Lexer(string source);
|
Lexer(string source);
|
||||||
|
|||||||
@@ -48,14 +48,18 @@ string Token::toString() {
|
|||||||
return "LEFT_PAREN";
|
return "LEFT_PAREN";
|
||||||
case RIGHT_PAREN:
|
case RIGHT_PAREN:
|
||||||
return "RIGHT_PAREN";
|
return "RIGHT_PAREN";
|
||||||
case DOT:
|
case COLON:
|
||||||
return "DOT";
|
return "COLON";
|
||||||
case COMMA:
|
case SEMICOLON:
|
||||||
return "COMMA";
|
return "SEMICOLON";
|
||||||
case INTEGER:
|
case INTEGER:
|
||||||
return "INTEGER";
|
return "INTEGER";
|
||||||
|
case IDENTIFIER:
|
||||||
|
return "IDENTIFIER";
|
||||||
case FUNCTION:
|
case FUNCTION:
|
||||||
return "FUNCTION";
|
return "FUNCTION";
|
||||||
|
case RETURN:
|
||||||
|
return "RETURN";
|
||||||
case NEW_LINE:
|
case NEW_LINE:
|
||||||
return "NEW_LINE";
|
return "NEW_LINE";
|
||||||
case END:
|
case END:
|
||||||
|
|||||||
@@ -16,12 +16,14 @@ public:
|
|||||||
|
|
||||||
LEFT_PAREN,
|
LEFT_PAREN,
|
||||||
RIGHT_PAREN,
|
RIGHT_PAREN,
|
||||||
DOT,
|
COLON,
|
||||||
COMMA,
|
SEMICOLON,
|
||||||
|
|
||||||
INTEGER,
|
INTEGER,
|
||||||
|
IDENTIFIER,
|
||||||
|
|
||||||
FUNCTION,
|
FUNCTION,
|
||||||
|
RETURN,
|
||||||
|
|
||||||
NEW_LINE,
|
NEW_LINE,
|
||||||
END,
|
END,
|
||||||
|
|||||||
Reference in New Issue
Block a user