Better error reporting
This commit is contained in:
@@ -1,14 +1,13 @@
|
||||
#include "Expression.h"
|
||||
|
||||
std::shared_ptr<Expression> Expression::Invalid = std::make_shared<Expression>(Expression::Kind::INVALID, Token::Invalid, nullptr, nullptr);
|
||||
|
||||
Expression::Expression(Kind kind, Token token, shared_ptr<Expression> left, shared_ptr<Expression> right) {
|
||||
Expression::Expression(Kind kind, Token token, shared_ptr<Expression> left, shared_ptr<Expression> right): token(token) {
|
||||
switch (kind) {
|
||||
case LITERAL:
|
||||
setupLiteral(token);
|
||||
break;
|
||||
case GROUPING:
|
||||
setupGrouping(token, left);
|
||||
break;
|
||||
case BINARY:
|
||||
setupBinary(token, left, right);
|
||||
break;
|
||||
@@ -18,7 +17,7 @@ Expression::Expression(Kind kind, Token token, shared_ptr<Expression> left, shar
|
||||
}
|
||||
|
||||
void Expression::setupLiteral(Token token) {
|
||||
bool isKindValid = token.isOneOf({Token::Kind::INTEGER});
|
||||
bool isKindValid = token.isOfKind({Token::Kind::INTEGER});
|
||||
if (!isKindValid)
|
||||
return;
|
||||
|
||||
@@ -38,7 +37,7 @@ void Expression::setupGrouping(Token token, shared_ptr<Expression> expression) {
|
||||
}
|
||||
|
||||
void Expression::setupBinary(Token token, shared_ptr<Expression> left, shared_ptr<Expression> right) {
|
||||
bool isKindValid = token.isOneOf({Token::Kind::PLUS, Token::Kind::MINUS, Token::Kind::STAR, Token::Kind::SLASH, Token::Kind::PERCENT});
|
||||
bool isKindValid = token.isOfKind({Token::Kind::PLUS, Token::Kind::MINUS, Token::Kind::STAR, Token::Kind::SLASH, Token::Kind::PERCENT});
|
||||
bool isLeftValid = left != nullptr && left->getKind() != Kind::INVALID;
|
||||
bool isRightValid = right != nullptr && right->getKind() != Kind::INVALID;
|
||||
|
||||
@@ -65,6 +64,8 @@ void Expression::setupBinary(Token token, shared_ptr<Expression> left, shared_pt
|
||||
break;
|
||||
case Token::Kind::INVALID:
|
||||
break;
|
||||
default:
|
||||
exit(1);
|
||||
}
|
||||
|
||||
this->left = left;
|
||||
@@ -75,6 +76,10 @@ Expression::Kind Expression::getKind() {
|
||||
return kind;
|
||||
}
|
||||
|
||||
// Returns a copy of the token held by this expression.
Token Expression::getToken() {
    return this->token;
}
|
||||
|
||||
int64_t Expression::getInteger() {
|
||||
return integer;
|
||||
}
|
||||
@@ -91,15 +96,11 @@ shared_ptr<Expression> Expression::getRight() {
|
||||
return right;
|
||||
}
|
||||
|
||||
bool Expression::operator==(Expression const& other) {
|
||||
return kind == other.kind;
|
||||
bool Expression::isValid() {
|
||||
return kind != Expression::Kind::INVALID;
|
||||
}
|
||||
|
||||
bool Expression::operator!=(Expression const& other) {
|
||||
return kind != other.kind;
|
||||
}
|
||||
|
||||
std::string Expression::toString() {
|
||||
string Expression::toString() {
|
||||
switch (kind) {
|
||||
case LITERAL:
|
||||
return to_string(integer);
|
||||
|
||||
@@ -25,6 +25,7 @@ public:
|
||||
|
||||
private:
|
||||
Kind kind = INVALID;
|
||||
Token token;
|
||||
int64_t integer = 0;
|
||||
Operator operation = NONE;
|
||||
shared_ptr<Expression> left = nullptr;
|
||||
@@ -37,15 +38,13 @@ private:
|
||||
public:
|
||||
Expression(Kind kind, Token token, shared_ptr<Expression> left, shared_ptr<Expression> right);
|
||||
Kind getKind();
|
||||
Token getToken();
|
||||
int64_t getInteger();
|
||||
Operator getOperator();
|
||||
shared_ptr<Expression> getLeft();
|
||||
shared_ptr<Expression> getRight();
|
||||
bool operator==(Expression const& other);
|
||||
bool operator!=(Expression const& other);
|
||||
bool isValid();
|
||||
string toString();
|
||||
|
||||
static shared_ptr<Expression> Invalid;
|
||||
};
|
||||
|
||||
#endif
|
||||
159
src/Lexer.cpp
159
src/Lexer.cpp
@@ -1,92 +1,140 @@
|
||||
#include "Lexer.h"
|
||||
|
||||
// Creates a lexer that keeps its own copy of the given source text.
Lexer::Lexer(std::string source): source(source) {
}
|
||||
|
||||
std::vector<Token> Lexer::getTokens() {
|
||||
std::vector<Token> tokens;
|
||||
vector<Token> Lexer::getTokens() {
|
||||
vector<Token> tokens;
|
||||
do {
|
||||
Token token = nextToken();
|
||||
currentIndex += token.getLexme().length();
|
||||
|
||||
if (token.getKind() == Token::Kind::NEW_LINE)
|
||||
// Abort scanning if we got an error
|
||||
if (token.getKind() == Token::Kind::INVALID) {
|
||||
cerr << "Unexpected character '" << token.getLexme() << "' at " << token.getLine() << ":" << token.getColumn() << endl;
|
||||
return vector<Token>();
|
||||
}
|
||||
|
||||
currentIndex += token.getLexme().length();
|
||||
currentColumn += token.getLexme().length();
|
||||
|
||||
if (token.getKind() == Token::Kind::NEW_LINE) {
|
||||
currentLine++;
|
||||
currentColumn = 0;
|
||||
}
|
||||
|
||||
// filter out multiple new lines
|
||||
if (tokens.empty() || token.getKind() != Token::Kind::NEW_LINE || tokens.back() != token)
|
||||
if (tokens.empty() || token.getKind() != Token::Kind::NEW_LINE || tokens.back().getKind() != token.getKind())
|
||||
tokens.push_back(token);
|
||||
} while (tokens.back().getKind() != Token::Kind::END);
|
||||
return tokens;
|
||||
}
|
||||
|
||||
// Produces the token that starts at the current position, after skipping
// leading whitespace.
//
// Matchers are tried in a fixed order (end of input, single-character
// symbols, integer, the "fun" keyword, new line); the first one that yields a
// valid token wins. If none matches, the token returned by matchInvalid() is
// returned. The position is not advanced past the returned token here.
Token Lexer::nextToken() {
    while (currentIndex < source.length() && isWhiteSpace(currentIndex)) {
        currentIndex++;
        currentColumn++;
    }

    {
        Token token = matchEnd();
        if (token.isValid())
            return token;
    }

    // Single-character symbols, tried in this order.
    struct SymbolRule {
        char symbol;
        Token::Kind kind;
    };
    static const SymbolRule symbolRules[] = {
        {'+', Token::Kind::PLUS},
        {'-', Token::Kind::MINUS},
        {'*', Token::Kind::STAR},
        {'/', Token::Kind::SLASH},
        {'%', Token::Kind::PERCENT},
        {'(', Token::Kind::LEFT_PAREN},
        {')', Token::Kind::RIGHT_PAREN},
        {'.', Token::Kind::DOT},
        {',', Token::Kind::COMMA},
    };
    for (const SymbolRule &rule : symbolRules) {
        Token token = matchSymbol(rule.symbol, rule.kind);
        if (token.isValid())
            return token;
    }

    {
        Token token = matchInteger();
        if (token.isValid())
            return token;
    }

    {
        Token token = matchKeyword("fun", Token::Kind::FUNCTION);
        if (token.isValid())
            return token;
    }

    {
        Token token = matchNewLine();
        if (token.isValid())
            return token;
    }

    return matchInvalid();
}
|
||||
|
||||
// Returns an END token (empty lexeme) when the current index is at or past the
// end of the source; otherwise an INVALID token carrying the current character.
// Both carry the current line and column.
Token Lexer::matchEnd() {
    if (currentIndex >= source.length())
        return Token(Token::Kind::END, "", currentLine, currentColumn);

    return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
}
|
||||
|
||||
// Returns a NEW_LINE token (lexeme "\n") when isNewLine() accepts the current
// index; otherwise an INVALID token carrying the current character.
// Both carry the current line and column.
Token Lexer::matchNewLine() {
    if (isNewLine(currentIndex))
        return Token(Token::Kind::NEW_LINE, "\n", currentLine, currentColumn);

    return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
}
|
||||
|
||||
// Returns a token of the given kind when the current character equals
// `symbol`; otherwise an INVALID token carrying the current character.
// Both carry the current line and column.
//
// source.at() throws std::out_of_range if the current index is past the end
// of the source.
Token Lexer::matchSymbol(char symbol, Token::Kind kind) {
    if (source.at(currentIndex) == symbol)
        return Token(kind, std::string(1, symbol), currentLine, currentColumn);

    return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
}
|
||||
|
||||
Token Lexer::matchInteger() {
|
||||
@@ -96,15 +144,24 @@ Token Lexer::matchInteger() {
|
||||
nextIndex++;
|
||||
|
||||
if (nextIndex == currentIndex)
|
||||
return Token::Invalid;
|
||||
return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
|
||||
|
||||
std::string lexme = source.substr(currentIndex, nextIndex - currentIndex);
|
||||
return Token(Token::Kind::INTEGER, lexme);
|
||||
string lexme = source.substr(currentIndex, nextIndex - currentIndex);
|
||||
return Token(Token::Kind::INTEGER, lexme, currentLine, currentColumn);
|
||||
}
|
||||
|
||||
// Returns a token of the given kind when the source at the current position
// spells `keyword` and the keyword is followed by end of input, whitespace or
// a new line; otherwise an INVALID token carrying the current character.
// Both carry the current line and column.
Token Lexer::matchKeyword(std::string keyword, Token::Kind kind) {
    const auto keywordEnd = currentIndex + keyword.length();

    bool isMatching = source.compare(currentIndex, keyword.length(), keyword) == 0;
    bool isSeparated = (keywordEnd >= source.length()) || isWhiteSpace(keywordEnd) || isNewLine(keywordEnd);

    // Use the caller-supplied kind instead of always producing FUNCTION.
    if (isMatching && isSeparated)
        return Token(kind, keyword, currentLine, currentColumn);

    return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
}
|
||||
|
||||
// Returns an INVALID token carrying the character at the current index along
// with the current line and column.
Token Lexer::matchInvalid() {
    return Token(Token::Kind::INVALID, source.substr(currentIndex, 1), currentLine, currentColumn);
}
|
||||
|
||||
bool Lexer::isWhiteSpace(int index) {
|
||||
|
||||
13
src/Lexer.h
13
src/Lexer.h
@@ -1,14 +1,18 @@
|
||||
#ifndef LEXER_H
|
||||
#define LEXER_H
|
||||
|
||||
#include <vector>
|
||||
#include "Token.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
class Lexer {
|
||||
private:
|
||||
std::string source;
|
||||
string source;
|
||||
int currentIndex = 0;
|
||||
int currentLine = 0;
|
||||
int currentColumn = 0;
|
||||
|
||||
Token nextToken();
|
||||
Token matchEnd();
|
||||
@@ -16,14 +20,15 @@ private:
|
||||
Token matchInvalid();
|
||||
Token matchSymbol(char symbol, Token::Kind kind);
|
||||
Token matchInteger();
|
||||
Token matchKeyword(string keyword, Token::Kind kind);
|
||||
|
||||
bool isWhiteSpace(int index);
|
||||
bool isNewLine(int index);
|
||||
bool isDigit(int index);
|
||||
|
||||
public:
|
||||
Lexer(std::string source);
|
||||
std::vector<Token> getTokens();
|
||||
Lexer(string source);
|
||||
vector<Token> getTokens();
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -4,13 +4,18 @@ Parser::Parser(vector<Token> tokens): tokens(tokens) {
|
||||
}
|
||||
|
||||
shared_ptr<Expression> Parser::getExpression() {
|
||||
return term();
|
||||
shared_ptr<Expression> expression = term();
|
||||
if (!expression->isValid()) {
|
||||
cerr << "Unexpected token '" << expression->getToken().getLexme() << "' at " << expression->getToken().getLine() << ":" << expression->getToken().getColumn() << endl;
|
||||
return nullptr;
|
||||
}
|
||||
return expression;
|
||||
}
|
||||
|
||||
shared_ptr<Expression> Parser::term() {
|
||||
shared_ptr<Expression> expression = factor();
|
||||
|
||||
while (tokens.at(currentIndex).isOneOf({Token::Kind::PLUS, Token::Kind::MINUS})) {
|
||||
while (tokens.at(currentIndex).isOfKind({Token::Kind::PLUS, Token::Kind::MINUS})) {
|
||||
expression = matchBinary(expression);
|
||||
}
|
||||
|
||||
@@ -20,7 +25,7 @@ shared_ptr<Expression> Parser::term() {
|
||||
shared_ptr<Expression> Parser::factor() {
|
||||
shared_ptr<Expression> expression = primary();
|
||||
|
||||
while (tokens.at(currentIndex).isOneOf({Token::Kind::STAR, Token::Kind::SLASH, Token::Kind::PERCENT})) {
|
||||
while (tokens.at(currentIndex).isOfKind({Token::Kind::STAR, Token::Kind::SLASH, Token::Kind::PERCENT})) {
|
||||
expression = matchBinary(expression);
|
||||
}
|
||||
|
||||
@@ -28,17 +33,19 @@ shared_ptr<Expression> Parser::factor() {
|
||||
}
|
||||
|
||||
shared_ptr<Expression> Parser::primary() {
|
||||
shared_ptr<Expression> expression = Expression::Invalid;
|
||||
{
|
||||
shared_ptr<Expression> expression = matchInteger();
|
||||
if (expression->isValid())
|
||||
return expression;
|
||||
}
|
||||
|
||||
do {
|
||||
if((expression = matchInteger()) != Expression::Invalid)
|
||||
break;
|
||||
|
||||
if((expression = matchGrouping()) != Expression::Invalid)
|
||||
break;
|
||||
} while(false);
|
||||
{
|
||||
shared_ptr<Expression> expression = matchGrouping();
|
||||
if (expression->isValid())
|
||||
return expression;
|
||||
}
|
||||
|
||||
return expression;
|
||||
return make_shared<Expression>(Expression::Kind::INVALID, tokens.at(currentIndex), nullptr, nullptr);
|
||||
}
|
||||
|
||||
shared_ptr<Expression> Parser::matchInteger() {
|
||||
@@ -48,7 +55,7 @@ shared_ptr<Expression> Parser::matchInteger() {
|
||||
return make_shared<Expression>(Expression::Kind::LITERAL, token, nullptr, nullptr);
|
||||
}
|
||||
|
||||
return Expression::Invalid;
|
||||
return make_shared<Expression>(Expression::Kind::INVALID, token, nullptr, nullptr);
|
||||
}
|
||||
|
||||
shared_ptr<Expression> Parser::matchGrouping() {
|
||||
@@ -56,22 +63,28 @@ shared_ptr<Expression> Parser::matchGrouping() {
|
||||
if (token.getKind() == Token::Kind::LEFT_PAREN) {
|
||||
currentIndex++;
|
||||
shared_ptr<Expression> expression = term();
|
||||
// has grouped expression failed?
|
||||
if (!expression->isValid())
|
||||
return expression;
|
||||
if (tokens.at(currentIndex).getKind() == Token::Kind::RIGHT_PAREN) {
|
||||
currentIndex++;
|
||||
return make_shared<Expression>(Expression::Kind::GROUPING, token, expression, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
return Expression::Invalid;
|
||||
return make_shared<Expression>(Expression::Kind::INVALID, token, nullptr, nullptr);
|
||||
}
|
||||
|
||||
shared_ptr<Expression> Parser::matchBinary(shared_ptr<Expression> left) {
|
||||
Token token = tokens.at(currentIndex);
|
||||
if (token.isOneOf({Token::Kind::PLUS, Token::Kind::MINUS, Token::Kind::STAR, Token::Kind::SLASH, Token::Kind::PERCENT})) {
|
||||
if (token.isOfKind({Token::Kind::PLUS, Token::Kind::MINUS, Token::Kind::STAR, Token::Kind::SLASH, Token::Kind::PERCENT})) {
|
||||
currentIndex++;
|
||||
shared_ptr<Expression> right = factor();
|
||||
// Has right expression failed?
|
||||
if (!right->isValid())
|
||||
return right;
|
||||
return make_shared<Expression>(Expression::Kind::BINARY, token, left, right);
|
||||
}
|
||||
|
||||
return Expression::Invalid;
|
||||
return make_shared<Expression>(Expression::Kind::INVALID, token, nullptr, nullptr);
|
||||
}
|
||||
@@ -14,7 +14,7 @@ private:
|
||||
|
||||
shared_ptr<Expression> term(); // +, -
|
||||
shared_ptr<Expression> factor(); // *, /, %
|
||||
shared_ptr<Expression> primary();
|
||||
shared_ptr<Expression> primary(); // integer, ()
|
||||
|
||||
shared_ptr<Expression> matchInteger();
|
||||
shared_ptr<Expression> matchGrouping();
|
||||
|
||||
@@ -1,27 +1,29 @@
|
||||
#include "Token.h"
|
||||
|
||||
Token Token::Invalid = Token(Token::Kind::INVALID, "");
|
||||
|
||||
Token::Token(Kind kind, std::string lexme): kind(kind), lexme(lexme) {
|
||||
Token::Token(Kind kind, string lexme, int line, int column): kind(kind), lexme(lexme), line(line), column(column) {
|
||||
}
|
||||
|
||||
// Returns the kind this token was constructed with.
Token::Kind Token::getKind() {
    return this->kind;
}
|
||||
|
||||
std::string Token::getLexme() {
|
||||
string Token::getLexme() {
|
||||
return lexme;
|
||||
}
|
||||
|
||||
bool Token::operator==(Token const& other) {
|
||||
return kind == other.kind;
|
||||
int Token::getLine() {
|
||||
return line;
|
||||
}
|
||||
|
||||
bool Token::operator!=(Token const& other) {
|
||||
return kind != other.kind;
|
||||
int Token::getColumn() {
|
||||
return column;
|
||||
}
|
||||
|
||||
bool Token::isOneOf(std::vector<Kind> kinds) {
|
||||
bool Token::isValid() {
|
||||
return kind != Token::Kind::INVALID;
|
||||
}
|
||||
|
||||
bool Token::isOfKind(vector<Kind> kinds) {
|
||||
for (Kind &kind : kinds) {
|
||||
if (kind == this->kind)
|
||||
return true;
|
||||
@@ -30,7 +32,7 @@ bool Token::isOneOf(std::vector<Kind> kinds) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string Token::toString() {
|
||||
string Token::toString() {
|
||||
switch (kind) {
|
||||
case PLUS:
|
||||
return "PLUS";
|
||||
@@ -52,6 +54,8 @@ std::string Token::toString() {
|
||||
return "COMMA";
|
||||
case INTEGER:
|
||||
return "INTEGER";
|
||||
case FUNCTION:
|
||||
return "FUNCTION";
|
||||
case NEW_LINE:
|
||||
return "NEW_LINE";
|
||||
case END:
|
||||
|
||||
25
src/Token.h
25
src/Token.h
@@ -3,6 +3,8 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
class Token {
|
||||
public:
|
||||
enum Kind {
|
||||
@@ -19,26 +21,29 @@ public:
|
||||
|
||||
INTEGER,
|
||||
|
||||
NEW_LINE,
|
||||
FUNCTION,
|
||||
|
||||
NEW_LINE,
|
||||
END,
|
||||
|
||||
INVALID
|
||||
};
|
||||
|
||||
private:
|
||||
Kind kind;
|
||||
std::string lexme;
|
||||
string lexme;
|
||||
int line;
|
||||
int column;
|
||||
|
||||
public:
|
||||
Token(Kind kind, std::string lexme);
|
||||
Token(Kind kind, string lexme, int line, int column);
|
||||
Kind getKind();
|
||||
std::string getLexme();
|
||||
bool operator==(Token const& other);
|
||||
bool operator!=(Token const& other);
|
||||
bool isOneOf(std::vector<Kind> kinds);
|
||||
std::string toString();
|
||||
|
||||
static Token Invalid;
|
||||
string getLexme();
|
||||
int getLine();
|
||||
int getColumn();
|
||||
bool isValid();
|
||||
bool isOfKind(vector<Kind> kinds);
|
||||
string toString();
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -36,12 +36,18 @@ int main(int argc, char **argv) {
|
||||
std::string source = readFile(std::string(argv[1]));
|
||||
Lexer lexer(source);
|
||||
std::vector<Token> tokens = lexer.getTokens();
|
||||
if (tokens.empty()) {
|
||||
exit(1);
|
||||
}
|
||||
for (Token &token : tokens)
|
||||
std::cout << token.toString() << " ";
|
||||
std::cout << std::endl;
|
||||
|
||||
Parser parser(tokens);
|
||||
shared_ptr<Expression> expression = parser.getExpression();
|
||||
if (!expression) {
|
||||
exit(1);
|
||||
}
|
||||
cout << expression->toString() << endl;
|
||||
|
||||
ModuleBuilder moduleBuilder(expression);
|
||||
|
||||
Reference in New Issue
Block a user