From 69bf54a62d8dfb0a6b84693871479b39b410df31 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Grodzi=C5=84ski?= <rafal.grodzinski@gmail.com>
Date: Tue, 27 May 2025 22:38:44 +0900
Subject: [PATCH] Tokenizing input

---
 .gitignore          |   5 +-
 .vscode/launch.json |  16 ++++++
 Lexer.cpp           | 122 ++++++++++++++++++++++++++++++++++++++++++--
 Lexer.h             |  18 +++++--
 Token.cpp           |  53 +++++++++++++++----
 Token.h             |  31 +++++++++--
 main.cpp            |   2 +-
 make.sh             |   2 +-
 8 files changed, 225 insertions(+), 24 deletions(-)
 create mode 100644 .vscode/launch.json
diff --git a/.gitignore b/.gitignore
index 48d31f4..06204aa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
 .DS_Store
 *.o
-brb
\ No newline at end of file
+brb
+.vscode/settings.json
+*.dSYM
+*.brc
\ No newline at end of file
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..e8a73f9
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,16 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Debug Bits Runner Builder",
+            "type": "lldb-dap",
+            "request": "launch",
+            "program": "${workspaceFolder}/brb",
+            "args": ["${workspaceFolder}/test.brc"]
+        }
+
+    ]
+}
\ No newline at end of file
diff --git a/Lexer.cpp b/Lexer.cpp
index 8f005a8..16b5150 100644
--- a/Lexer.cpp
+++ b/Lexer.cpp
@@ -1,9 +1,123 @@
 #include "Lexer.h"
-#include "Token.h"
 
 Lexer::Lexer(std::string source) : source(source) {
 }
 
-std::vector<Token> Lexer::tokens() {
-    return { Token::integer, Token::real, Token::integer, Token::eof };
-}
\ No newline at end of file
+// Scans the whole source and returns its tokens; the last one is always END.
+// Consecutive NEW_LINE tokens are collapsed so only the first of a run is kept.
+std::vector<Token> Lexer::getTokens() {
+    std::vector<Token> result;
+    do {
+        Token current = nextToken();
+        currentIndex += current.getLexme().length();  // step past the lexeme
+        const bool isLineBreak = current.getKind() == Token::Kind::NEW_LINE;
+        if (isLineBreak)
+            currentLine++;  // counted even if the token is dropped below
+        if (!(isLineBreak && !result.empty() && result.back() == current))
+            result.push_back(current);
+    } while (result.back().getKind() != Token::Kind::END);
+    return result;
+}
+
+// Returns the token that starts at the current position, after skipping any
+// spaces and tabs. The cursor is left on the token's first character; the
+// caller (getTokens) advances it by the lexeme length. Matchers are tried in
+// a fixed order: end of input, single-character symbols, integer, new line.
+// Input that matches none of them yields a one-character INVALID token.
+Token Lexer::nextToken() {
+    // One (character, kind) entry per single-character symbol, in match order.
+    struct SymbolRule {
+        char symbol;
+        Token::Kind kind;
+    };
+    static const SymbolRule symbolRules[] = {
+        {'+', Token::Kind::PLUS},
+        {'-', Token::Kind::MINUS},
+        {'*', Token::Kind::STAR},
+        {'/', Token::Kind::SLASH},
+        {'%', Token::Kind::PERCENT},
+        {'(', Token::Kind::LEFT_PAREN},
+        {')', Token::Kind::RIGHT_PAREN},
+        {'.', Token::Kind::DOT},
+        {',', Token::Kind::COMMA},
+    };
+
+    while (currentIndex < source.length() && isWhiteSpace(currentIndex))
+        currentIndex++;
+
+    // End of input comes first: the symbol, new-line and invalid matchers
+    // read source.at(currentIndex), which throws when the index is past the end.
+    Token found = matchEnd();
+    if (found != Token::Invalid)
+        return found;
+
+    for (const SymbolRule &rule : symbolRules) {
+        found = matchSymbol(rule.symbol, rule.kind);
+        if (found != Token::Invalid)
+            return found;
+    }
+
+    found = matchInteger();
+    if (found != Token::Invalid)
+        return found;
+
+    found = matchNewLine();
+    if (found != Token::Invalid)
+        return found;
+
+    return matchInvalid();
+}
+
+// Produces an END token (empty lexeme) once currentIndex has reached the end
+// of the source; otherwise returns Token::Invalid to signal "no match".
+Token Lexer::matchEnd() {
+    const bool exhausted = currentIndex >= source.length();
+    return exhausted ? Token(Token::Kind::END, "") : Token::Invalid;
+}
+
+// Produces a NEW_LINE token when the character at currentIndex is '\n';
+// otherwise returns Token::Invalid to signal "no match".
+Token Lexer::matchNewLine() {
+    const bool atLineBreak = isNewLine(currentIndex);
+    return atLineBreak ? Token(Token::Kind::NEW_LINE, "\n") : Token::Invalid;
+}
+
+// Produces a token of the given kind, with the symbol as its lexeme, when the
+// character at currentIndex equals `symbol`; otherwise Token::Invalid.
+Token Lexer::matchSymbol(char symbol, Token::Kind kind) {
+    const char current = source.at(currentIndex);
+    return current == symbol ? Token(kind, std::string(1, symbol)) : Token::Invalid;
+}
+
+// Produces an INTEGER token covering the longest run of decimal digits that
+// starts at currentIndex, or Token::Invalid when there is no digit there.
+Token Lexer::matchInteger() {
+    int digitCount = 0;
+    for (int probe = currentIndex; probe < source.length() && isDigit(probe); probe++)
+        digitCount++;
+
+    if (digitCount == 0)
+        return Token::Invalid;
+
+    return Token(Token::Kind::INTEGER, source.substr(currentIndex, digitCount));
+}
+
+// Wraps the character at currentIndex in a one-character INVALID token.
+Token Lexer::matchInvalid() {
+    return Token(Token::Kind::INVALID, std::string(1, source.at(currentIndex)));
+}
+
+// Tells whether the character at `index` is a blank (space or tab).
+bool Lexer::isWhiteSpace(int index) {
+    return source.at(index) == ' ' || source.at(index) == '\t';
+}
+
+// Tells whether the character at `index` is a line feed.
+bool Lexer::isNewLine(int index) {
+    return source.at(index) == '\n';
+}
+
+// Tells whether the character at `index` is an ASCII decimal digit.
+bool Lexer::isDigit(int index) {
+    return '0' <= source.at(index) && source.at(index) <= '9';
+}
diff --git a/Lexer.h b/Lexer.h
index 4b3f413..4225f2c 100644
--- a/Lexer.h
+++ b/Lexer.h
@@ -2,16 +2,28 @@
 #define LEXER_H
 
 #include <vector>
-
-class Token;
+#include "Token.h"
 
 class Lexer {
 private:
     std::string source;
+    int currentIndex = 0;  // offset in source of the next character to scan
+    int currentLine = 0;   // incremented by getTokens() for every NEW_LINE token
+    // Token scanners working at currentIndex (defined in Lexer.cpp).
+    Token nextToken();
+    Token matchEnd();
+    Token matchNewLine();
+    Token matchInvalid();
+    Token matchSymbol(char symbol, Token::Kind kind);
+    Token matchInteger();
+    // Character tests on source.at(index).
+    bool isWhiteSpace(int index);
+    bool isNewLine(int index);
+    bool isDigit(int index);
 
 public:
     Lexer(std::string source);
-    std::vector<Token> tokens();
+    std::vector<Token> getTokens();  // tokenizes source; the result ends with END
 };
 
 #endif
\ No newline at end of file
diff --git a/Token.cpp b/Token.cpp
index 3aa468f..e6c99af 100644
--- a/Token.cpp
+++ b/Token.cpp
@@ -1,18 +1,53 @@
 #include "Token.h"
 
-Token::Token(Kind kind): kind(kind) {
+Token Token::Invalid = Token(Token::Kind::INVALID, "");  // sentinel: INVALID kind, empty lexeme
+// Stores the given kind and lexeme.
+Token::Token(Kind kind, std::string lexme): kind(kind), lexme(lexme) {
+}
+// Returns the token's kind.
+Token::Kind Token::getKind() {
+    return kind;
+}
+// Returns a copy of the token's lexeme.
+std::string Token::getLexme() {
+    return lexme;
+}
+// Tokens compare equal when their kinds match; the lexeme is ignored.
+bool Token::operator==(Token const& other) {
+    return kind == other.kind;
+}
+// Negation of operator==: true when the two tokens have different kinds.
+bool Token::operator!=(Token const& other) {
+    return !(*this == other);
 }
 
 std::string Token::toString() {
     switch (kind) {
-        case integer:
+        case PLUS:  // every case returns the enumerator's own name
+            return "PLUS";
+        case MINUS:
+            return "MINUS";
+        case STAR:
+            return "STAR";
+        case SLASH:
+            return "SLASH";
+        case PERCENT:
+            return "PERCENT";
+        case LEFT_PAREN:
+            return "LEFT_PAREN";
+        case RIGHT_PAREN:
+            return "RIGHT_PAREN";
+        case DOT:
+            return "DOT";
+        case COMMA:
+            return "COMMA";
+        case INTEGER:
             return "INTEGER";
-            break;
-        case real:
-            return "REAL";
-            break;
-        case eof:
-            return "EOF";
-            break;
+        case NEW_LINE:
+            return "NEW_LINE";
+        case END:
+            return "END";
+        case INVALID:
+            return "INVALID";
     }
 }
\ No newline at end of file
diff --git a/Token.h b/Token.h
index 618072b..12d05eb 100644
--- a/Token.h
+++ b/Token.h
@@ -5,18 +5,39 @@
 
 class Token {
 public:
-    enum Kind {
-        integer,
-        real,
-        eof
+    enum Kind {  // lexical category of a token
+        PLUS,
+        MINUS,
+        STAR,
+        SLASH,
+        PERCENT,
+        // punctuation
+        LEFT_PAREN,
+        RIGHT_PAREN,
+        DOT,
+        COMMA,
+        // a run of decimal digits
+        INTEGER,
+        // a single '\n'
+        NEW_LINE,
+        // END: end of input; INVALID: unrecognised character or "no match"
+        END,
+        INVALID
     };
 
 private:
     Kind kind;
+    std::string lexme;  // source text of the token (empty for END)
 
 public:
-    Token(Kind kind);
+    Token(Kind kind, std::string lexme);
+    Kind getKind();
+    std::string getLexme();
+    bool operator==(Token const& other);  // compares kinds only
+    bool operator!=(Token const& other);  // compares kinds only
     std::string toString();
+    // Sentinel the lexer's matchers return when nothing matched.
+    static Token Invalid;
 };
 
 #endif
\ No newline at end of file
diff --git a/main.cpp b/main.cpp
index 07d8ec0..6f2c9c1 100644
--- a/main.cpp
+++ b/main.cpp
@@ -25,7 +25,7 @@ int main(int argc, char **argv) {
 
     std::string source = readFile(std::string(argv[1]));
     Lexer lexer(source);
-    std::vector<Token> tokens = lexer.tokens();
+    std::vector<Token> tokens = lexer.getTokens();
     for (Token &token : tokens)
         std::cout << token.toString() << " ";
     std::cout << std::endl;
diff --git a/make.sh b/make.sh
index c85dd32..8d241b8 100755
--- a/make.sh
+++ b/make.sh
@@ -1,3 +1,3 @@
 #!/bin/bash
 
-cc -std=c++17 -lc++ *.cpp -o brb
\ No newline at end of file
+cc -g -std=c++17 -lc++ *.cpp -o brb  # -g: emit debug info for the debugger
\ No newline at end of file