feat: tokenizers (#429)

* feat: tokenizers

This reverts commit d5a4db8321.

* fix(inputs): #422

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

---------

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
This commit is contained in:
Aaron Pham
2024-08-31 13:39:50 -04:00
committed by GitHub
parent 534b1e6bec
commit 0557deeab7
28 changed files with 3553 additions and 27 deletions

View File

@@ -1,4 +1,6 @@
--Taken from https://github.com/jackMort/ChatGPT.nvim/blob/main/lua/chatgpt/flows/chat/tokens.lua
local Tokenizer = require("avante.tokenizers")
---@class avante.utils.tokens
local Tokens = {}
@@ -11,6 +13,10 @@ local cost_per_token = {
---@param text string The text to calculate the number of tokens for.
---@return integer The number of tokens in the given text.
function Tokens.calculate_tokens(text)
if Tokenizer.available() then
return Tokenizer.count(text)
end
local tokens = 0
local current_token = ""
for char in text:gmatch(".") do