feat: tokenizers (#407)

* feat: autobuild tiktoken lib and shenanigans

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* chore: revert readme changes

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* fix(build): windows

Signed-off-by: Hanchin Hsieh <me@yuchanns.xyz>

* chore(plugin): early load commands and base setup

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* fix(build): make sync

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* feat: rust go vroom vroom

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* feat: scuffed afaf implementation binding go brrrr

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* chore: remove dups

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* fix(tokens): calculate whether we should do prompt_caching (fixes #416)

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* chore: ignore lockfiles

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* Update README.md

* Update crates/avante-tokenizers/README.md

* chore: remove unused

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* chore: remove auto build

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

---------

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
Signed-off-by: Hanchin Hsieh <me@yuchanns.xyz>
Co-authored-by: yuchanns <me@yuchanns.xyz>
This commit is contained in:
Aaron Pham
2024-08-31 07:19:59 -04:00
committed by GitHub
parent 81b44e4533
commit d2095ba267
27 changed files with 3554 additions and 27 deletions

View File

@@ -1,4 +1,6 @@
--Taken from https://github.com/jackMort/ChatGPT.nvim/blob/main/lua/chatgpt/flows/chat/tokens.lua
local Tokenizer = require("avante.tokenizers")
---@class avante.utils.tokens
local Tokens = {}
@@ -11,6 +13,10 @@ local cost_per_token = {
---@param text string The text to calculate the number of tokens for.
---@return integer The number of tokens in the given text.
function Tokens.calculate_tokens(text)
if Tokenizer.available() then
return Tokenizer.count(text)
end
local tokens = 0
local current_token = ""
for char in text:gmatch(".") do