feat: tokenizers (#407)

* feat: autobuild tiktoken lib and schenanigans

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* chore: revert readme changes

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* fix(build): windows

Signed-off-by: Hanchin Hsieh <me@yuchanns.xyz>

* chore(plugin): early load commands and base setup

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* fix(build): make sync

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* feat: rust go vroom vroom

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* feat: scuffed afaf implementation binding go brrrr

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* chore: remove dups

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* fix(tokens): calculate whether we should do prompt_caching (fixes #416)

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* chore: ignore lockfiles

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* Update README.md

* Update crates/avante-tokenizers/README.md

* chore: remove unused

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

* chore: remove auto build

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

---------

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
Signed-off-by: Hanchin Hsieh <me@yuchanns.xyz>
Co-authored-by: yuchanns <me@yuchanns.xyz>
This commit is contained in:
Aaron Pham
2024-08-31 07:19:59 -04:00
committed by GitHub
parent 81b44e4533
commit d2095ba267
27 changed files with 3554 additions and 27 deletions

View File

@@ -69,6 +69,7 @@ local Dressing = require("avante.ui.dressing")
---@field setup fun(): nil
---@field has fun(): boolean
---@field api_key_name string
---@field tokenizer_id string | "gpt-4o"
---@field model? string
---@field parse_api_key fun(): string | nil
---@field parse_stream_data? AvanteStreamParser
@@ -269,6 +270,11 @@ M = setmetatable(M, {
return E.parse_envvar(t[k])
end
-- default to gpt-4o as tokenizer
if t[k].tokenizer_id == nil then
t[k].tokenizer_id = "gpt-4o"
end
if t[k].has == nil then
t[k].has = function()
return E.parse_envvar(t[k]) ~= nil
@@ -280,6 +286,7 @@ M = setmetatable(M, {
if not E.is_local(k) then
t[k].parse_api_key()
end
require("avante.tokenizers").setup(t[k].tokenizer_id)
end
end