perf(anthropic): prompt-caching (#517)

Bring back prompt caching support for Anthropic.

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
This commit is contained in:
Aaron Pham
2024-09-04 03:19:33 -04:00
committed by GitHub
parent c027ea269a
commit 2b89f0d529
12 changed files with 116 additions and 75 deletions

View File

@@ -24,13 +24,27 @@ M.parse_message = function(opts)
end
end
local user_prompt_obj = {
type = "text",
text = opts.user_prompt,
}
if Utils.tokens.calculate_tokens(opts.user_prompt) then user_prompt_obj.cache_control = { type = "ephemeral" } end
---@type {idx: integer, length: integer}[]
local user_prompts_with_length = {}
for idx, user_prompt in ipairs(opts.user_prompts) do
table.insert(user_prompts_with_length, { idx = idx, length = Utils.tokens.calculate_tokens(user_prompt) })
end
table.insert(message_content, user_prompt_obj)
table.sort(user_prompts_with_length, function(a, b) return a.length > b.length end)
---@type table<integer, boolean>
local top_three = {}
for i = 1, math.min(3, #user_prompts_with_length) do
top_three[user_prompts_with_length[i].idx] = true
end
for idx, prompt_data in ipairs(opts.user_prompts) do
table.insert(message_content, {
type = "text",
text = prompt_data,
cache_control = top_three[idx] and { type = "ephemeral" } or nil,
})
end
return {
{