fix(llm): fix rate limit handling
Rate limit handling seems to be broken: it starts a one-shot timer with a callback that reschedules the timer each second and prints/updates messages. Simultaneously, it schedules a function, deferred by 1 second, that cancels the timer and resumes the stream. This results in the timer executing at most once, and the stream resuming way too early. Fix this by switching to a repeating timer whose first invocation executes immediately. All message handling is moved into the timer callback. Once the countdown is complete, the same callback stops and destroys the timer and resumes the stream.
This commit is contained in:
@@ -936,44 +936,45 @@ function M._stream(opts)
|
|||||||
return handle_next_tool_use(pending_tools, pending_tool_use_messages, 1, {}, stop_opts.streaming_tool_use)
|
return handle_next_tool_use(pending_tools, pending_tool_use_messages, 1, {}, stop_opts.streaming_tool_use)
|
||||||
end
|
end
|
||||||
if stop_opts.reason == "rate_limit" then
|
if stop_opts.reason == "rate_limit" then
|
||||||
local msg_content = "*[Rate limit reached. Retrying in " .. stop_opts.retry_after .. " seconds ...]*"
|
local message = opts.on_messages_add
|
||||||
if opts.on_chunk then opts.on_chunk("\n" .. msg_content .. "\n") end
|
and History.Message:new({
|
||||||
local message
|
|
||||||
if opts.on_messages_add then
|
|
||||||
message = History.Message:new({
|
|
||||||
role = "assistant",
|
role = "assistant",
|
||||||
content = "\n\n" .. msg_content,
|
content = "", -- Actual content will be set below
|
||||||
}, {
|
}, {
|
||||||
just_for_display = true,
|
just_for_display = true,
|
||||||
})
|
})
|
||||||
opts.on_messages_add({ message })
|
|
||||||
end
|
|
||||||
local timer = vim.loop.new_timer()
|
local timer = vim.loop.new_timer()
|
||||||
if timer then
|
if timer then
|
||||||
local retry_after = stop_opts.retry_after
|
local retry_count = stop_opts.retry_after
|
||||||
|
Utils.info("Rate limit reached. Retrying in " .. retry_count .. " seconds", { title = "Avante" })
|
||||||
|
|
||||||
local function countdown()
|
local function countdown()
|
||||||
timer:start(
|
local msg_content = "*[Rate limit reached. Retrying in " .. retry_count .. " seconds ...]*"
|
||||||
1000,
|
if opts.on_chunk then
|
||||||
0,
|
-- Use ANSI escape codes to clear line and move cursor up only for subsequent updates
|
||||||
vim.schedule_wrap(function()
|
local prefix = ""
|
||||||
if retry_after > 0 then retry_after = retry_after - 1 end
|
if retry_count < stop_opts.retry_after then prefix = [[\033[1A\033[K]] end
|
||||||
local msg_content_ = "*[Rate limit reached. Retrying in " .. retry_after .. " seconds ...]*"
|
opts.on_chunk(prefix .. "\n" .. msg_content .. "\n")
|
||||||
if opts.on_chunk then opts.on_chunk([[\033[1A\033[K]] .. "\n" .. msg_content_ .. "\n") end
|
end
|
||||||
if opts.on_messages_add and message then
|
if opts.on_messages_add and message then
|
||||||
message.message.content = "\n\n" .. msg_content_
|
message.message.content = "\n\n" .. msg_content
|
||||||
opts.on_messages_add({ message })
|
opts.on_messages_add({ message })
|
||||||
end
|
end
|
||||||
countdown()
|
|
||||||
end)
|
if retry_count <= 0 then
|
||||||
)
|
timer:stop()
|
||||||
|
timer:close()
|
||||||
|
|
||||||
|
Utils.info("Restarting stream after rate limi pause")
|
||||||
|
M._stream(opts)
|
||||||
|
else
|
||||||
|
retry_count = retry_count - 1
|
||||||
|
end
|
||||||
end
|
end
|
||||||
countdown()
|
|
||||||
|
timer:start(0, 1000, vim.schedule_wrap(function() countdown() end))
|
||||||
end
|
end
|
||||||
Utils.info("Rate limit reached. Retrying in " .. stop_opts.retry_after .. " seconds", { title = "Avante" })
|
|
||||||
vim.defer_fn(function()
|
|
||||||
if timer then timer:stop() end
|
|
||||||
M._stream(opts)
|
|
||||||
end, stop_opts.retry_after * 1000)
|
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
return opts.on_stop(stop_opts)
|
return opts.on_stop(stop_opts)
|
||||||
|
|||||||
Reference in New Issue
Block a user