feat: streaming json parser (#1883)
This commit is contained in:
@@ -585,6 +585,7 @@ function M._stream(opts)
|
||||
|
||||
---@type AvanteHandlerOptions
|
||||
local handler_opts = {
|
||||
on_partial_tool_use = opts.on_partial_tool_use,
|
||||
on_start = opts.on_start,
|
||||
on_chunk = opts.on_chunk,
|
||||
on_stop = function(stop_opts)
|
||||
@@ -779,9 +780,9 @@ function M.stream(opts)
|
||||
local is_completed = false
|
||||
if opts.on_tool_log ~= nil then
|
||||
local original_on_tool_log = opts.on_tool_log
|
||||
opts.on_tool_log = vim.schedule_wrap(function(tool_name, log)
|
||||
opts.on_tool_log = vim.schedule_wrap(function(...)
|
||||
if not original_on_tool_log then return end
|
||||
return original_on_tool_log(tool_name, log)
|
||||
return original_on_tool_log(...)
|
||||
end)
|
||||
end
|
||||
if opts.on_chunk ~= nil then
|
||||
@@ -801,6 +802,13 @@ function M.stream(opts)
|
||||
return original_on_stop(stop_opts)
|
||||
end)
|
||||
end
|
||||
if opts.on_partial_tool_use ~= nil then
|
||||
local original_on_partial_tool_use = opts.on_partial_tool_use
|
||||
opts.on_partial_tool_use = vim.schedule_wrap(function(tool_use)
|
||||
if is_completed then return end
|
||||
return original_on_partial_tool_use(tool_use)
|
||||
end)
|
||||
end
|
||||
|
||||
local valid_dual_boost_modes = {
|
||||
planning = true,
|
||||
|
||||
@@ -90,7 +90,7 @@ When you're done, provide a clear and concise summary of what you found.]]):gsub
|
||||
ask = true,
|
||||
code_lang = "unknown",
|
||||
provider = Providers[Config.provider],
|
||||
on_tool_log = function(tool_name, log)
|
||||
on_tool_log = function(tool_id, tool_name, log, state)
|
||||
if on_log then on_log(string.format("[%s] %s", tool_name, log)) end
|
||||
end,
|
||||
session_ctx = session_ctx,
|
||||
|
||||
@@ -1101,7 +1101,7 @@ M._tools = {
|
||||
|
||||
---@param tools AvanteLLMTool[]
|
||||
---@param tool_use AvanteLLMToolUse
|
||||
---@param on_log? fun(tool_name: string, log: string): nil
|
||||
---@param on_log? fun(tool_id: string, tool_name: string, log: string, state: AvanteLLMToolUseState): nil
|
||||
---@param on_complete? fun(result: string | nil, error: string | nil): nil
|
||||
---@param session_ctx? table
|
||||
---@return string | nil result
|
||||
@@ -1131,7 +1131,7 @@ function M.process_tool_use(tools, tool_use, on_log, on_complete, session_ctx)
|
||||
local ok, input_json = pcall(vim.json.decode, tool_use.input_json)
|
||||
if not ok then return nil, "Failed to decode tool input json: " .. vim.inspect(input_json) end
|
||||
if not func then return nil, "Tool not found: " .. tool_use.name end
|
||||
if on_log then on_log(tool_use.name, "running tool") end
|
||||
if on_log then on_log(tool_use.id, tool_use.name, "running tool", "running") end
|
||||
|
||||
-- Set up a timer to periodically check for cancellation
|
||||
local cancel_timer
|
||||
@@ -1166,15 +1166,14 @@ function M.process_tool_use(tools, tool_use, on_log, on_complete, session_ctx)
|
||||
|
||||
-- Check for cancellation one more time before processing result
|
||||
if Helpers.is_cancelled then
|
||||
if on_log then on_log(tool_use.name, "cancelled during result handling") end
|
||||
if on_log then on_log(tool_use.id, tool_use.name, "cancelled during result handling", "failed") end
|
||||
return nil, Helpers.CANCEL_TOKEN
|
||||
end
|
||||
|
||||
if on_log then on_log(tool_use.name, "tool finished") end
|
||||
-- Utils.debug("result", result)
|
||||
-- Utils.debug("error", error)
|
||||
if err ~= nil then
|
||||
if on_log then on_log(tool_use.name, "Error: " .. err) end
|
||||
if on_log then on_log(tool_use.id, tool_use.name, "Error: " .. err, "failed") end
|
||||
else
|
||||
if on_log then on_log(tool_use.id, tool_use.name, "tool finished", "succeeded") end
|
||||
end
|
||||
local result_str ---@type string?
|
||||
if type(result) == "string" then
|
||||
@@ -1188,7 +1187,7 @@ function M.process_tool_use(tools, tool_use, on_log, on_complete, session_ctx)
|
||||
local result, err = func(input_json, function(log)
|
||||
-- Check for cancellation during logging
|
||||
if Helpers.is_cancelled then return end
|
||||
if on_log then on_log(tool_use.name, log) end
|
||||
if on_log then on_log(tool_use.id, tool_use.name, log, "running") end
|
||||
end, function(result, err)
|
||||
-- Check for cancellation before completing
|
||||
if Helpers.is_cancelled then
|
||||
|
||||
@@ -2,6 +2,7 @@ local Utils = require("avante.utils")
|
||||
local Clipboard = require("avante.clipboard")
|
||||
local P = require("avante.providers")
|
||||
local Config = require("avante.config")
|
||||
local StreamingJsonParser = require("avante.utils.streaming_json_parser")
|
||||
|
||||
---@class AvanteProviderFunctor
|
||||
local M = {}
|
||||
@@ -226,6 +227,14 @@ function M:parse_response(ctx, data_stream, event_state, opts)
|
||||
content_block.stoppped = false
|
||||
ctx.content_blocks[jsn.index + 1] = content_block
|
||||
if content_block.type == "thinking" then opts.on_chunk("<think>\n") end
|
||||
if content_block.type == "tool_use" and opts.on_partial_tool_use then
|
||||
opts.on_partial_tool_use({
|
||||
name = content_block.name,
|
||||
id = content_block.id,
|
||||
partial_json = {},
|
||||
state = "generating",
|
||||
})
|
||||
end
|
||||
elseif event_state == "content_block_delta" then
|
||||
local ok, jsn = pcall(vim.json.decode, data_stream)
|
||||
if not ok then return end
|
||||
@@ -233,6 +242,16 @@ function M:parse_response(ctx, data_stream, event_state, opts)
|
||||
if jsn.delta.type == "input_json_delta" then
|
||||
if not content_block.input_json then content_block.input_json = "" end
|
||||
content_block.input_json = content_block.input_json .. jsn.delta.partial_json
|
||||
if opts.on_partial_tool_use then
|
||||
local streaming_json_parser = StreamingJsonParser:new()
|
||||
local partial_json = streaming_json_parser:parse(content_block.input_json)
|
||||
opts.on_partial_tool_use({
|
||||
name = content_block.name,
|
||||
id = content_block.id,
|
||||
partial_json = partial_json or {},
|
||||
state = "generating",
|
||||
})
|
||||
end
|
||||
return
|
||||
elseif jsn.delta.type == "thinking_delta" then
|
||||
content_block.thinking = content_block.thinking .. jsn.delta.thinking
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
local Utils = require("avante.utils")
|
||||
local Config = require("avante.config")
|
||||
local Clipboard = require("avante.clipboard")
|
||||
local P = require("avante.providers")
|
||||
local Providers = require("avante.providers")
|
||||
local StreamingJsonParser = require("avante.utils.streaming_json_parser")
|
||||
|
||||
---@class AvanteProviderFunctor
|
||||
local M = {}
|
||||
@@ -73,7 +74,7 @@ end
|
||||
|
||||
function M:parse_messages(opts)
|
||||
local messages = {}
|
||||
local provider_conf, _ = P.parse_config(self)
|
||||
local provider_conf, _ = Providers.parse_config(self)
|
||||
|
||||
if self.is_reasoning_model(provider_conf.model) then
|
||||
table.insert(messages, { role = "developer", content = opts.system_prompt })
|
||||
@@ -224,18 +225,37 @@ function M:parse_response(ctx, data_stream, _, opts)
|
||||
ctx.last_think_content = choice.delta.reasoning
|
||||
opts.on_chunk(choice.delta.reasoning)
|
||||
elseif choice.delta.tool_calls and choice.delta.tool_calls ~= vim.NIL then
|
||||
local tool_call = choice.delta.tool_calls[1]
|
||||
if not ctx.tool_use_list then ctx.tool_use_list = {} end
|
||||
if not ctx.tool_use_list[tool_call.index + 1] then
|
||||
local tool_use = {
|
||||
name = tool_call["function"].name,
|
||||
id = tool_call.id,
|
||||
input_json = "",
|
||||
}
|
||||
ctx.tool_use_list[tool_call.index + 1] = tool_use
|
||||
else
|
||||
local tool_use = ctx.tool_use_list[tool_call.index + 1]
|
||||
tool_use.input_json = tool_use.input_json .. tool_call["function"].arguments
|
||||
for _, tool_call in ipairs(choice.delta.tool_calls) do
|
||||
if not ctx.tool_use_list then ctx.tool_use_list = {} end
|
||||
if not ctx.tool_use_list[tool_call.index + 1] then
|
||||
local tool_use = {
|
||||
name = tool_call["function"].name,
|
||||
id = tool_call.id,
|
||||
input_json = "",
|
||||
}
|
||||
ctx.tool_use_list[tool_call.index + 1] = tool_use
|
||||
if opts.on_partial_tool_use then
|
||||
opts.on_partial_tool_use({
|
||||
name = tool_call["function"].name,
|
||||
id = tool_call.id,
|
||||
partial_json = {},
|
||||
state = "generating",
|
||||
})
|
||||
end
|
||||
else
|
||||
local tool_use = ctx.tool_use_list[tool_call.index + 1]
|
||||
tool_use.input_json = tool_use.input_json .. tool_call["function"].arguments
|
||||
if opts.on_partial_tool_use then
|
||||
local parser = StreamingJsonParser:new()
|
||||
local partial_json = parser:parse(tool_use.input_json)
|
||||
opts.on_partial_tool_use({
|
||||
name = tool_call["function"].name,
|
||||
id = tool_call.id,
|
||||
partial_json = partial_json or {},
|
||||
state = "generating",
|
||||
})
|
||||
end
|
||||
end
|
||||
end
|
||||
elseif choice.delta.content then
|
||||
if
|
||||
@@ -271,7 +291,7 @@ function M:parse_response_without_stream(data, _, opts)
|
||||
end
|
||||
|
||||
function M:parse_curl_args(prompt_opts)
|
||||
local provider_conf, request_body = P.parse_config(self)
|
||||
local provider_conf, request_body = Providers.parse_config(self)
|
||||
local disable_tools = provider_conf.disable_tools or false
|
||||
|
||||
local headers = {
|
||||
@@ -284,7 +304,7 @@ function M:parse_curl_args(prompt_opts)
|
||||
end
|
||||
end
|
||||
|
||||
if P.env.require_api_key(provider_conf) then
|
||||
if Providers.env.require_api_key(provider_conf) then
|
||||
local api_key = self.parse_api_key()
|
||||
if api_key == nil then
|
||||
error(Config.provider .. " API key is not set, please set it in your environment variable or config file")
|
||||
|
||||
@@ -27,6 +27,7 @@ local RESP_SEPARATOR = "-------"
|
||||
|
||||
---@class avante.Sidebar
|
||||
local Sidebar = {}
|
||||
Sidebar.__index = Sidebar
|
||||
|
||||
---@class avante.CodeState
|
||||
---@field winid integer
|
||||
@@ -64,7 +65,7 @@ function Sidebar:new(id)
|
||||
file_selector = FileSelector:new(id),
|
||||
is_generating = false,
|
||||
chat_history = nil,
|
||||
}, { __index = self })
|
||||
}, Sidebar)
|
||||
end
|
||||
|
||||
function Sidebar:delete_autocmds()
|
||||
@@ -2660,7 +2661,17 @@ function Sidebar:create_input_container(opts)
|
||||
displayed_response = cur_displayed_response
|
||||
end
|
||||
|
||||
local function on_tool_log(tool_name, log)
|
||||
local tool_use_log_history = {}
|
||||
|
||||
---@param tool_id string
|
||||
---@param tool_name string
|
||||
---@param log string
|
||||
---@param state AvanteLLMToolUseState
|
||||
local function on_tool_log(tool_id, tool_name, log, state)
|
||||
if state == "generating" then
|
||||
if tool_use_log_history[tool_id] then return end
|
||||
tool_use_log_history[tool_id] = true
|
||||
end
|
||||
if transformed_response:sub(-1) ~= "\n" then transformed_response = transformed_response .. "\n" end
|
||||
transformed_response = transformed_response .. "[" .. tool_name .. "]: " .. log .. "\n"
|
||||
local breakline = ""
|
||||
@@ -2671,6 +2682,13 @@ function Sidebar:create_input_container(opts)
|
||||
})
|
||||
end
|
||||
|
||||
---@param tool_use AvantePartialLLMToolUse
|
||||
local function on_partial_tool_use(tool_use)
|
||||
if not tool_use.name then return end
|
||||
if not tool_use.id then return end
|
||||
on_tool_log(tool_use.id, tool_use.name, "calling...", tool_use.state)
|
||||
end
|
||||
|
||||
---@type AvanteLLMStopCallback
|
||||
local function on_stop(stop_opts)
|
||||
self.is_generating = false
|
||||
@@ -2738,6 +2756,7 @@ function Sidebar:create_input_container(opts)
|
||||
on_chunk = on_chunk,
|
||||
on_stop = on_stop,
|
||||
on_tool_log = on_tool_log,
|
||||
on_partial_tool_use = on_partial_tool_use,
|
||||
session_ctx = {},
|
||||
})
|
||||
|
||||
|
||||
@@ -75,6 +75,7 @@ vim.g.avante_login = vim.g.avante_login
|
||||
---@field on_start AvanteLLMStartCallback
|
||||
---@field on_chunk AvanteLLMChunkCallback
|
||||
---@field on_stop AvanteLLMStopCallback
|
||||
---@field on_partial_tool_use? fun(tool_use: AvantePartialLLMToolUse): nil
|
||||
---
|
||||
---@alias AvanteLLMMessageContentItem string | { type: "text", text: string } | { type: "image", source: { type: "base64", media_type: string, data: string } } | { type: "tool_use", name: string, id: string, input: any } | { type: "tool_result", tool_use_id: string, content: string, is_error?: boolean } | { type: "thinking", thinking: string, signature: string } | { type: "redacted_thinking", data: string }
|
||||
---
|
||||
@@ -234,6 +235,12 @@ vim.g.avante_login = vim.g.avante_login
|
||||
---@class AvanteLLMRedactedThinkingBlock
|
||||
---@field data string
|
||||
---
|
||||
---@class AvantePartialLLMToolUse
|
||||
---@field name string
|
||||
---@field id string
|
||||
---@field partial_json table
|
||||
---@field state "generating" | "generated"
|
||||
---
|
||||
---@class AvanteLLMToolUse
|
||||
---@field name string
|
||||
---@field id string
|
||||
@@ -336,12 +343,15 @@ vim.g.avante_login = vim.g.avante_login
|
||||
---
|
||||
---@alias AvanteLLMMemorySummarizeCallback fun(dropped_history_messages: AvanteLLMMessage[]): nil
|
||||
---
|
||||
---@alias AvanteLLMToolUseState "generating" | "generated" | "running" | "succeeded" | "failed"
|
||||
---
|
||||
---@class AvanteLLMStreamOptions: AvanteGeneratePromptsOptions
|
||||
---@field on_start AvanteLLMStartCallback
|
||||
---@field on_chunk AvanteLLMChunkCallback
|
||||
---@field on_stop AvanteLLMStopCallback
|
||||
---@field on_memory_summarize? AvanteLLMMemorySummarizeCallback
|
||||
---@field on_tool_log? function(tool_name: string, log: string): nil
|
||||
---@field on_tool_log? fun(tool_id: string, tool_name: string, log: string, state: AvanteLLMToolUseState): nil
|
||||
---@field on_partial_tool_use? fun(tool_use: AvantePartialLLMToolUse): nil
|
||||
---
|
||||
---@alias AvanteLLMToolFunc<T> fun(
|
||||
--- input: T,
|
||||
|
||||
314
lua/avante/utils/streaming_json_parser.lua
Normal file
314
lua/avante/utils/streaming_json_parser.lua
Normal file
@@ -0,0 +1,314 @@
|
||||
-- StreamingJSONParser: 一个能够处理不完整 JSON 流的解析器
|
||||
local StreamingJSONParser = {}
|
||||
StreamingJSONParser.__index = StreamingJSONParser
|
||||
|
||||
-- Create a new StreamingJSONParser instance
|
||||
function StreamingJSONParser:new()
|
||||
local obj = setmetatable({}, StreamingJSONParser)
|
||||
obj:reset()
|
||||
return obj
|
||||
end
|
||||
|
||||
-- Reset the parser state
|
||||
function StreamingJSONParser:reset()
|
||||
self.buffer = ""
|
||||
self.state = {
|
||||
inString = false,
|
||||
escaping = false,
|
||||
stack = {},
|
||||
result = nil,
|
||||
currentKey = nil,
|
||||
current = nil,
|
||||
parentKeys = {},
|
||||
stringBuffer = "",
|
||||
}
|
||||
end
|
||||
|
||||
-- Get the current partial result
|
||||
function StreamingJSONParser:getCurrentPartial() return self.state.result end
|
||||
|
||||
-- Add a value to the current object or array
|
||||
function StreamingJSONParser:addValue(value)
|
||||
local top = self.state.stack[#self.state.stack]
|
||||
top.expectingValue = false
|
||||
|
||||
if top.type == "object" then
|
||||
if self.state.current == nil then
|
||||
self.state.current = {}
|
||||
if self.state.result == nil then self.state.result = self.state.current end
|
||||
end
|
||||
self.state.current[self.state.currentKey] = value
|
||||
top.expectingComma = true
|
||||
elseif top.type == "array" then
|
||||
if self.state.current == nil then
|
||||
self.state.current = {}
|
||||
if self.state.result == nil then self.state.result = self.state.current end
|
||||
end
|
||||
table.insert(self.state.current, value)
|
||||
top.expectingComma = true
|
||||
end
|
||||
end
|
||||
|
||||
-- Parse literal values (true, false, null)
|
||||
local function parseLiteral(buffer)
|
||||
if buffer == "true" then
|
||||
return true
|
||||
elseif buffer == "false" then
|
||||
return false
|
||||
elseif buffer == "null" then
|
||||
return nil
|
||||
else
|
||||
-- Try to parse as number
|
||||
local num = tonumber(buffer)
|
||||
if num then return num end
|
||||
end
|
||||
return buffer
|
||||
end
|
||||
|
||||
-- Parse a chunk of JSON data
|
||||
function StreamingJSONParser:parse(chunk)
|
||||
self.buffer = self.buffer .. chunk
|
||||
local i = 1
|
||||
local len = #self.buffer
|
||||
|
||||
while i <= len do
|
||||
local char = self.buffer:sub(i, i)
|
||||
|
||||
-- Handle strings specially (they can contain JSON control characters)
|
||||
if self.state.inString then
|
||||
if self.state.escaping then
|
||||
self.state.stringBuffer = self.state.stringBuffer .. char
|
||||
self.state.escaping = false
|
||||
elseif char == "\\" then
|
||||
self.state.stringBuffer = self.state.stringBuffer .. char
|
||||
self.state.escaping = true
|
||||
elseif char == '"' then
|
||||
-- End of string
|
||||
self.state.inString = false
|
||||
|
||||
-- If expecting a key in an object
|
||||
if #self.state.stack > 0 and self.state.stack[#self.state.stack].expectingKey then
|
||||
self.state.currentKey = self.state.stringBuffer
|
||||
self.state.stack[#self.state.stack].expectingKey = false
|
||||
self.state.stack[#self.state.stack].expectingColon = true
|
||||
-- If expecting a value
|
||||
elseif #self.state.stack > 0 and self.state.stack[#self.state.stack].expectingValue then
|
||||
self:addValue(self.state.stringBuffer)
|
||||
end
|
||||
self.state.stringBuffer = ""
|
||||
else
|
||||
self.state.stringBuffer = self.state.stringBuffer .. char
|
||||
|
||||
-- For partial string handling, update the current object with the partial string value
|
||||
if #self.state.stack > 0 and self.state.stack[#self.state.stack].expectingValue and i == len then
|
||||
-- If we're at the end of the buffer and still in a string, store the partial value
|
||||
if self.state.current and self.state.currentKey then
|
||||
self.state.current[self.state.currentKey] = self.state.stringBuffer
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
i = i + 1
|
||||
goto continue
|
||||
end
|
||||
|
||||
-- Skip whitespace when not in a string
|
||||
if string.match(char, "%s") then
|
||||
i = i + 1
|
||||
goto continue
|
||||
end
|
||||
|
||||
-- Start of an object
|
||||
if char == "{" then
|
||||
local newObject = {
|
||||
type = "object",
|
||||
expectingKey = true,
|
||||
expectingComma = false,
|
||||
expectingValue = false,
|
||||
expectingColon = false,
|
||||
}
|
||||
table.insert(self.state.stack, newObject)
|
||||
|
||||
-- If we're already in an object/array, save the current state
|
||||
if self.state.current then
|
||||
table.insert(self.state.parentKeys, { current = self.state.current, key = self.state.currentKey })
|
||||
end
|
||||
|
||||
-- Create a new current object
|
||||
self.state.current = {}
|
||||
|
||||
-- If this is the root, set result directly
|
||||
if self.state.result == nil then
|
||||
self.state.result = self.state.current
|
||||
elseif #self.state.parentKeys > 0 then
|
||||
-- Set as child of the parent
|
||||
local parent = self.state.parentKeys[#self.state.parentKeys].current
|
||||
local key = self.state.parentKeys[#self.state.parentKeys].key
|
||||
|
||||
if self.state.stack[#self.state.stack - 1].type == "array" then
|
||||
table.insert(parent, self.state.current)
|
||||
else
|
||||
parent[key] = self.state.current
|
||||
end
|
||||
end
|
||||
|
||||
i = i + 1
|
||||
goto continue
|
||||
end
|
||||
|
||||
-- End of an object
|
||||
if char == "}" then
|
||||
table.remove(self.state.stack)
|
||||
|
||||
-- Move back to parent if there is one
|
||||
if #self.state.parentKeys > 0 then
|
||||
local parentInfo = table.remove(self.state.parentKeys)
|
||||
self.state.current = parentInfo.current
|
||||
self.state.currentKey = parentInfo.key
|
||||
end
|
||||
|
||||
-- If this was the last item on stack, we're complete
|
||||
if #self.state.stack == 0 then
|
||||
i = i + 1
|
||||
self.buffer = self.buffer:sub(i)
|
||||
return self.state.result, true
|
||||
else
|
||||
-- Update parent's expectations
|
||||
self.state.stack[#self.state.stack].expectingComma = true
|
||||
self.state.stack[#self.state.stack].expectingValue = false
|
||||
end
|
||||
|
||||
i = i + 1
|
||||
goto continue
|
||||
end
|
||||
|
||||
-- Start of an array
|
||||
if char == "[" then
|
||||
local newArray = { type = "array", expectingValue = true, expectingComma = false }
|
||||
table.insert(self.state.stack, newArray)
|
||||
|
||||
-- If we're already in an object/array, save the current state
|
||||
if self.state.current then
|
||||
table.insert(self.state.parentKeys, { current = self.state.current, key = self.state.currentKey })
|
||||
end
|
||||
|
||||
-- Create a new current array
|
||||
self.state.current = {}
|
||||
|
||||
-- If this is the root, set result directly
|
||||
if self.state.result == nil then
|
||||
self.state.result = self.state.current
|
||||
elseif #self.state.parentKeys > 0 then
|
||||
-- Set as child of the parent
|
||||
local parent = self.state.parentKeys[#self.state.parentKeys].current
|
||||
local key = self.state.parentKeys[#self.state.parentKeys].key
|
||||
|
||||
if self.state.stack[#self.state.stack - 1].type == "array" then
|
||||
table.insert(parent, self.state.current)
|
||||
else
|
||||
parent[key] = self.state.current
|
||||
end
|
||||
end
|
||||
|
||||
i = i + 1
|
||||
goto continue
|
||||
end
|
||||
|
||||
-- End of an array
|
||||
if char == "]" then
|
||||
table.remove(self.state.stack)
|
||||
|
||||
-- Move back to parent if there is one
|
||||
if #self.state.parentKeys > 0 then
|
||||
local parentInfo = table.remove(self.state.parentKeys)
|
||||
self.state.current = parentInfo.current
|
||||
self.state.currentKey = parentInfo.key
|
||||
end
|
||||
|
||||
-- If this was the last item on stack, we're complete
|
||||
if #self.state.stack == 0 then
|
||||
i = i + 1
|
||||
self.buffer = self.buffer:sub(i)
|
||||
return self.state.result, true
|
||||
else
|
||||
-- Update parent's expectations
|
||||
self.state.stack[#self.state.stack].expectingComma = true
|
||||
self.state.stack[#self.state.stack].expectingValue = false
|
||||
end
|
||||
|
||||
i = i + 1
|
||||
goto continue
|
||||
end
|
||||
|
||||
-- Colon between key and value
|
||||
if char == ":" then
|
||||
if #self.state.stack > 0 and self.state.stack[#self.state.stack].expectingColon then
|
||||
self.state.stack[#self.state.stack].expectingColon = false
|
||||
self.state.stack[#self.state.stack].expectingValue = true
|
||||
i = i + 1
|
||||
goto continue
|
||||
end
|
||||
end
|
||||
|
||||
-- Comma between items
|
||||
if char == "," then
|
||||
if #self.state.stack > 0 and self.state.stack[#self.state.stack].expectingComma then
|
||||
self.state.stack[#self.state.stack].expectingComma = false
|
||||
|
||||
if self.state.stack[#self.state.stack].type == "object" then
|
||||
self.state.stack[#self.state.stack].expectingKey = true
|
||||
else -- array
|
||||
self.state.stack[#self.state.stack].expectingValue = true
|
||||
end
|
||||
|
||||
i = i + 1
|
||||
goto continue
|
||||
end
|
||||
end
|
||||
|
||||
-- Start of a key or string value
|
||||
if char == '"' then
|
||||
self.state.inString = true
|
||||
self.state.stringBuffer = ""
|
||||
i = i + 1
|
||||
goto continue
|
||||
end
|
||||
|
||||
-- Start of a non-string value (number, boolean, null)
|
||||
if #self.state.stack > 0 and self.state.stack[#self.state.stack].expectingValue then
|
||||
local valueBuffer = ""
|
||||
local j = i
|
||||
|
||||
-- Collect until we hit a comma, closing bracket, or brace
|
||||
while j <= len do
|
||||
local currentChar = self.buffer:sub(j, j)
|
||||
if currentChar:match("[%s,}%]]") then break end
|
||||
valueBuffer = valueBuffer .. currentChar
|
||||
j = j + 1
|
||||
end
|
||||
|
||||
-- Only process if we have a complete value
|
||||
if j <= len and self.buffer:sub(j, j):match("[,}%]]") then
|
||||
local value = parseLiteral(valueBuffer)
|
||||
self:addValue(value)
|
||||
i = j
|
||||
goto continue
|
||||
end
|
||||
|
||||
-- If we reached the end but didn't hit a delimiter, wait for more input
|
||||
break
|
||||
end
|
||||
|
||||
i = i + 1
|
||||
|
||||
::continue::
|
||||
end
|
||||
|
||||
-- Update the buffer to remove processed characters
|
||||
self.buffer = self.buffer:sub(i)
|
||||
|
||||
-- Return partial result if available, but indicate parsing is incomplete
|
||||
return self.state.result, false
|
||||
end
|
||||
|
||||
return StreamingJSONParser
|
||||
216
tests/utils/streaming_json_parser_spec.lua
Normal file
216
tests/utils/streaming_json_parser_spec.lua
Normal file
@@ -0,0 +1,216 @@
|
||||
local StreamingJSONParser = require("avante.utils.streaming_json_parser")
|
||||
|
||||
describe("StreamingJSONParser", function()
|
||||
local parser
|
||||
|
||||
before_each(function() parser = StreamingJSONParser:new() end)
|
||||
|
||||
describe("initialization", function()
|
||||
it("should create a new parser with empty state", function()
|
||||
assert.is_not_nil(parser)
|
||||
assert.equals("", parser.buffer)
|
||||
assert.is_not_nil(parser.state)
|
||||
assert.is_false(parser.state.inString)
|
||||
assert.is_false(parser.state.escaping)
|
||||
assert.is_table(parser.state.stack)
|
||||
assert.equals(0, #parser.state.stack)
|
||||
assert.is_nil(parser.state.result)
|
||||
assert.is_nil(parser.state.currentKey)
|
||||
assert.is_nil(parser.state.current)
|
||||
assert.is_table(parser.state.parentKeys)
|
||||
end)
|
||||
end)
|
||||
|
||||
describe("parse", function()
|
||||
it("should parse a complete simple JSON object", function()
|
||||
local result, complete = parser:parse('{"key": "value"}')
|
||||
assert.is_true(complete)
|
||||
assert.is_table(result)
|
||||
assert.equals("value", result.key)
|
||||
end)
|
||||
|
||||
it("should parse a complete simple JSON array", function()
|
||||
local result, complete = parser:parse("[1, 2, 3]")
|
||||
assert.is_true(complete)
|
||||
assert.is_table(result)
|
||||
assert.equals(1, result[1])
|
||||
assert.equals(2, result[2])
|
||||
assert.equals(3, result[3])
|
||||
end)
|
||||
|
||||
it("should handle streaming JSON in multiple chunks", function()
|
||||
local result1, complete1 = parser:parse('{"name": "John')
|
||||
assert.is_false(complete1)
|
||||
assert.is_table(result1)
|
||||
assert.equals("John", result1.name)
|
||||
|
||||
local result2, complete2 = parser:parse('", "age": 30}')
|
||||
assert.is_true(complete2)
|
||||
assert.is_table(result2)
|
||||
assert.equals("John", result2.name)
|
||||
assert.equals(30, result2.age)
|
||||
end)
|
||||
|
||||
it("should handle streaming string field", function()
|
||||
local result1, complete1 = parser:parse('{"name": {"first": "John')
|
||||
assert.is_false(complete1)
|
||||
assert.is_table(result1)
|
||||
assert.equals("John", result1.name.first)
|
||||
end)
|
||||
|
||||
it("should parse nested objects", function()
|
||||
local json = [[{
|
||||
"person": {
|
||||
"name": "John",
|
||||
"age": 30,
|
||||
"address": {
|
||||
"city": "New York",
|
||||
"zip": "10001"
|
||||
}
|
||||
}
|
||||
}]]
|
||||
|
||||
local result, complete = parser:parse(json)
|
||||
assert.is_true(complete)
|
||||
assert.is_table(result)
|
||||
assert.is_table(result.person)
|
||||
assert.equals("John", result.person.name)
|
||||
assert.equals(30, result.person.age)
|
||||
assert.is_table(result.person.address)
|
||||
assert.equals("New York", result.person.address.city)
|
||||
assert.equals("10001", result.person.address.zip)
|
||||
end)
|
||||
|
||||
it("should parse nested arrays", function()
|
||||
local json = [[{
|
||||
"matrix": [
|
||||
[1, 2, 3],
|
||||
[4, 5, 6],
|
||||
[7, 8, 9]
|
||||
]
|
||||
}]]
|
||||
|
||||
local result, complete = parser:parse(json)
|
||||
assert.is_true(complete)
|
||||
assert.is_table(result)
|
||||
assert.is_table(result.matrix)
|
||||
assert.equals(3, #result.matrix)
|
||||
assert.equals(1, result.matrix[1][1])
|
||||
assert.equals(5, result.matrix[2][2])
|
||||
assert.equals(9, result.matrix[3][3])
|
||||
end)
|
||||
|
||||
it("should handle boolean values", function()
|
||||
local result, complete = parser:parse('{"success": true, "failed": false}')
|
||||
assert.is_true(complete)
|
||||
assert.is_table(result)
|
||||
assert.is_true(result.success)
|
||||
assert.is_false(result.failed)
|
||||
end)
|
||||
|
||||
it("should handle null values", function()
|
||||
local result, complete = parser:parse('{"value": null}')
|
||||
assert.is_true(complete)
|
||||
assert.is_table(result)
|
||||
assert.is_nil(result.value)
|
||||
end)
|
||||
|
||||
it("should handle escaped characters in strings", function()
|
||||
local result, complete = parser:parse('{"text": "line1\\nline2\\t\\"quoted\\""}')
|
||||
assert.is_true(complete)
|
||||
assert.is_table(result)
|
||||
assert.equals('line1\\nline2\\t\\"quoted\\"', result.text)
|
||||
end)
|
||||
|
||||
it("should handle numbers correctly", function()
|
||||
local result, complete = parser:parse('{"integer": 42, "float": 3.14, "negative": -10, "exponent": 1.2e3}')
|
||||
assert.is_true(complete)
|
||||
assert.is_table(result)
|
||||
assert.equals(42, result.integer)
|
||||
assert.equals(3.14, result.float)
|
||||
assert.equals(-10, result.negative)
|
||||
assert.equals(1200, result.exponent)
|
||||
end)
|
||||
|
||||
it("should handle streaming complex JSON", function()
|
||||
local chunks = {
|
||||
'{"data": [{"id": 1, "info": {"name":',
|
||||
' "Product A", "active": true}}, {"id": 2, ',
|
||||
'"info": {"name": "Product B", "active": false',
|
||||
'}}], "total": 2}',
|
||||
}
|
||||
|
||||
local complete = false
|
||||
local result
|
||||
|
||||
for _, chunk in ipairs(chunks) do
|
||||
result, complete = parser:parse(chunk)
|
||||
end
|
||||
|
||||
assert.is_true(complete)
|
||||
assert.is_table(result)
|
||||
assert.equals(2, #result.data)
|
||||
assert.equals(1, result.data[1].id)
|
||||
assert.equals("Product A", result.data[1].info.name)
|
||||
assert.is_true(result.data[1].info.active)
|
||||
assert.equals(2, result.data[2].id)
|
||||
assert.equals("Product B", result.data[2].info.name)
|
||||
assert.is_false(result.data[2].info.active)
|
||||
assert.equals(2, result.total)
|
||||
end)
|
||||
|
||||
it("should reset the parser state correctly", function()
|
||||
parser:parse('{"key": "value"}')
|
||||
parser:reset()
|
||||
|
||||
assert.equals("", parser.buffer)
|
||||
assert.is_false(parser.state.inString)
|
||||
assert.is_false(parser.state.escaping)
|
||||
assert.is_table(parser.state.stack)
|
||||
assert.equals(0, #parser.state.stack)
|
||||
assert.is_nil(parser.state.result)
|
||||
assert.is_nil(parser.state.currentKey)
|
||||
assert.is_nil(parser.state.current)
|
||||
assert.is_table(parser.state.parentKeys)
|
||||
end)
|
||||
|
||||
it("should return partial results for incomplete JSON", function()
|
||||
parser:reset()
|
||||
local result, complete = parser:parse('{"stream": [1, 2,')
|
||||
assert.is_false(complete)
|
||||
assert.is_table(result)
|
||||
assert.is_table(result.stream)
|
||||
assert.equals(1, result.stream[1])
|
||||
assert.equals(2, result.stream[2])
|
||||
|
||||
-- We need exactly one item in the stack (the array)
|
||||
assert.equals(2, #parser.state.stack)
|
||||
end)
|
||||
|
||||
it("should handle whitespace correctly", function()
|
||||
parser:reset()
|
||||
local result, complete = parser:parse('{"key1": "value1", "key2": 42}')
|
||||
assert.is_true(complete)
|
||||
assert.is_table(result)
|
||||
assert.equals("value1", result.key1)
|
||||
assert.equals(42, result.key2)
|
||||
end)
|
||||
|
||||
it("should provide access to partial results during streaming", function()
|
||||
parser:parse('{"name": "John", "items": [')
|
||||
|
||||
local partial = parser:getCurrentPartial()
|
||||
assert.is_table(partial)
|
||||
assert.equals("John", partial.name)
|
||||
assert.is_table(partial.items)
|
||||
|
||||
parser:parse("1, 2]")
|
||||
local result, complete = parser:parse("}")
|
||||
|
||||
assert.is_true(complete)
|
||||
assert.equals("John", result.name)
|
||||
assert.equals(1, result.items[1])
|
||||
assert.equals(2, result.items[2])
|
||||
end)
|
||||
end)
|
||||
end)
|
||||
Reference in New Issue
Block a user