Add smart LLM selection, Ollama rate-limit fallback, credential-based config, and cost tracking
lua/codetyper/llm/copilot.lua

@@ -14,6 +14,51 @@ local AUTH_URL = "https://api.github.com/copilot_internal/v2/token"
---@field github_token table|nil

M.state = nil

--- Track if we've already suggested Ollama fallback this session
local ollama_fallback_suggested = false

--- Suggest switching to Ollama when rate limits are hit
---@param error_msg string The error message that triggered this
function M.suggest_ollama_fallback(error_msg)
  if ollama_fallback_suggested then
    return
  end

  -- Check if Ollama is available
  local ollama_available = false
  vim.fn.jobstart({ "curl", "-s", "http://localhost:11434/api/tags" }, {
    on_exit = function(_, code)
      if code == 0 then
        ollama_available = true
      end

      vim.schedule(function()
        if ollama_available then
          -- Switch to Ollama automatically
          local codetyper = require("codetyper")
          local config = codetyper.get_config()
          config.llm.provider = "ollama"

          ollama_fallback_suggested = true
          utils.notify(
            "⚠️ Copilot rate limit reached. Switched to Ollama automatically.\n"
              .. "Original error: "
              .. error_msg:sub(1, 100),
            vim.log.levels.WARN
          )
        else
          utils.notify(
            "⚠️ Copilot rate limit reached. Ollama not available.\n"
              .. "Start Ollama with: ollama serve\n"
              .. "Or wait for Copilot limits to reset.",
            vim.log.levels.WARN
          )
        end
      end)
    end,
  })
end

--- Get OAuth token from copilot.lua or copilot.vim config
---@return string|nil OAuth token
local function get_oauth_token()
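
Note: the fallback above probes Ollama by shelling out to `curl` against its `/api/tags` endpoint via `vim.fn.jobstart`. A minimal sketch of the same check as a reusable helper; the `check_ollama_available` name and the timeout flag are assumptions, not part of this commit:

```lua
-- Sketch of the availability probe as a reusable helper (hypothetical
-- name, not in this commit). Calls back with true when the local
-- Ollama server answers /api/tags.
local function check_ollama_available(host, on_done)
  host = host or "http://localhost:11434"
  vim.fn.jobstart({ "curl", "-s", "--max-time", "2", host .. "/api/tags" }, {
    on_exit = function(_, code)
      -- curl exits 0 only when the HTTP request completed
      vim.schedule(function()
        on_done(code == 0)
      end)
    end,
  })
end

-- Usage:
check_ollama_available(nil, function(up)
  print(up and "Ollama is up" or "Ollama is down")
end)
```
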
@@ -51,9 +96,16 @@ local function get_oauth_token()
  return nil
end

--- Get model from config
--- Get model from stored credentials or config
---@return string Model name
local function get_model()
  -- Priority: stored credentials > config
  local credentials = require("codetyper.credentials")
  local stored_model = credentials.get_model("copilot")
  if stored_model then
    return stored_model
  end

  local codetyper = require("codetyper")
  local config = codetyper.get_config()
  return config.llm.copilot.model
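
Note: the same "stored credentials > config" lookup recurs in this commit for copilot, gemini, ollama, and openai. A possible consolidation, sketched with a hypothetical `resolve` helper that is not part of the commit:

```lua
-- Hypothetical consolidation of the repeated lookup (not in this commit):
-- resolve a per-provider setting, preferring stored credentials.
local function resolve(provider, key, config_value)
  local credentials = require("codetyper.credentials")
  local getter = credentials["get_" .. key] -- e.g. get_model, get_api_key
  local stored = getter and getter(provider)
  if stored then
    return stored
  end
  return config_value
end

-- Usage (mirrors get_model above):
-- resolve("copilot", "model", config.llm.copilot.model)
```
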
@@ -204,15 +256,37 @@ local function make_request(token, body, callback)
      local ok, response = pcall(vim.json.decode, response_text)

      if not ok then
        -- Show the actual response text as the error (truncated if too long)
        local error_msg = response_text
        if #error_msg > 200 then
          error_msg = error_msg:sub(1, 200) .. "..."
        end

        -- Clean up common patterns
        if response_text:match("<!DOCTYPE") or response_text:match("<html") then
          error_msg = "Copilot API returned HTML error page. Service may be unavailable."
        end

        -- Check for rate limit and suggest Ollama fallback
        if response_text:match("limit") or response_text:match("Upgrade") or response_text:match("quota") then
          M.suggest_ollama_fallback(error_msg)
        end

        vim.schedule(function()
          callback(nil, "Failed to parse Copilot response", nil)
          callback(nil, error_msg, nil)
        end)
        return
      end

      if response.error then
        local error_msg = response.error.message or "Copilot API error"
        if response.error.code == "rate_limit_exceeded" or (error_msg:match("limit") and error_msg:match("plan")) then
          error_msg = "Copilot rate limit: " .. error_msg
          M.suggest_ollama_fallback(error_msg)
        end

        vim.schedule(function()
          callback(nil, response.error.message or "Copilot API error", nil)
          callback(nil, error_msg, nil)
        end)
        return
      end
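
Note: a quick check of the classification heuristics above on sample payloads, in plain runnable Lua (the payloads are invented). Worth knowing: `:match("limit")` is a substring test, so an unrelated word like "delimiter" would also trigger the fallback path:

```lua
-- Exercise the heuristics on invented sample payloads (illustrative only).
local samples = {
  '<html><body>Bad gateway</body></html>',
  'You have exceeded your quota. Upgrade your plan.',
  '{"choices":[{"message":{"content":"..."}}]}',
}
for _, text in ipairs(samples) do
  local html = text:match("<!DOCTYPE") or text:match("<html")
  local limited = text:match("limit") or text:match("Upgrade") or text:match("quota")
  print(string.format("html=%s rate_limited=%s",
    tostring(html ~= nil), tostring(limited ~= nil)))
end
```
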
@@ -220,6 +294,17 @@ local function make_request(token, body, callback)
      -- Extract usage info
      local usage = response.usage or {}

      -- Record usage for cost tracking
      if usage.prompt_tokens or usage.completion_tokens then
        local cost = require("codetyper.cost")
        cost.record_usage(
          get_model(),
          usage.prompt_tokens or 0,
          usage.completion_tokens or 0,
          usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens or 0
        )
      end

      if response.choices and response.choices[1] and response.choices[1].message then
        local code = llm.extract_code(response.choices[1].message.content)
        vim.schedule(function()
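
Note: the cached-token extraction follows the OpenAI-style usage shape. A tiny illustration of what the call above pulls out of a usage payload (numbers invented):

```lua
-- What the record_usage call extracts from a sample usage payload
-- (numbers invented for illustration).
local usage = {
  prompt_tokens = 1200,
  completion_tokens = 350,
  prompt_tokens_details = { cached_tokens = 800 },
}
local cached = usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens or 0
-- cost.record_usage(get_model(), 1200, 350, 800) is what gets recorded here.
print(usage.prompt_tokens, usage.completion_tokens, cached) --> 1200 350 800
```
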
@@ -362,20 +447,46 @@ function M.generate_with_tools(messages, context, tool_definitions, callback)
  -- Format messages for Copilot (OpenAI-compatible format)
  local copilot_messages = { { role = "system", content = system_prompt } }
  for _, msg in ipairs(messages) do
    if type(msg.content) == "string" then
      table.insert(copilot_messages, { role = msg.role, content = msg.content })
    elseif type(msg.content) == "table" then
      local text_parts = {}
      for _, part in ipairs(msg.content) do
        if part.type == "tool_result" then
          table.insert(text_parts, "[" .. (part.name or "tool") .. " result]: " .. (part.content or ""))
        elseif part.type == "text" then
          table.insert(text_parts, part.text or "")
    if msg.role == "user" then
      -- User messages - handle string or table content
      if type(msg.content) == "string" then
        table.insert(copilot_messages, { role = "user", content = msg.content })
      elseif type(msg.content) == "table" then
        -- Handle complex content (like tool results from user perspective)
        local text_parts = {}
        for _, part in ipairs(msg.content) do
          if part.type == "tool_result" then
            table.insert(text_parts, "[" .. (part.name or "tool") .. " result]: " .. (part.content or ""))
          elseif part.type == "text" then
            table.insert(text_parts, part.text or "")
          end
        end
        if #text_parts > 0 then
          table.insert(copilot_messages, { role = "user", content = table.concat(text_parts, "\n") })
        end
      end
      if #text_parts > 0 then
        table.insert(copilot_messages, { role = msg.role, content = table.concat(text_parts, "\n") })
    elseif msg.role == "assistant" then
      -- Assistant messages - must preserve tool_calls if present
      local assistant_msg = {
        role = "assistant",
        content = type(msg.content) == "string" and msg.content or nil,
      }
      -- Preserve tool_calls for the API
      if msg.tool_calls then
        assistant_msg.tool_calls = msg.tool_calls
        -- Ensure content is not nil when tool_calls present
        if assistant_msg.content == nil then
          assistant_msg.content = ""
        end
      end
      table.insert(copilot_messages, assistant_msg)
    elseif msg.role == "tool" then
      -- Tool result messages - must have tool_call_id
      table.insert(copilot_messages, {
        role = "tool",
        tool_call_id = msg.tool_call_id,
        content = type(msg.content) == "string" and msg.content or vim.json.encode(msg.content),
      })
    end
  end
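
Note: the reworked loop exists because the OpenAI-compatible API rejects conversations where an assistant turn drops its `tool_calls` or a tool result lacks its `tool_call_id`. A hand-written example of the wire shape the loop produces; the IDs and arguments are made up:

```lua
-- Illustrative message sequence in the OpenAI-compatible shape the loop
-- above produces (IDs and arguments invented for the example).
local copilot_messages = {
  { role = "user", content = "Rename foo to bar in utils.lua" },
  {
    role = "assistant",
    content = "", -- must be a string, not nil, when tool_calls is present
    tool_calls = {
      {
        id = "call_abc123",
        type = "function",
        ["function"] = { name = "edit_file", arguments = '{"path":"utils.lua"}' },
      },
    },
  },
  -- every tool result must echo the id of the call it answers
  { role = "tool", tool_call_id = "call_abc123", content = "ok: 3 occurrences renamed" },
}
```
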
@@ -396,6 +507,20 @@ function M.generate_with_tools(messages, context, tool_definitions, callback)
  logs.debug(string.format("Estimated prompt: ~%d tokens", prompt_estimate))
  logs.thinking("Sending to Copilot API...")

  -- Log request to debug file
  local debug_log_path = vim.fn.expand("~/.local/codetyper-debug.log")
  local debug_f = io.open(debug_log_path, "a")
  if debug_f then
    debug_f:write(os.date("[%Y-%m-%d %H:%M:%S] ") .. "COPILOT REQUEST\n")
    debug_f:write("Messages count: " .. #copilot_messages .. "\n")
    for i, m in ipairs(copilot_messages) do
      debug_f:write(string.format("  [%d] role=%s, has_tool_calls=%s, has_tool_call_id=%s\n",
        i, m.role, tostring(m.tool_calls ~= nil), tostring(m.tool_call_id ~= nil)))
    end
    debug_f:write("---\n")
    debug_f:close()
  end

  local headers = build_headers(token)
  local cmd = {
    "curl",
@@ -413,35 +538,97 @@ function M.generate_with_tools(messages, context, tool_definitions, callback)
  table.insert(cmd, "-d")
  table.insert(cmd, json_body)

  -- Debug logging helper
  local function debug_log(msg, data)
    local log_path = vim.fn.expand("~/.local/codetyper-debug.log")
    local f = io.open(log_path, "a")
    if f then
      f:write(os.date("[%Y-%m-%d %H:%M:%S] ") .. msg .. "\n")
      if data then
        f:write("DATA: " .. tostring(data):sub(1, 2000) .. "\n")
      end
      f:write("---\n")
      f:close()
    end
  end

  -- Prevent double callback calls
  local callback_called = false

  vim.fn.jobstart(cmd, {
    stdout_buffered = true,
    on_stdout = function(_, data)
      if callback_called then
        debug_log("on_stdout: callback already called, skipping")
        return
      end

      if not data or #data == 0 or (data[1] == "" and #data == 1) then
        debug_log("on_stdout: empty data")
        return
      end

      local response_text = table.concat(data, "\n")
      debug_log("on_stdout: received response", response_text)

      local ok, response = pcall(vim.json.decode, response_text)

      if not ok then
        debug_log("JSON parse failed", response_text)
        callback_called = true

        -- Show the actual response text as the error (truncated if too long)
        local error_msg = response_text
        if #error_msg > 200 then
          error_msg = error_msg:sub(1, 200) .. "..."
        end

        -- Clean up common patterns
        if response_text:match("<!DOCTYPE") or response_text:match("<html") then
          error_msg = "Copilot API returned HTML error page. Service may be unavailable."
        end

        -- Check for rate limit and suggest Ollama fallback
        if response_text:match("limit") or response_text:match("Upgrade") or response_text:match("quota") then
          M.suggest_ollama_fallback(error_msg)
        end

        vim.schedule(function()
          logs.error("Failed to parse Copilot response")
          callback(nil, "Failed to parse Copilot response")
          logs.error(error_msg)
          callback(nil, error_msg)
        end)
        return
      end

      if response.error then
        callback_called = true
        local error_msg = response.error.message or "Copilot API error"

        -- Check for rate limit in structured error
        if response.error.code == "rate_limit_exceeded" or (error_msg:match("limit") and error_msg:match("plan")) then
          error_msg = "Copilot rate limit: " .. error_msg
          M.suggest_ollama_fallback(error_msg)
        end

        vim.schedule(function()
          logs.error(response.error.message or "Copilot API error")
          callback(nil, response.error.message or "Copilot API error")
          logs.error(error_msg)
          callback(nil, error_msg)
        end)
        return
      end

      -- Log token usage
      -- Log token usage and record cost
      if response.usage then
        logs.response(response.usage.prompt_tokens or 0, response.usage.completion_tokens or 0, "stop")

        -- Record usage for cost tracking
        local cost_tracker = require("codetyper.cost")
        cost_tracker.record_usage(
          get_model(),
          response.usage.prompt_tokens or 0,
          response.usage.completion_tokens or 0,
          response.usage.prompt_tokens_details and response.usage.prompt_tokens_details.cached_tokens or 0
        )
      end

      -- Convert to Claude-like format for parser compatibility
@@ -474,12 +661,19 @@ function M.generate_with_tools(messages, context, tool_definitions, callback)
        end
      end

      callback_called = true
      debug_log("on_stdout: success, calling callback")
      vim.schedule(function()
        callback(converted, nil)
      end)
    end,
    on_stderr = function(_, data)
      if callback_called then
        return
      end
      if data and #data > 0 and data[1] ~= "" then
        debug_log("on_stderr", table.concat(data, "\n"))
        callback_called = true
        vim.schedule(function()
          logs.error("Copilot API request failed: " .. table.concat(data, "\n"))
          callback(nil, "Copilot API request failed: " .. table.concat(data, "\n"))
@@ -487,7 +681,12 @@ function M.generate_with_tools(messages, context, tool_definitions, callback)
      end
    end,
    on_exit = function(_, code)
      debug_log("on_exit: code=" .. code .. ", callback_called=" .. tostring(callback_called))
      if callback_called then
        return
      end
      if code ~= 0 then
        callback_called = true
        vim.schedule(function()
          logs.error("Copilot API request failed with code: " .. code)
          callback(nil, "Copilot API request failed with code: " .. code)

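
Note: every handler above repeats the same `callback_called` check-and-set. A small "call at most once" wrapper could centralize it; this is a hypothetical sketch, not part of the commit:

```lua
-- Hypothetical once-wrapper (not in this commit): returns a function that
-- forwards to `fn` on the first call and ignores the rest, which is what
-- the callback_called flag implements by hand above.
local function once(fn)
  local called = false
  return function(...)
    if called then
      return
    end
    called = true
    return fn(...)
  end
end

-- Usage: local finish = once(callback)
-- Every handler can then call finish(...) without risking double delivery.
```
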
lua/codetyper/llm/gemini.lua

@@ -8,17 +8,31 @@ local llm = require("codetyper.llm")
--- Gemini API endpoint
local API_URL = "https://generativelanguage.googleapis.com/v1beta/models"

--- Get API key from config or environment
--- Get API key from stored credentials, config, or environment
---@return string|nil API key
local function get_api_key()
  -- Priority: stored credentials > config > environment
  local credentials = require("codetyper.credentials")
  local stored_key = credentials.get_api_key("gemini")
  if stored_key then
    return stored_key
  end

  local codetyper = require("codetyper")
  local config = codetyper.get_config()
  return config.llm.gemini.api_key or vim.env.GEMINI_API_KEY
end

--- Get model from config
--- Get model from stored credentials or config
---@return string Model name
local function get_model()
  -- Priority: stored credentials > config
  local credentials = require("codetyper.credentials")
  local stored_model = credentials.get_model("gemini")
  if stored_model then
    return stored_model
  end

  local codetyper = require("codetyper")
  local config = codetyper.get_config()
  return config.llm.gemini.model

@@ -32,6 +32,32 @@ function M.generate(prompt, context, callback)
  client.generate(prompt, context, callback)
end

--- Smart generate with automatic provider selection based on brain memories
--- Prefers Ollama when context is rich, falls back to Copilot otherwise.
--- Implements verification pondering to reinforce Ollama accuracy over time.
---@param prompt string The user's prompt
---@param context table Context information
---@param callback fun(response: string|nil, error: string|nil, metadata: table|nil) Callback
function M.smart_generate(prompt, context, callback)
  local selector = require("codetyper.llm.selector")
  selector.smart_generate(prompt, context, callback)
end

--- Get accuracy statistics for providers
---@return table Statistics for each provider
function M.get_accuracy_stats()
  local selector = require("codetyper.llm.selector")
  return selector.get_accuracy_stats()
end

--- Report user feedback on response quality (for reinforcement learning)
---@param provider string Which provider generated the response
---@param was_correct boolean Whether the response was good
function M.report_feedback(provider, was_correct)
  local selector = require("codetyper.llm.selector")
  selector.report_feedback(provider, was_correct)
end

--- Build the system prompt for code generation
---@param context table Context information
---@return string System prompt

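
Note: a sketch of how a caller might drive the new public entry points. The field names follow this diff; the context contents and prompt are invented for the example:

```lua
-- Driving the new public API (field names per this diff; the context
-- table contents are assumptions for the example).
local llm = require("codetyper.llm")

llm.smart_generate("write a debounce helper", { file_path = "lua/util.lua" },
  function(response, err, metadata)
    if err then
      vim.notify("generation failed: " .. err, vim.log.levels.ERROR)
      return
    end
    print(("provider=%s pondered=%s"):format(metadata.provider, tostring(metadata.pondered)))
    -- Close the learning loop once the user judges the result:
    llm.report_feedback(metadata.provider, true)
  end)
```
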
lua/codetyper/llm/ollama.lua

@@ -5,21 +5,33 @@ local M = {}
local utils = require("codetyper.utils")
local llm = require("codetyper.llm")

--- Get Ollama host from config
--- Get Ollama host from stored credentials or config
---@return string Host URL
local function get_host()
  -- Priority: stored credentials > config
  local credentials = require("codetyper.credentials")
  local stored_host = credentials.get_ollama_host()
  if stored_host then
    return stored_host
  end

  local codetyper = require("codetyper")
  local config = codetyper.get_config()

  return config.llm.ollama.host
end

--- Get model from config
--- Get model from stored credentials or config
---@return string Model name
local function get_model()
  -- Priority: stored credentials > config
  local credentials = require("codetyper.credentials")
  local stored_model = credentials.get_model("ollama")
  if stored_model then
    return stored_model
  end

  local codetyper = require("codetyper")
  local config = codetyper.get_config()

  return config.llm.ollama.model
end

@@ -199,47 +211,41 @@ function M.validate()
  return true
end

--- Build system prompt for agent mode with tool instructions
--- Generate with tool use support for agentic mode (text-based tool calling)
---@param messages table[] Conversation history
---@param context table Context information
---@return string System prompt
local function build_agent_system_prompt(context)
---@param tool_definitions table Tool definitions
---@param callback fun(response: table|nil, error: string|nil) Callback with Claude-like response format
function M.generate_with_tools(messages, context, tool_definitions, callback)
  local logs = require("codetyper.agent.logs")
  local agent_prompts = require("codetyper.prompts.agent")
  local tools_module = require("codetyper.agent.tools")

  local system_prompt = agent_prompts.system .. "\n\n"
  system_prompt = system_prompt .. tools_module.to_prompt_format() .. "\n\n"
  system_prompt = system_prompt .. agent_prompts.tool_instructions
  logs.request("ollama", get_model())
  logs.thinking("Preparing agent request...")

  -- Add context about current file if available
  if context.file_path then
    system_prompt = system_prompt .. "\n\nCurrent working context:\n"
    system_prompt = system_prompt .. "- File: " .. context.file_path .. "\n"
    if context.language then
      system_prompt = system_prompt .. "- Language: " .. context.language .. "\n"
  -- Build system prompt with tool instructions
  local system_prompt = llm.build_system_prompt(context)
  system_prompt = system_prompt .. "\n\n" .. agent_prompts.system
  system_prompt = system_prompt .. "\n\n" .. agent_prompts.tool_instructions

  -- Add tool descriptions
  system_prompt = system_prompt .. "\n\n## Available Tools\n"
  system_prompt = system_prompt .. "Call tools by outputting JSON in this exact format:\n"
  system_prompt = system_prompt .. '```json\n{"tool": "tool_name", "arguments": {...}}\n```\n\n'

  for _, tool in ipairs(tool_definitions) do
    local name = tool.name or (tool["function"] and tool["function"].name)
    local desc = tool.description or (tool["function"] and tool["function"].description)
    if name then
      system_prompt = system_prompt .. string.format("### %s\n%s\n\n", name, desc or "")
    end
  end

  -- Add project root info
  local root = utils.get_project_root()
  if root then
    system_prompt = system_prompt .. "- Project root: " .. root .. "\n"
  end

  return system_prompt
end

--- Build request body for Ollama API with tools (chat format)
---@param messages table[] Conversation messages
---@param context table Context information
---@return table Request body
local function build_tools_request_body(messages, context)
  local system_prompt = build_agent_system_prompt(context)

  -- Convert messages to Ollama chat format
  local ollama_messages = {}
  for _, msg in ipairs(messages) do
    local content = msg.content
    -- Handle complex content (like tool results)
    if type(content) == "table" then
      local text_parts = {}
      for _, part in ipairs(content) do
@@ -251,14 +257,10 @@ local function build_tools_request_body(messages, context)
      end
      content = table.concat(text_parts, "\n")
    end

    table.insert(ollama_messages, {
      role = msg.role,
      content = content,
    })
    table.insert(ollama_messages, { role = msg.role, content = content })
  end

  return {
  local body = {
    model = get_model(),
    messages = ollama_messages,
    system = system_prompt,
@@ -268,16 +270,15 @@ local function build_tools_request_body(messages, context)
      num_predict = 4096,
    },
  }
end

--- Make HTTP request to Ollama chat API
---@param body table Request body
---@param callback fun(response: string|nil, error: string|nil, usage: table|nil) Callback function
local function make_chat_request(body, callback)
  local host = get_host()
  local url = host .. "/api/chat"
  local json_body = vim.json.encode(body)

  local prompt_estimate = logs.estimate_tokens(json_body)
  logs.debug(string.format("Estimated prompt: ~%d tokens", prompt_estimate))
  logs.thinking("Sending to Ollama API...")

  local cmd = {
    "curl",
    "-s",
@@ -302,196 +303,82 @@ local function make_chat_request(body, callback)

      if not ok then
        vim.schedule(function()
          callback(nil, "Failed to parse Ollama response", nil)
          logs.error("Failed to parse Ollama response")
          callback(nil, "Failed to parse Ollama response")
        end)
        return
      end

      if response.error then
        vim.schedule(function()
          callback(nil, response.error or "Ollama API error", nil)
          logs.error(response.error or "Ollama API error")
          callback(nil, response.error or "Ollama API error")
        end)
        return
      end

      -- Extract usage info
      local usage = {
        prompt_tokens = response.prompt_eval_count or 0,
        response_tokens = response.eval_count or 0,
      }
      -- Log token usage and record cost (Ollama is free but we track usage)
      if response.prompt_eval_count or response.eval_count then
        logs.response(response.prompt_eval_count or 0, response.eval_count or 0, "stop")

      -- Return the message content for agent parsing
      if response.message and response.message.content then
        vim.schedule(function()
          callback(response.message.content, nil, usage)
        end)
      else
        vim.schedule(function()
          callback(nil, "No response from Ollama", nil)
        end)
        -- Record usage for cost tracking (free for local models)
        local cost = require("codetyper.cost")
        cost.record_usage(
          get_model(),
          response.prompt_eval_count or 0,
          response.eval_count or 0,
          0 -- No cached tokens for Ollama
        )
      end

      -- Parse the response text for tool calls
      local content_text = response.message and response.message.content or ""
      local converted = { content = {}, stop_reason = "end_turn" }

      -- Try to extract JSON tool calls from response
      local json_match = content_text:match("```json%s*(%b{})%s*```")
      if json_match then
        local ok_json, parsed = pcall(vim.json.decode, json_match)
        if ok_json and parsed.tool then
          table.insert(converted.content, {
            type = "tool_use",
            id = "call_" .. string.format("%x", os.time()) .. "_" .. string.format("%x", math.random(0, 0xFFFF)),
            name = parsed.tool,
            input = parsed.arguments or {},
          })
          logs.thinking("Tool call: " .. parsed.tool)
          content_text = content_text:gsub("```json.-```", ""):gsub("^%s+", ""):gsub("%s+$", "")
          converted.stop_reason = "tool_use"
        end
      end

      -- Add text content
      if content_text and content_text ~= "" then
        table.insert(converted.content, 1, { type = "text", text = content_text })
        logs.thinking("Response contains text")
      end

      vim.schedule(function()
        callback(converted, nil)
      end)
    end,
    on_stderr = function(_, data)
      if data and #data > 0 and data[1] ~= "" then
        vim.schedule(function()
          callback(nil, "Ollama API request failed: " .. table.concat(data, "\n"), nil)
          logs.error("Ollama API request failed: " .. table.concat(data, "\n"))
          callback(nil, "Ollama API request failed: " .. table.concat(data, "\n"))
        end)
      end
    end,
    on_exit = function(_, code)
      if code ~= 0 then
        -- Don't double-report errors
        vim.schedule(function()
          logs.error("Ollama API request failed with code: " .. code)
          callback(nil, "Ollama API request failed with code: " .. code)
        end)
      end
    end,
  })
end

--- Generate response with tools using Ollama API
---@param messages table[] Conversation history
---@param context table Context information
---@param tools table Tool definitions (embedded in prompt for Ollama)
---@param callback fun(response: string|nil, error: string|nil) Callback function
function M.generate_with_tools(messages, context, tools, callback)
  local logs = require("codetyper.agent.logs")

  -- Log the request
  local model = get_model()
  logs.request("ollama", model)
  logs.thinking("Preparing API request...")

  local body = build_tools_request_body(messages, context)

  -- Estimate prompt tokens
  local prompt_estimate = logs.estimate_tokens(vim.json.encode(body))
  logs.debug(string.format("Estimated prompt: ~%d tokens", prompt_estimate))

  make_chat_request(body, function(response, err, usage)
    if err then
      logs.error(err)
      callback(nil, err)
    else
      -- Log token usage
      if usage then
        logs.response(usage.prompt_tokens or 0, usage.response_tokens or 0, "end_turn")
      end

      -- Log if response contains tool calls
      if response then
        local parser = require("codetyper.agent.parser")
        local parsed = parser.parse_ollama_response(response)
        if #parsed.tool_calls > 0 then
          for _, tc in ipairs(parsed.tool_calls) do
            logs.thinking("Tool call: " .. tc.name)
          end
        end
        if parsed.text and parsed.text ~= "" then
          logs.thinking("Response contains text")
        end
      end

      callback(response, nil)
    end
  end)
end

--- Generate with tool use support for agentic mode (simulated via prompts)
---@param messages table[] Conversation history
---@param context table Context information
---@param tool_definitions table Tool definitions
---@param callback fun(response: string|nil, error: string|nil) Callback with response text
function M.generate_with_tools(messages, context, tool_definitions, callback)
  local tools_module = require("codetyper.agent.tools")
  local agent_prompts = require("codetyper.prompts.agent")

  -- Build system prompt with agent instructions and tool definitions
  local system_prompt = llm.build_system_prompt(context)
  system_prompt = system_prompt .. "\n\n" .. agent_prompts.system
  system_prompt = system_prompt .. "\n\n" .. tools_module.to_prompt_format()

  -- Flatten messages to a single prompt (Ollama's generate API)
  local prompt_parts = {}
  for _, msg in ipairs(messages) do
    if type(msg.content) == "string" then
      local role_prefix = msg.role == "user" and "User" or "Assistant"
      table.insert(prompt_parts, role_prefix .. ": " .. msg.content)
    elseif type(msg.content) == "table" then
      -- Handle tool results
      for _, item in ipairs(msg.content) do
        if item.type == "tool_result" then
          table.insert(prompt_parts, "Tool result: " .. item.content)
        end
      end
    end
  end

  local body = {
    model = get_model(),
    system = system_prompt,
    prompt = table.concat(prompt_parts, "\n\n"),
    stream = false,
    options = {
      temperature = 0.2,
      num_predict = 4096,
    },
  }

  local host = get_host()
  local url = host .. "/api/generate"
  local json_body = vim.json.encode(body)

  local cmd = {
    "curl",
    "-s",
    "-X", "POST",
    url,
    "-H", "Content-Type: application/json",
    "-d", json_body,
  }

  vim.fn.jobstart(cmd, {
    stdout_buffered = true,
    on_stdout = function(_, data)
      if not data or #data == 0 or (data[1] == "" and #data == 1) then
        return
      end

      local response_text = table.concat(data, "\n")
      local ok, response = pcall(vim.json.decode, response_text)

      if not ok then
        vim.schedule(function()
          callback(nil, "Failed to parse Ollama response")
        end)
        return
      end

      if response.error then
        vim.schedule(function()
          callback(nil, response.error or "Ollama API error")
        end)
        return
      end

      -- Return raw response text for parser to handle
      vim.schedule(function()
        callback(response.response or "", nil)
      end)
    end,
    on_stderr = function(_, data)
      if data and #data > 0 and data[1] ~= "" then
        vim.schedule(function()
          callback(nil, "Ollama API request failed: " .. table.concat(data, "\n"))
        end)
      end
    end,
    on_exit = function(_, code)
      if code ~= 0 then
        vim.schedule(function()
          callback(nil, "Ollama API request failed with code: " .. code)
        end)
      end
    end,
  })
end

return M

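
Note: `make_chat_request` now recovers tool calls from fenced JSON inside the model's text, since Ollama has no native tool-calling in this path. A plain-Lua illustration of that extraction; the model reply is invented:

```lua
-- Plain-Lua illustration of the fenced-JSON extraction used above
-- (the model reply is invented for the example).
local fence = string.rep("`", 3) -- avoids a literal fence inside this block
local reply = "I will read the file first.\n"
  .. fence .. "json\n"
  .. '{"tool": "read_file", "arguments": {"path": "init.lua"}}\n'
  .. fence

local json_match = reply:match(fence .. "json%s*(%b{})%s*" .. fence)
print(json_match) --> {"tool": "read_file", "arguments": {"path": "init.lua"}}
-- Decoding this JSON yields the tool_use content block built above, and
-- stop_reason flips to "tool_use" so the agent loop executes the tool.
```
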
lua/codetyper/llm/openai.lua

@@ -8,25 +8,46 @@ local llm = require("codetyper.llm")
--- OpenAI API endpoint
local API_URL = "https://api.openai.com/v1/chat/completions"

--- Get API key from config or environment
--- Get API key from stored credentials, config, or environment
---@return string|nil API key
local function get_api_key()
  -- Priority: stored credentials > config > environment
  local credentials = require("codetyper.credentials")
  local stored_key = credentials.get_api_key("openai")
  if stored_key then
    return stored_key
  end

  local codetyper = require("codetyper")
  local config = codetyper.get_config()
  return config.llm.openai.api_key or vim.env.OPENAI_API_KEY
end

--- Get model from config
--- Get model from stored credentials or config
---@return string Model name
local function get_model()
  -- Priority: stored credentials > config
  local credentials = require("codetyper.credentials")
  local stored_model = credentials.get_model("openai")
  if stored_model then
    return stored_model
  end

  local codetyper = require("codetyper")
  local config = codetyper.get_config()
  return config.llm.openai.model
end

--- Get endpoint from config (allows custom endpoints like Azure, OpenRouter)
--- Get endpoint from stored credentials or config (allows custom endpoints like Azure, OpenRouter)
---@return string API endpoint
local function get_endpoint()
  -- Priority: stored credentials > config > default
  local credentials = require("codetyper.credentials")
  local stored_endpoint = credentials.get_endpoint("openai")
  if stored_endpoint then
    return stored_endpoint
  end

  local codetyper = require("codetyper")
  local config = codetyper.get_config()
  return config.llm.openai.endpoint or API_URL
@@ -284,9 +305,18 @@ function M.generate_with_tools(messages, context, tool_definitions, callback)
    return
  end

  -- Log token usage
  -- Log token usage and record cost
  if response.usage then
    logs.response(response.usage.prompt_tokens or 0, response.usage.completion_tokens or 0, "stop")

    -- Record usage for cost tracking
    local cost = require("codetyper.cost")
    cost.record_usage(
      model,
      response.usage.prompt_tokens or 0,
      response.usage.completion_tokens or 0,
      response.usage.prompt_tokens_details and response.usage.prompt_tokens_details.cached_tokens or 0
    )
  end

  -- Convert to Claude-like format for parser compatibility

lua/codetyper/llm/selector.lua (new file, 514 lines)
@@ -0,0 +1,514 @@
---@mod codetyper.llm.selector Smart LLM selection with memory-based confidence
---@brief [[
--- Intelligent LLM provider selection based on brain memories.
--- Prefers local Ollama when context is rich, falls back to Copilot otherwise.
--- Implements verification pondering to reinforce Ollama accuracy over time.
---@brief ]]

local M = {}

---@class SelectionResult
---@field provider string Selected provider name
---@field confidence number Confidence score (0-1)
---@field memory_count number Number of relevant memories found
---@field reason string Human-readable reason for selection

---@class PonderResult
---@field ollama_response string Ollama's response
---@field verifier_response string Verifier's response
---@field agreement_score number How much they agree (0-1)
---@field ollama_correct boolean Whether Ollama was deemed correct
---@field feedback string Feedback for learning

--- Minimum memories required for high confidence
local MIN_MEMORIES_FOR_LOCAL = 3

--- Minimum memory relevance score for local provider
local MIN_RELEVANCE_FOR_LOCAL = 0.6

--- Agreement threshold for Ollama verification
local AGREEMENT_THRESHOLD = 0.7

--- Pondering sample rate (0-1) - how often to verify Ollama
local PONDER_SAMPLE_RATE = 0.2

--- Provider accuracy tracking (persisted in brain)
local accuracy_cache = {
  ollama = { correct = 0, total = 0 },
  copilot = { correct = 0, total = 0 },
}

--- Get the brain module safely
---@return table|nil
local function get_brain()
  local ok, brain = pcall(require, "codetyper.brain")
  if ok and brain.is_initialized and brain.is_initialized() then
    return brain
  end
  return nil
end

--- Load accuracy stats from brain
local function load_accuracy_stats()
  local brain = get_brain()
  if not brain then
    return
  end

  -- Query for accuracy tracking nodes
  pcall(function()
    local result = brain.query({
      query = "provider_accuracy_stats",
      types = { "metric" },
      limit = 1,
    })

    if result and result.nodes and #result.nodes > 0 then
      local node = result.nodes[1]
      if node.c and node.c.d then
        local ok, stats = pcall(vim.json.decode, node.c.d)
        if ok and stats then
          accuracy_cache = stats
        end
      end
    end
  end)
end

--- Save accuracy stats to brain
local function save_accuracy_stats()
  local brain = get_brain()
  if not brain then
    return
  end

  pcall(function()
    brain.learn({
      type = "metric",
      summary = "provider_accuracy_stats",
      detail = vim.json.encode(accuracy_cache),
      weight = 1.0,
    })
  end)
end

--- Calculate Ollama confidence based on historical accuracy
---@return number confidence (0-1)
local function get_ollama_historical_confidence()
  local stats = accuracy_cache.ollama
  if stats.total < 5 then
    -- Not enough data, return neutral confidence
    return 0.5
  end

  local accuracy = stats.correct / stats.total
  -- Boost confidence if accuracy is high
  return math.min(1.0, accuracy * 1.2)
end

--- Query brain for relevant context
---@param prompt string User prompt
---@param file_path string|nil Current file path
---@return table result {memories: table[], relevance: number, count: number}
local function query_brain_context(prompt, file_path)
  local result = {
    memories = {},
    relevance = 0,
    count = 0,
  }

  local brain = get_brain()
  if not brain then
    return result
  end

  -- Query brain with multiple dimensions
  local ok, query_result = pcall(function()
    return brain.query({
      query = prompt,
      file = file_path,
      limit = 10,
      types = { "pattern", "correction", "convention", "fact" },
    })
  end)

  if not ok or not query_result then
    return result
  end

  result.memories = query_result.nodes or {}
  result.count = #result.memories

  -- Calculate average relevance
  if result.count > 0 then
    local total_relevance = 0
    for _, node in ipairs(result.memories) do
      -- Use node weight and success rate as relevance indicators
      local node_relevance = (node.sc and node.sc.w or 0.5) * (node.sc and node.sc.sr or 0.5)
      total_relevance = total_relevance + node_relevance
    end
    result.relevance = total_relevance / result.count
  end

  return result
end

--- Select the best LLM provider based on context
---@param prompt string User prompt
---@param context table LLM context
---@return SelectionResult
function M.select_provider(prompt, context)
  -- Load accuracy stats on first call
  if accuracy_cache.ollama.total == 0 then
    load_accuracy_stats()
  end

  local file_path = context.file_path

  -- Query brain for relevant memories
  local brain_context = query_brain_context(prompt, file_path)

  -- Calculate base confidence from memories
  local memory_confidence = 0
  if brain_context.count >= MIN_MEMORIES_FOR_LOCAL then
    memory_confidence = math.min(1.0, brain_context.count / 10) * brain_context.relevance
  end

  -- Factor in historical Ollama accuracy
  local historical_confidence = get_ollama_historical_confidence()

  -- Combined confidence score
  local combined_confidence = (memory_confidence * 0.6) + (historical_confidence * 0.4)

  -- Decision logic
  local provider = "copilot" -- Default to more capable
  local reason = ""

  if brain_context.count >= MIN_MEMORIES_FOR_LOCAL and combined_confidence >= MIN_RELEVANCE_FOR_LOCAL then
    provider = "ollama"
    reason = string.format(
      "Rich context: %d memories (%.1f%% relevance), historical accuracy: %.1f%%",
      brain_context.count,
      brain_context.relevance * 100,
      historical_confidence * 100
    )
  elseif brain_context.count > 0 and combined_confidence >= 0.4 then
    -- Medium confidence - use Ollama but with pondering
    provider = "ollama"
    reason = string.format(
      "Moderate context: %d memories, will verify with pondering",
      brain_context.count
    )
  else
    reason = string.format(
      "Insufficient context: %d memories (need %d), using capable provider",
      brain_context.count,
      MIN_MEMORIES_FOR_LOCAL
    )
  end

  return {
    provider = provider,
    confidence = combined_confidence,
    memory_count = brain_context.count,
    reason = reason,
    memories = brain_context.memories,
  }
end
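
Note: a worked example of the scoring above, with invented numbers, to make the branch thresholds concrete:

```lua
-- Worked example (numbers invented): 6 memories at average relevance 0.7,
-- historical accuracy 0.8 over enough samples.
local memory_confidence = math.min(1.0, 6 / 10) * 0.7  -- 0.42
local historical_confidence = math.min(1.0, 0.8 * 1.2) -- 0.96
local combined = (memory_confidence * 0.6) + (historical_confidence * 0.4)
print(combined) --> 0.636
-- 6 >= MIN_MEMORIES_FOR_LOCAL (3) and 0.636 >= MIN_RELEVANCE_FOR_LOCAL (0.6),
-- so this request lands on the high-confidence Ollama branch.
```
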

--- Check if we should ponder (verify) this Ollama response
---@param confidence number Current confidence level
---@return boolean
function M.should_ponder(confidence)
  -- Always ponder when confidence is medium
  if confidence >= 0.4 and confidence < 0.7 then
    return true
  end

  -- Random sampling for high confidence to keep learning
  if confidence >= 0.7 then
    return math.random() < PONDER_SAMPLE_RATE
  end

  -- Low confidence shouldn't reach Ollama anyway
  return false
end

--- Calculate agreement score between two responses
---@param response1 string First response
---@param response2 string Second response
---@return number Agreement score (0-1)
local function calculate_agreement(response1, response2)
  -- Normalize responses
  local norm1 = response1:lower():gsub("%s+", " "):gsub("[^%w%s]", "")
  local norm2 = response2:lower():gsub("%s+", " "):gsub("[^%w%s]", "")

  -- Extract words
  local words1 = {}
  for word in norm1:gmatch("%w+") do
    words1[word] = (words1[word] or 0) + 1
  end

  local words2 = {}
  for word in norm2:gmatch("%w+") do
    words2[word] = (words2[word] or 0) + 1
  end

  -- Calculate Jaccard similarity
  local intersection = 0
  local union = 0

  for word, count1 in pairs(words1) do
    local count2 = words2[word] or 0
    intersection = intersection + math.min(count1, count2)
    union = union + math.max(count1, count2)
  end

  for word, count2 in pairs(words2) do
    if not words1[word] then
      union = union + count2
    end
  end

  if union == 0 then
    return 1.0 -- Both empty
  end

  -- Also check structural similarity (code structure)
  local struct_score = 0
  local function_count1 = select(2, response1:gsub("function", ""))
  local function_count2 = select(2, response2:gsub("function", ""))
  if function_count1 > 0 or function_count2 > 0 then
    struct_score = 1 - math.abs(function_count1 - function_count2) / math.max(function_count1, function_count2, 1)
  else
    struct_score = 1.0
  end

  -- Combined score
  local jaccard = intersection / union
  return (jaccard * 0.7) + (struct_score * 0.3)
end
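
Note: `calculate_agreement` blends a multiset Jaccard similarity (weight 0.7) with a function-count heuristic (weight 0.3). A small worked example with invented strings:

```lua
-- Worked example (invented strings): "local x = 1" vs "local y = 1".
-- After normalization the word multisets are {local, x, 1} and {local, y, 1}:
-- intersection = 2 (local, 1), union = 4 (local, x, y, 1) -> jaccard = 0.5.
-- Neither string contains "function", so struct_score = 1.0.
local jaccard, struct_score = 0.5, 1.0
local agreement = (jaccard * 0.7) + (struct_score * 0.3)
print(agreement) --> 0.65, just under AGREEMENT_THRESHOLD (0.7),
-- so two responses this different would be flagged as a disagreement.
```
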

--- Ponder (verify) Ollama's response with another LLM
---@param prompt string Original prompt
---@param context table LLM context
---@param ollama_response string Ollama's response
---@param callback fun(result: PonderResult) Callback with pondering result
function M.ponder(prompt, context, ollama_response, callback)
  -- Use Copilot as verifier
  local copilot = require("codetyper.llm.copilot")

  -- Build verification prompt
  local verify_prompt = prompt

  copilot.generate(verify_prompt, context, function(verifier_response, error)
    if error or not verifier_response then
      -- Verification failed, assume Ollama is correct
      callback({
        ollama_response = ollama_response,
        verifier_response = "",
        agreement_score = 1.0,
        ollama_correct = true,
        feedback = "Verification unavailable, trusting Ollama",
      })
      return
    end

    -- Calculate agreement
    local agreement = calculate_agreement(ollama_response, verifier_response)

    -- Determine if Ollama was correct
    local ollama_correct = agreement >= AGREEMENT_THRESHOLD

    -- Generate feedback
    local feedback
    if ollama_correct then
      feedback = string.format("Agreement: %.1f%% - Ollama response validated", agreement * 100)
    else
      feedback = string.format(
        "Disagreement: %.1f%% - Ollama may need correction",
        (1 - agreement) * 100
      )
    end

    -- Update accuracy tracking
    accuracy_cache.ollama.total = accuracy_cache.ollama.total + 1
    if ollama_correct then
      accuracy_cache.ollama.correct = accuracy_cache.ollama.correct + 1
    end
    save_accuracy_stats()

    -- Learn from this verification
    local brain = get_brain()
    if brain then
      pcall(function()
        if ollama_correct then
          -- Reinforce the pattern
          brain.learn({
            type = "correction",
            summary = "Ollama verified correct",
            detail = string.format(
              "Prompt: %s\nAgreement: %.1f%%",
              prompt:sub(1, 100),
              agreement * 100
            ),
            weight = 0.8,
            file = context.file_path,
          })
        else
          -- Learn the correction
          brain.learn({
            type = "correction",
            summary = "Ollama needed correction",
            detail = string.format(
              "Prompt: %s\nOllama: %s\nCorrect: %s",
              prompt:sub(1, 100),
              ollama_response:sub(1, 200),
              verifier_response:sub(1, 200)
            ),
            weight = 0.9,
            file = context.file_path,
          })
        end
      end)
    end

    callback({
      ollama_response = ollama_response,
      verifier_response = verifier_response,
      agreement_score = agreement,
      ollama_correct = ollama_correct,
      feedback = feedback,
    })
  end)
end

--- Smart generate with automatic provider selection and pondering
---@param prompt string User prompt
---@param context table LLM context
---@param callback fun(response: string|nil, error: string|nil, metadata: table|nil) Callback
function M.smart_generate(prompt, context, callback)
  -- Select provider
  local selection = M.select_provider(prompt, context)

  -- Log selection
  pcall(function()
    local logs = require("codetyper.agent.logs")
    logs.add({
      type = "info",
      message = string.format(
        "LLM: %s (confidence: %.1f%%, %s)",
        selection.provider,
        selection.confidence * 100,
        selection.reason
      ),
    })
  end)

  -- Get the selected client
  local client
  if selection.provider == "ollama" then
    client = require("codetyper.llm.ollama")
  else
    client = require("codetyper.llm.copilot")
  end

  -- Generate response
  client.generate(prompt, context, function(response, error)
    if error then
      -- Fallback on error
      if selection.provider == "ollama" then
        -- Try Copilot as fallback
        local copilot = require("codetyper.llm.copilot")
        copilot.generate(prompt, context, function(fallback_response, fallback_error)
          callback(fallback_response, fallback_error, {
            provider = "copilot",
            fallback = true,
            original_provider = "ollama",
            original_error = error,
          })
        end)
        return
      end
      callback(nil, error, { provider = selection.provider })
      return
    end

    -- Check if we should ponder
    if selection.provider == "ollama" and M.should_ponder(selection.confidence) then
      M.ponder(prompt, context, response, function(ponder_result)
        if ponder_result.ollama_correct then
          -- Ollama was correct, use its response
          callback(response, nil, {
            provider = "ollama",
            pondered = true,
            agreement = ponder_result.agreement_score,
            confidence = selection.confidence,
          })
        else
          -- Use verifier's response instead
          callback(ponder_result.verifier_response, nil, {
            provider = "copilot",
            pondered = true,
            agreement = ponder_result.agreement_score,
            original_provider = "ollama",
            corrected = true,
          })
        end
      end)
    else
      -- No pondering needed
      callback(response, nil, {
        provider = selection.provider,
        pondered = false,
        confidence = selection.confidence,
      })
    end
  end)
end

--- Get current accuracy statistics
---@return table {ollama: {correct, total, accuracy}, copilot: {correct, total, accuracy}}
function M.get_accuracy_stats()
  local stats = {
    ollama = {
      correct = accuracy_cache.ollama.correct,
      total = accuracy_cache.ollama.total,
      accuracy = accuracy_cache.ollama.total > 0
        and (accuracy_cache.ollama.correct / accuracy_cache.ollama.total)
        or 0,
    },
    copilot = {
      correct = accuracy_cache.copilot.correct,
      total = accuracy_cache.copilot.total,
      accuracy = accuracy_cache.copilot.total > 0
        and (accuracy_cache.copilot.correct / accuracy_cache.copilot.total)
        or 0,
    },
  }
  return stats
end

--- Reset accuracy statistics
function M.reset_accuracy_stats()
  accuracy_cache = {
    ollama = { correct = 0, total = 0 },
    copilot = { correct = 0, total = 0 },
  }
  save_accuracy_stats()
end

--- Report user feedback on response quality
---@param provider string Which provider generated the response
---@param was_correct boolean Whether the response was good
function M.report_feedback(provider, was_correct)
  if accuracy_cache[provider] then
    accuracy_cache[provider].total = accuracy_cache[provider].total + 1
    if was_correct then
      accuracy_cache[provider].correct = accuracy_cache[provider].correct + 1
    end
    save_accuracy_stats()
  end
end

return M
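
Note: a sketch of rendering the tracked accuracy, for instance behind a user command; the `:CodetyperStats` name is hypothetical and not part of this commit:

```lua
-- Sketch: printing the tracked accuracy, e.g. from a hypothetical
-- :CodetyperStats command (name invented; API per this diff).
local selector = require("codetyper.llm.selector")
local stats = selector.get_accuracy_stats()
for provider, s in pairs(stats) do
  print(string.format("%s: %d/%d correct (%.0f%%)",
    provider, s.correct, s.total, s.accuracy * 100))
end
```
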