Adding more features

2026-01-15 20:58:56 -05:00
parent 84c8bcf92c
commit f5df1a9ac0
40 changed files with 9145 additions and 458 deletions

View File

@@ -14,6 +14,51 @@ local AUTH_URL = "https://api.github.com/copilot_internal/v2/token"
---@field github_token table|nil
M.state = nil
--- Track if we've already suggested Ollama fallback this session
local ollama_fallback_suggested = false
--- Suggest switching to Ollama when rate limits are hit
---@param error_msg string The error message that triggered this
function M.suggest_ollama_fallback(error_msg)
if ollama_fallback_suggested then
return
end
-- Check if Ollama is available
local ollama_available = false
vim.fn.jobstart({ "curl", "-s", "http://localhost:11434/api/tags" }, {
on_exit = function(_, code)
if code == 0 then
ollama_available = true
end
vim.schedule(function()
if ollama_available then
-- Switch to Ollama automatically
local codetyper = require("codetyper")
local config = codetyper.get_config()
config.llm.provider = "ollama"
ollama_fallback_suggested = true
utils.notify(
"⚠️ Copilot rate limit reached. Switched to Ollama automatically.\n"
.. "Original error: "
.. error_msg:sub(1, 100),
vim.log.levels.WARN
)
else
utils.notify(
"⚠️ Copilot rate limit reached. Ollama not available.\n"
.. "Start Ollama with: ollama serve\n"
.. "Or wait for Copilot limits to reset.",
vim.log.levels.WARN
)
end
end)
end,
})
end
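-- Illustrative trigger for the fallback (assumed error text, not captured
-- API output): an error path elsewhere in this module might do
--   local err = "You have exceeded your quota. Upgrade your plan."
--   if err:match("limit") or err:match("quota") or err:match("Upgrade") then
--     M.suggest_ollama_fallback(err)
--   end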
--- Get OAuth token from copilot.lua or copilot.vim config
---@return string|nil OAuth token
local function get_oauth_token()
@@ -51,9 +96,16 @@ local function get_oauth_token()
return nil
end
--- Get model from config
--- Get model from stored credentials or config
---@return string Model name
local function get_model()
-- Priority: stored credentials > config
local credentials = require("codetyper.credentials")
local stored_model = credentials.get_model("copilot")
if stored_model then
return stored_model
end
local codetyper = require("codetyper")
local config = codetyper.get_config()
return config.llm.copilot.model
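The codetyper.credentials module itself is not shown in this diff; a minimal sketch of the interface these getters assume (a JSON file keyed by provider; the path and field names are illustrative):

local M = {}
local path = vim.fn.stdpath("data") .. "/codetyper/credentials.json"

-- Read the store lazily; an absent or unparsable file behaves as empty.
local function load()
  local f = io.open(path, "r")
  if not f then return {} end
  local ok, data = pcall(vim.json.decode, f:read("*a"))
  f:close()
  return ok and data or {}
end

function M.get_model(provider)
  local entry = load()[provider]
  return entry and entry.model or nil
end

return M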
@@ -204,15 +256,37 @@ local function make_request(token, body, callback)
local ok, response = pcall(vim.json.decode, response_text)
if not ok then
-- Show the actual response text as the error (truncated if too long)
local error_msg = response_text
if #error_msg > 200 then
error_msg = error_msg:sub(1, 200) .. "..."
end
-- Clean up common patterns
if response_text:match("<!DOCTYPE") or response_text:match("<html") then
error_msg = "Copilot API returned HTML error page. Service may be unavailable."
end
-- Check for rate limit and suggest Ollama fallback
if response_text:match("limit") or response_text:match("Upgrade") or response_text:match("quota") then
M.suggest_ollama_fallback(error_msg)
end
vim.schedule(function()
callback(nil, "Failed to parse Copilot response", nil)
callback(nil, error_msg, nil)
end)
return
end
if response.error then
local error_msg = response.error.message or "Copilot API error"
if response.error.code == "rate_limit_exceeded" or (error_msg:match("limit") and error_msg:match("plan")) then
error_msg = "Copilot rate limit: " .. error_msg
M.suggest_ollama_fallback(error_msg)
end
vim.schedule(function()
callback(nil, response.error.message or "Copilot API error", nil)
callback(nil, error_msg, nil)
end)
return
end
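For reference, a sketch of the three response shapes this error handling distinguishes (assumed examples, not captured API output):

-- An HTML outage page: matches the <!DOCTYPE / <html branch.
local html_outage = "<!DOCTYPE html><html><body>502 Bad Gateway</body></html>"
-- A plain-text quota notice: matches limit/Upgrade/quota and triggers the
-- Ollama fallback suggestion.
local quota_text = "You have exceeded your free quota. Upgrade to continue."
-- A structured error: hits the response.error branch and gains the
-- "Copilot rate limit:" prefix.
local structured = {
  error = { code = "rate_limit_exceeded", message = "Rate limit reached for your plan." },
}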
@@ -220,6 +294,17 @@ local function make_request(token, body, callback)
-- Extract usage info
local usage = response.usage or {}
-- Record usage for cost tracking
if usage.prompt_tokens or usage.completion_tokens then
local cost = require("codetyper.cost")
cost.record_usage(
get_model(),
usage.prompt_tokens or 0,
usage.completion_tokens or 0,
usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens or 0
)
end
if response.choices and response.choices[1] and response.choices[1].message then
local code = llm.extract_code(response.choices[1].message.content)
vim.schedule(function()
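The codetyper.cost module's internals are not part of this excerpt; a minimal sketch of the record_usage signature the call above assumes:

-- Hypothetical minimal accumulator matching the call above.
local M = { totals = {} }

function M.record_usage(model, prompt_tokens, completion_tokens, cached_tokens)
  local t = M.totals[model] or { prompt = 0, completion = 0, cached = 0 }
  t.prompt = t.prompt + prompt_tokens
  t.completion = t.completion + completion_tokens
  t.cached = t.cached + cached_tokens
  M.totals[model] = t
end

return M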
@@ -362,20 +447,46 @@ function M.generate_with_tools(messages, context, tool_definitions, callback)
-- Format messages for Copilot (OpenAI-compatible format)
local copilot_messages = { { role = "system", content = system_prompt } }
for _, msg in ipairs(messages) do
if type(msg.content) == "string" then
table.insert(copilot_messages, { role = msg.role, content = msg.content })
elseif type(msg.content) == "table" then
local text_parts = {}
for _, part in ipairs(msg.content) do
if part.type == "tool_result" then
table.insert(text_parts, "[" .. (part.name or "tool") .. " result]: " .. (part.content or ""))
elseif part.type == "text" then
table.insert(text_parts, part.text or "")
if msg.role == "user" then
-- User messages - handle string or table content
if type(msg.content) == "string" then
table.insert(copilot_messages, { role = "user", content = msg.content })
elseif type(msg.content) == "table" then
-- Handle complex content (like tool results from user perspective)
local text_parts = {}
for _, part in ipairs(msg.content) do
if part.type == "tool_result" then
table.insert(text_parts, "[" .. (part.name or "tool") .. " result]: " .. (part.content or ""))
elseif part.type == "text" then
table.insert(text_parts, part.text or "")
end
end
if #text_parts > 0 then
table.insert(copilot_messages, { role = "user", content = table.concat(text_parts, "\n") })
end
end
if #text_parts > 0 then
table.insert(copilot_messages, { role = msg.role, content = table.concat(text_parts, "\n") })
elseif msg.role == "assistant" then
-- Assistant messages - must preserve tool_calls if present
local assistant_msg = {
role = "assistant",
content = type(msg.content) == "string" and msg.content or nil,
}
-- Preserve tool_calls for the API
if msg.tool_calls then
assistant_msg.tool_calls = msg.tool_calls
-- Ensure content is not nil when tool_calls present
if assistant_msg.content == nil then
assistant_msg.content = ""
end
end
table.insert(copilot_messages, assistant_msg)
elseif msg.role == "tool" then
-- Tool result messages - must have tool_call_id
table.insert(copilot_messages, {
role = "tool",
tool_call_id = msg.tool_call_id,
content = type(msg.content) == "string" and msg.content or vim.json.encode(msg.content),
})
end
end
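To make the branching above concrete, here is an illustrative message sequence the loop produces for one tool round trip (IDs, names, and contents are made up):

local copilot_messages_example = {
  { role = "user", content = "List the project files" },
  {
    role = "assistant",
    content = "", -- kept non-nil because tool_calls is present
    tool_calls = {
      { id = "call_1", type = "function",
        ["function"] = { name = "list_files", arguments = "{}" } },
    },
  },
  { role = "tool", tool_call_id = "call_1", content = '["init.lua"]' },
}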
@@ -396,6 +507,20 @@ function M.generate_with_tools(messages, context, tool_definitions, callback)
logs.debug(string.format("Estimated prompt: ~%d tokens", prompt_estimate))
logs.thinking("Sending to Copilot API...")
-- Log request to debug file
local debug_log_path = vim.fn.expand("~/.local/codetyper-debug.log")
local debug_f = io.open(debug_log_path, "a")
if debug_f then
debug_f:write(os.date("[%Y-%m-%d %H:%M:%S] ") .. "COPILOT REQUEST\n")
debug_f:write("Messages count: " .. #copilot_messages .. "\n")
for i, m in ipairs(copilot_messages) do
debug_f:write(string.format(" [%d] role=%s, has_tool_calls=%s, has_tool_call_id=%s\n",
i, m.role, tostring(m.tool_calls ~= nil), tostring(m.tool_call_id ~= nil)))
end
debug_f:write("---\n")
debug_f:close()
end
local headers = build_headers(token)
local cmd = {
"curl",
@@ -413,35 +538,97 @@ function M.generate_with_tools(messages, context, tool_definitions, callback)
table.insert(cmd, "-d")
table.insert(cmd, json_body)
-- Debug logging helper
local function debug_log(msg, data)
local log_path = vim.fn.expand("~/.local/codetyper-debug.log")
local f = io.open(log_path, "a")
if f then
f:write(os.date("[%Y-%m-%d %H:%M:%S] ") .. msg .. "\n")
if data then
f:write("DATA: " .. tostring(data):sub(1, 2000) .. "\n")
end
f:write("---\n")
f:close()
end
end
-- Prevent double callback calls
local callback_called = false
vim.fn.jobstart(cmd, {
stdout_buffered = true,
on_stdout = function(_, data)
if callback_called then
debug_log("on_stdout: callback already called, skipping")
return
end
if not data or #data == 0 or (data[1] == "" and #data == 1) then
debug_log("on_stdout: empty data")
return
end
local response_text = table.concat(data, "\n")
debug_log("on_stdout: received response", response_text)
local ok, response = pcall(vim.json.decode, response_text)
if not ok then
debug_log("JSON parse failed", response_text)
callback_called = true
-- Show the actual response text as the error (truncated if too long)
local error_msg = response_text
if #error_msg > 200 then
error_msg = error_msg:sub(1, 200) .. "..."
end
-- Clean up common patterns
if response_text:match("<!DOCTYPE") or response_text:match("<html") then
error_msg = "Copilot API returned HTML error page. Service may be unavailable."
end
-- Check for rate limit and suggest Ollama fallback
if response_text:match("limit") or response_text:match("Upgrade") or response_text:match("quota") then
M.suggest_ollama_fallback(error_msg)
end
vim.schedule(function()
logs.error("Failed to parse Copilot response")
callback(nil, "Failed to parse Copilot response")
logs.error(error_msg)
callback(nil, error_msg)
end)
return
end
if response.error then
callback_called = true
local error_msg = response.error.message or "Copilot API error"
-- Check for rate limit in structured error
if response.error.code == "rate_limit_exceeded" or (error_msg:match("limit") and error_msg:match("plan")) then
error_msg = "Copilot rate limit: " .. error_msg
M.suggest_ollama_fallback(error_msg)
end
vim.schedule(function()
logs.error(response.error.message or "Copilot API error")
callback(nil, response.error.message or "Copilot API error")
logs.error(error_msg)
callback(nil, error_msg)
end)
return
end
-- Log token usage
-- Log token usage and record cost
if response.usage then
logs.response(response.usage.prompt_tokens or 0, response.usage.completion_tokens or 0, "stop")
-- Record usage for cost tracking
local cost_tracker = require("codetyper.cost")
cost_tracker.record_usage(
get_model(),
response.usage.prompt_tokens or 0,
response.usage.completion_tokens or 0,
response.usage.prompt_tokens_details and response.usage.prompt_tokens_details.cached_tokens or 0
)
end
-- Convert to Claude-like format for parser compatibility
@@ -474,12 +661,19 @@ function M.generate_with_tools(messages, context, tool_definitions, callback)
end
end
callback_called = true
debug_log("on_stdout: success, calling callback")
vim.schedule(function()
callback(converted, nil)
end)
end,
on_stderr = function(_, data)
if callback_called then
return
end
if data and #data > 0 and data[1] ~= "" then
debug_log("on_stderr", table.concat(data, "\n"))
callback_called = true
vim.schedule(function()
logs.error("Copilot API request failed: " .. table.concat(data, "\n"))
callback(nil, "Copilot API request failed: " .. table.concat(data, "\n"))
@@ -487,7 +681,12 @@ function M.generate_with_tools(messages, context, tool_definitions, callback)
end
end,
on_exit = function(_, code)
debug_log("on_exit: code=" .. code .. ", callback_called=" .. tostring(callback_called))
if callback_called then
return
end
if code ~= 0 then
callback_called = true
vim.schedule(function()
logs.error("Copilot API request failed with code: " .. code)
callback(nil, "Copilot API request failed with code: " .. code)

View File

@@ -8,17 +8,31 @@ local llm = require("codetyper.llm")
--- Gemini API endpoint
local API_URL = "https://generativelanguage.googleapis.com/v1beta/models"
--- Get API key from config or environment
--- Get API key from stored credentials, config, or environment
---@return string|nil API key
local function get_api_key()
-- Priority: stored credentials > config > environment
local credentials = require("codetyper.credentials")
local stored_key = credentials.get_api_key("gemini")
if stored_key then
return stored_key
end
local codetyper = require("codetyper")
local config = codetyper.get_config()
return config.llm.gemini.api_key or vim.env.GEMINI_API_KEY
end
--- Get model from config
--- Get model from stored credentials or config
---@return string Model name
local function get_model()
-- Priority: stored credentials > config
local credentials = require("codetyper.credentials")
local stored_model = credentials.get_model("gemini")
if stored_model then
return stored_model
end
local codetyper = require("codetyper")
local config = codetyper.get_config()
return config.llm.gemini.model

View File

@@ -32,6 +32,32 @@ function M.generate(prompt, context, callback)
client.generate(prompt, context, callback)
end
--- Smart generate with automatic provider selection based on brain memories
--- Prefers Ollama when context is rich, falls back to Copilot otherwise.
--- Implements verification pondering to reinforce Ollama accuracy over time.
---@param prompt string The user's prompt
---@param context table Context information
---@param callback fun(response: string|nil, error: string|nil, metadata: table|nil) Callback
function M.smart_generate(prompt, context, callback)
local selector = require("codetyper.llm.selector")
selector.smart_generate(prompt, context, callback)
end
--- Get accuracy statistics for providers
---@return table Statistics for each provider
function M.get_accuracy_stats()
local selector = require("codetyper.llm.selector")
return selector.get_accuracy_stats()
end
--- Report user feedback on response quality (for reinforcement learning)
---@param provider string Which provider generated the response
---@param was_correct boolean Whether the response was good
function M.report_feedback(provider, was_correct)
local selector = require("codetyper.llm.selector")
selector.report_feedback(provider, was_correct)
end
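-- Hypothetical call site for the new entry point (illustrative, not part of
-- this commit):
--   require("codetyper.llm").smart_generate(prompt, { file_path = path },
--     function(response, err, metadata)
--       -- metadata.provider names the backend that answered ("ollama" or
--       -- "copilot"); metadata.pondered reports whether it was cross-checked.
--     end)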
--- Build the system prompt for code generation
---@param context table Context information
---@return string System prompt

View File

@@ -5,21 +5,33 @@ local M = {}
local utils = require("codetyper.utils")
local llm = require("codetyper.llm")
--- Get Ollama host from config
--- Get Ollama host from stored credentials or config
---@return string Host URL
local function get_host()
-- Priority: stored credentials > config
local credentials = require("codetyper.credentials")
local stored_host = credentials.get_ollama_host()
if stored_host then
return stored_host
end
local codetyper = require("codetyper")
local config = codetyper.get_config()
return config.llm.ollama.host
end
--- Get model from config
--- Get model from stored credentials or config
---@return string Model name
local function get_model()
-- Priority: stored credentials > config
local credentials = require("codetyper.credentials")
local stored_model = credentials.get_model("ollama")
if stored_model then
return stored_model
end
local codetyper = require("codetyper")
local config = codetyper.get_config()
return config.llm.ollama.model
end
@@ -199,47 +211,41 @@ function M.validate()
return true
end
--- Build system prompt for agent mode with tool instructions
--- Generate with tool use support for agentic mode (text-based tool calling)
---@param messages table[] Conversation history
---@param context table Context information
---@return string System prompt
local function build_agent_system_prompt(context)
---@param tool_definitions table Tool definitions
---@param callback fun(response: table|nil, error: string|nil) Callback with Claude-like response format
function M.generate_with_tools(messages, context, tool_definitions, callback)
local logs = require("codetyper.agent.logs")
local agent_prompts = require("codetyper.prompts.agent")
local tools_module = require("codetyper.agent.tools")
local system_prompt = agent_prompts.system .. "\n\n"
system_prompt = system_prompt .. tools_module.to_prompt_format() .. "\n\n"
system_prompt = system_prompt .. agent_prompts.tool_instructions
logs.request("ollama", get_model())
logs.thinking("Preparing agent request...")
-- Add context about current file if available
if context.file_path then
system_prompt = system_prompt .. "\n\nCurrent working context:\n"
system_prompt = system_prompt .. "- File: " .. context.file_path .. "\n"
if context.language then
system_prompt = system_prompt .. "- Language: " .. context.language .. "\n"
-- Build system prompt with tool instructions
local system_prompt = llm.build_system_prompt(context)
system_prompt = system_prompt .. "\n\n" .. agent_prompts.system
system_prompt = system_prompt .. "\n\n" .. agent_prompts.tool_instructions
-- Add tool descriptions
system_prompt = system_prompt .. "\n\n## Available Tools\n"
system_prompt = system_prompt .. "Call tools by outputting JSON in this exact format:\n"
system_prompt = system_prompt .. '```json\n{"tool": "tool_name", "arguments": {...}}\n```\n\n'
for _, tool in ipairs(tool_definitions) do
local name = tool.name or (tool["function"] and tool["function"].name)
local desc = tool.description or (tool["function"] and tool["function"].description)
if name then
system_prompt = system_prompt .. string.format("### %s\n%s\n\n", name, desc or "")
end
end
-- Add project root info
local root = utils.get_project_root()
if root then
system_prompt = system_prompt .. "- Project root: " .. root .. "\n"
end
return system_prompt
end
--- Build request body for Ollama API with tools (chat format)
---@param messages table[] Conversation messages
---@param context table Context information
---@return table Request body
local function build_tools_request_body(messages, context)
local system_prompt = build_agent_system_prompt(context)
-- Convert messages to Ollama chat format
local ollama_messages = {}
for _, msg in ipairs(messages) do
local content = msg.content
-- Handle complex content (like tool results)
if type(content) == "table" then
local text_parts = {}
for _, part in ipairs(content) do
@@ -251,14 +257,10 @@ local function build_tools_request_body(messages, context)
end
content = table.concat(text_parts, "\n")
end
table.insert(ollama_messages, {
role = msg.role,
content = content,
})
table.insert(ollama_messages, { role = msg.role, content = content })
end
return {
local body = {
model = get_model(),
messages = ollama_messages,
system = system_prompt,
@@ -268,16 +270,15 @@ local function build_tools_request_body(messages, context)
num_predict = 4096,
},
}
end
--- Make HTTP request to Ollama chat API
---@param body table Request body
---@param callback fun(response: string|nil, error: string|nil, usage: table|nil) Callback function
local function make_chat_request(body, callback)
local host = get_host()
local url = host .. "/api/chat"
local json_body = vim.json.encode(body)
local prompt_estimate = logs.estimate_tokens(json_body)
logs.debug(string.format("Estimated prompt: ~%d tokens", prompt_estimate))
logs.thinking("Sending to Ollama API...")
local cmd = {
"curl",
"-s",
@@ -302,196 +303,82 @@ local function make_chat_request(body, callback)
if not ok then
vim.schedule(function()
callback(nil, "Failed to parse Ollama response", nil)
logs.error("Failed to parse Ollama response")
callback(nil, "Failed to parse Ollama response")
end)
return
end
if response.error then
vim.schedule(function()
callback(nil, response.error or "Ollama API error", nil)
logs.error(response.error or "Ollama API error")
callback(nil, response.error or "Ollama API error")
end)
return
end
-- Extract usage info
local usage = {
prompt_tokens = response.prompt_eval_count or 0,
response_tokens = response.eval_count or 0,
}
-- Log token usage and record cost (Ollama is free but we track usage)
if response.prompt_eval_count or response.eval_count then
logs.response(response.prompt_eval_count or 0, response.eval_count or 0, "stop")
-- Return the message content for agent parsing
if response.message and response.message.content then
vim.schedule(function()
callback(response.message.content, nil, usage)
end)
else
vim.schedule(function()
callback(nil, "No response from Ollama", nil)
end)
-- Record usage for cost tracking (free for local models)
local cost = require("codetyper.cost")
cost.record_usage(
get_model(),
response.prompt_eval_count or 0,
response.eval_count or 0,
0 -- No cached tokens for Ollama
)
end
-- Parse the response text for tool calls
local content_text = response.message and response.message.content or ""
local converted = { content = {}, stop_reason = "end_turn" }
-- Try to extract JSON tool calls from response
local json_match = content_text:match("```json%s*(%b{})%s*```")
if json_match then
local ok_json, parsed = pcall(vim.json.decode, json_match)
if ok_json and parsed.tool then
table.insert(converted.content, {
type = "tool_use",
id = "call_" .. string.format("%x", os.time()) .. "_" .. string.format("%x", math.random(0, 0xFFFF)),
name = parsed.tool,
input = parsed.arguments or {},
})
logs.thinking("Tool call: " .. parsed.tool)
content_text = content_text:gsub("```json.-```", ""):gsub("^%s+", ""):gsub("%s+$", "")
converted.stop_reason = "tool_use"
end
end
-- Add text content
if content_text and content_text ~= "" then
table.insert(converted.content, 1, { type = "text", text = content_text })
logs.thinking("Response contains text")
end
vim.schedule(function()
callback(converted, nil)
end)
end,
on_stderr = function(_, data)
if data and #data > 0 and data[1] ~= "" then
vim.schedule(function()
callback(nil, "Ollama API request failed: " .. table.concat(data, "\n"), nil)
logs.error("Ollama API request failed: " .. table.concat(data, "\n"))
callback(nil, "Ollama API request failed: " .. table.concat(data, "\n"))
end)
end
end,
on_exit = function(_, code)
if code ~= 0 then
-- Don't double-report errors
vim.schedule(function()
logs.error("Ollama API request failed with code: " .. code)
callback(nil, "Ollama API request failed with code: " .. code)
end)
end
end,
})
end
--- Generate response with tools using Ollama API
---@param messages table[] Conversation history
---@param context table Context information
---@param tools table Tool definitions (embedded in prompt for Ollama)
---@param callback fun(response: string|nil, error: string|nil) Callback function
function M.generate_with_tools(messages, context, tools, callback)
local logs = require("codetyper.agent.logs")
-- Log the request
local model = get_model()
logs.request("ollama", model)
logs.thinking("Preparing API request...")
local body = build_tools_request_body(messages, context)
-- Estimate prompt tokens
local prompt_estimate = logs.estimate_tokens(vim.json.encode(body))
logs.debug(string.format("Estimated prompt: ~%d tokens", prompt_estimate))
make_chat_request(body, function(response, err, usage)
if err then
logs.error(err)
callback(nil, err)
else
-- Log token usage
if usage then
logs.response(usage.prompt_tokens or 0, usage.response_tokens or 0, "end_turn")
end
-- Log if response contains tool calls
if response then
local parser = require("codetyper.agent.parser")
local parsed = parser.parse_ollama_response(response)
if #parsed.tool_calls > 0 then
for _, tc in ipairs(parsed.tool_calls) do
logs.thinking("Tool call: " .. tc.name)
end
end
if parsed.text and parsed.text ~= "" then
logs.thinking("Response contains text")
end
end
callback(response, nil)
end
end)
end
--- Generate with tool use support for agentic mode (simulated via prompts)
---@param messages table[] Conversation history
---@param context table Context information
---@param tool_definitions table Tool definitions
---@param callback fun(response: string|nil, error: string|nil) Callback with response text
function M.generate_with_tools(messages, context, tool_definitions, callback)
local tools_module = require("codetyper.agent.tools")
local agent_prompts = require("codetyper.prompts.agent")
-- Build system prompt with agent instructions and tool definitions
local system_prompt = llm.build_system_prompt(context)
system_prompt = system_prompt .. "\n\n" .. agent_prompts.system
system_prompt = system_prompt .. "\n\n" .. tools_module.to_prompt_format()
-- Flatten messages to single prompt (Ollama's generate API)
local prompt_parts = {}
for _, msg in ipairs(messages) do
if type(msg.content) == "string" then
local role_prefix = msg.role == "user" and "User" or "Assistant"
table.insert(prompt_parts, role_prefix .. ": " .. msg.content)
elseif type(msg.content) == "table" then
-- Handle tool results
for _, item in ipairs(msg.content) do
if item.type == "tool_result" then
table.insert(prompt_parts, "Tool result: " .. item.content)
end
end
end
end
local body = {
model = get_model(),
system = system_prompt,
prompt = table.concat(prompt_parts, "\n\n"),
stream = false,
options = {
temperature = 0.2,
num_predict = 4096,
},
}
local host = get_host()
local url = host .. "/api/generate"
local json_body = vim.json.encode(body)
local cmd = {
"curl",
"-s",
"-X", "POST",
url,
"-H", "Content-Type: application/json",
"-d", json_body,
}
vim.fn.jobstart(cmd, {
stdout_buffered = true,
on_stdout = function(_, data)
if not data or #data == 0 or (data[1] == "" and #data == 1) then
return
end
local response_text = table.concat(data, "\n")
local ok, response = pcall(vim.json.decode, response_text)
if not ok then
vim.schedule(function()
callback(nil, "Failed to parse Ollama response")
end)
return
end
if response.error then
vim.schedule(function()
callback(nil, response.error or "Ollama API error")
end)
return
end
-- Return raw response text for parser to handle
vim.schedule(function()
callback(response.response or "", nil)
end)
end,
on_stderr = function(_, data)
if data and #data > 0 and data[1] ~= "" then
vim.schedule(function()
callback(nil, "Ollama API request failed: " .. table.concat(data, "\n"))
end)
end
end,
on_exit = function(_, code)
if code ~= 0 then
vim.schedule(function()
callback(nil, "Ollama API request failed with code: " .. code)
end)
end
end,
})
end
return M
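Since Ollama has no native tool-calling in this path, the protocol is text-based: the model is instructed to emit a fenced JSON block, which the code above lifts into a Claude-style tool_use item. An illustrative round trip (tool name and path are made up):

-- Model output:
--   I'll read that file first.
--   ```json
--   {"tool": "read_file", "arguments": {"path": "lua/codetyper/init.lua"}}
--   ```
-- Converted form passed to the callback:
--   converted.content[1] = { type = "text", text = "I'll read that file first." }
--   converted.content[2] = { type = "tool_use", id = "call_<hex>", name = "read_file",
--                            input = { path = "lua/codetyper/init.lua" } }
--   converted.stop_reason = "tool_use"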

View File

@@ -8,25 +8,46 @@ local llm = require("codetyper.llm")
--- OpenAI API endpoint
local API_URL = "https://api.openai.com/v1/chat/completions"
--- Get API key from config or environment
--- Get API key from stored credentials, config, or environment
---@return string|nil API key
local function get_api_key()
-- Priority: stored credentials > config > environment
local credentials = require("codetyper.credentials")
local stored_key = credentials.get_api_key("openai")
if stored_key then
return stored_key
end
local codetyper = require("codetyper")
local config = codetyper.get_config()
return config.llm.openai.api_key or vim.env.OPENAI_API_KEY
end
--- Get model from config
--- Get model from stored credentials or config
---@return string Model name
local function get_model()
-- Priority: stored credentials > config
local credentials = require("codetyper.credentials")
local stored_model = credentials.get_model("openai")
if stored_model then
return stored_model
end
local codetyper = require("codetyper")
local config = codetyper.get_config()
return config.llm.openai.model
end
--- Get endpoint from config (allows custom endpoints like Azure, OpenRouter)
--- Get endpoint from stored credentials or config (allows custom endpoints like Azure, OpenRouter)
---@return string API endpoint
local function get_endpoint()
-- Priority: stored credentials > config > default
local credentials = require("codetyper.credentials")
local stored_endpoint = credentials.get_endpoint("openai")
if stored_endpoint then
return stored_endpoint
end
local codetyper = require("codetyper")
local config = codetyper.get_config()
return config.llm.openai.endpoint or API_URL
@@ -284,9 +305,18 @@ function M.generate_with_tools(messages, context, tool_definitions, callback)
return
end
-- Log token usage
-- Log token usage and record cost
if response.usage then
logs.response(response.usage.prompt_tokens or 0, response.usage.completion_tokens or 0, "stop")
-- Record usage for cost tracking
local cost = require("codetyper.cost")
cost.record_usage(
model,
response.usage.prompt_tokens or 0,
response.usage.completion_tokens or 0,
response.usage.prompt_tokens_details and response.usage.prompt_tokens_details.cached_tokens or 0
)
end
-- Convert to Claude-like format for parser compatibility

View File

@@ -0,0 +1,514 @@
---@mod codetyper.llm.selector Smart LLM selection with memory-based confidence
---@brief [[
--- Intelligent LLM provider selection based on brain memories.
--- Prefers local Ollama when context is rich, falls back to Copilot otherwise.
--- Implements verification pondering to reinforce Ollama accuracy over time.
---@brief ]]
local M = {}
---@class SelectionResult
---@field provider string Selected provider name
---@field confidence number Confidence score (0-1)
---@field memory_count number Number of relevant memories found
---@field reason string Human-readable reason for selection
---@class PonderResult
---@field ollama_response string Ollama's response
---@field verifier_response string Verifier's response
---@field agreement_score number How much they agree (0-1)
---@field ollama_correct boolean Whether Ollama was deemed correct
---@field feedback string Feedback for learning
--- Minimum memories required for high confidence
local MIN_MEMORIES_FOR_LOCAL = 3
--- Minimum memory relevance score for local provider
local MIN_RELEVANCE_FOR_LOCAL = 0.6
--- Agreement threshold for Ollama verification
local AGREEMENT_THRESHOLD = 0.7
--- Pondering sample rate (0-1) - how often to verify Ollama
local PONDER_SAMPLE_RATE = 0.2
--- Provider accuracy tracking (persisted in brain)
local accuracy_cache = {
ollama = { correct = 0, total = 0 },
copilot = { correct = 0, total = 0 },
}
--- Get the brain module safely
---@return table|nil
local function get_brain()
local ok, brain = pcall(require, "codetyper.brain")
if ok and brain.is_initialized and brain.is_initialized() then
return brain
end
return nil
end
--- Load accuracy stats from brain
local function load_accuracy_stats()
local brain = get_brain()
if not brain then
return
end
-- Query for accuracy tracking nodes
pcall(function()
local result = brain.query({
query = "provider_accuracy_stats",
types = { "metric" },
limit = 1,
})
if result and result.nodes and #result.nodes > 0 then
local node = result.nodes[1]
if node.c and node.c.d then
local ok, stats = pcall(vim.json.decode, node.c.d)
if ok and stats then
accuracy_cache = stats
end
end
end
end)
end
--- Save accuracy stats to brain
local function save_accuracy_stats()
local brain = get_brain()
if not brain then
return
end
pcall(function()
brain.learn({
type = "metric",
summary = "provider_accuracy_stats",
detail = vim.json.encode(accuracy_cache),
weight = 1.0,
})
end)
end
--- Calculate Ollama confidence based on historical accuracy
---@return number confidence (0-1)
local function get_ollama_historical_confidence()
local stats = accuracy_cache.ollama
if stats.total < 5 then
-- Not enough data, return neutral confidence
return 0.5
end
local accuracy = stats.correct / stats.total
-- Scale accuracy up by 20% (capped at 1.0) so a strong track record lifts confidence
return math.min(1.0, accuracy * 1.2)
end
--- Query brain for relevant context
---@param prompt string User prompt
---@param file_path string|nil Current file path
---@return table result {memories: table[], relevance: number, count: number}
local function query_brain_context(prompt, file_path)
local result = {
memories = {},
relevance = 0,
count = 0,
}
local brain = get_brain()
if not brain then
return result
end
-- Query brain with multiple dimensions
local ok, query_result = pcall(function()
return brain.query({
query = prompt,
file = file_path,
limit = 10,
types = { "pattern", "correction", "convention", "fact" },
})
end)
if not ok or not query_result then
return result
end
result.memories = query_result.nodes or {}
result.count = #result.memories
-- Calculate average relevance
if result.count > 0 then
local total_relevance = 0
for _, node in ipairs(result.memories) do
-- Use node weight and success rate as relevance indicators
local node_relevance = (node.sc and node.sc.w or 0.5) * (node.sc and node.sc.sr or 0.5)
total_relevance = total_relevance + node_relevance
end
result.relevance = total_relevance / result.count
end
return result
end
--- Select the best LLM provider based on context
---@param prompt string User prompt
---@param context table LLM context
---@return SelectionResult
function M.select_provider(prompt, context)
-- Load accuracy stats on first call
if accuracy_cache.ollama.total == 0 then
load_accuracy_stats()
end
local file_path = context.file_path
-- Query brain for relevant memories
local brain_context = query_brain_context(prompt, file_path)
-- Calculate base confidence from memories
local memory_confidence = 0
if brain_context.count >= MIN_MEMORIES_FOR_LOCAL then
memory_confidence = math.min(1.0, brain_context.count / 10) * brain_context.relevance
end
-- Factor in historical Ollama accuracy
local historical_confidence = get_ollama_historical_confidence()
-- Combined confidence score
local combined_confidence = (memory_confidence * 0.6) + (historical_confidence * 0.4)
-- Decision logic
local provider = "copilot" -- Default to the more capable provider
local reason = ""
if brain_context.count >= MIN_MEMORIES_FOR_LOCAL and combined_confidence >= MIN_RELEVANCE_FOR_LOCAL then
provider = "ollama"
reason = string.format(
"Rich context: %d memories (%.1f%% relevance), historical accuracy: %.1f%%",
brain_context.count,
brain_context.relevance * 100,
historical_confidence * 100
)
elseif brain_context.count > 0 and combined_confidence >= 0.4 then
-- Medium confidence - use Ollama but with pondering
provider = "ollama"
reason = string.format(
"Moderate context: %d memories, will verify with pondering",
brain_context.count
)
else
reason = string.format(
"Insufficient context: %d memories (need %d), using capable provider",
brain_context.count,
MIN_MEMORIES_FOR_LOCAL
)
end
return {
provider = provider,
confidence = combined_confidence,
memory_count = brain_context.count,
reason = reason,
memories = brain_context.memories,
}
end
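-- Worked example with illustrative numbers (assuming at least 5 recorded
-- verifications): 6 memories at 0.70 average relevance give
-- memory_confidence = min(1.0, 6/10) * 0.70 = 0.42; a 0.80 historical
-- accuracy (scaled by 1.2 and capped, so 0.96) yields a combined confidence
-- of 0.42 * 0.6 + 0.96 * 0.4 = 0.636, which clears MIN_RELEVANCE_FOR_LOCAL
-- (0.6), so the request routes to Ollama.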
--- Check if we should ponder (verify) this Ollama response
---@param confidence number Current confidence level
---@return boolean
function M.should_ponder(confidence)
-- Always ponder when confidence is medium
if confidence >= 0.4 and confidence < 0.7 then
return true
end
-- Random sampling for high confidence to keep learning
if confidence >= 0.7 then
return math.random() < PONDER_SAMPLE_RATE
end
-- Low confidence shouldn't reach Ollama anyway
return false
end
--- Calculate agreement score between two responses
---@param response1 string First response
---@param response2 string Second response
---@return number Agreement score (0-1)
local function calculate_agreement(response1, response2)
-- Normalize responses
local norm1 = response1:lower():gsub("%s+", " "):gsub("[^%w%s]", "")
local norm2 = response2:lower():gsub("%s+", " "):gsub("[^%w%s]", "")
-- Extract words
local words1 = {}
for word in norm1:gmatch("%w+") do
words1[word] = (words1[word] or 0) + 1
end
local words2 = {}
for word in norm2:gmatch("%w+") do
words2[word] = (words2[word] or 0) + 1
end
-- Calculate Jaccard similarity
local intersection = 0
local union = 0
for word, count1 in pairs(words1) do
local count2 = words2[word] or 0
intersection = intersection + math.min(count1, count2)
union = union + math.max(count1, count2)
end
for word, count2 in pairs(words2) do
if not words1[word] then
union = union + count2
end
end
if union == 0 then
return 1.0 -- Both empty
end
-- Also check structural similarity, using "function" keyword counts as a rough proxy
local struct_score = 0
local function_count1 = select(2, response1:gsub("function", ""))
local function_count2 = select(2, response2:gsub("function", ""))
if function_count1 > 0 or function_count2 > 0 then
struct_score = 1 - math.abs(function_count1 - function_count2) / math.max(function_count1, function_count2, 1)
else
struct_score = 1.0
end
-- Combined score
local jaccard = intersection / union
return (jaccard * 0.7) + (struct_score * 0.3)
end
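-- Worked example: "local x = 1" vs "local x = 2" normalize to the word bags
-- {local=1, x=1, 1=1} and {local=1, x=1, 2=1}; intersection = 2, union = 4,
-- so the Jaccard term is 0.5. Neither string contains "function", so
-- struct_score = 1.0, and the final score 0.5 * 0.7 + 1.0 * 0.3 = 0.65 falls
-- just short of AGREEMENT_THRESHOLD (0.7).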
--- Ponder (verify) Ollama's response with another LLM
---@param prompt string Original prompt
---@param context table LLM context
---@param ollama_response string Ollama's response
---@param callback fun(result: PonderResult) Callback with pondering result
function M.ponder(prompt, context, ollama_response, callback)
-- Use Copilot as verifier
local copilot = require("codetyper.llm.copilot")
-- Build verification prompt
local verify_prompt = prompt
copilot.generate(verify_prompt, context, function(verifier_response, error)
if error or not verifier_response then
-- Verification failed, assume Ollama is correct
callback({
ollama_response = ollama_response,
verifier_response = "",
agreement_score = 1.0,
ollama_correct = true,
feedback = "Verification unavailable, trusting Ollama",
})
return
end
-- Calculate agreement
local agreement = calculate_agreement(ollama_response, verifier_response)
-- Determine if Ollama was correct
local ollama_correct = agreement >= AGREEMENT_THRESHOLD
-- Generate feedback
local feedback
if ollama_correct then
feedback = string.format("Agreement: %.1f%% - Ollama response validated", agreement * 100)
else
feedback = string.format(
"Disagreement: %.1f%% - Ollama may need correction",
(1 - agreement) * 100
)
end
-- Update accuracy tracking
accuracy_cache.ollama.total = accuracy_cache.ollama.total + 1
if ollama_correct then
accuracy_cache.ollama.correct = accuracy_cache.ollama.correct + 1
end
save_accuracy_stats()
-- Learn from this verification
local brain = get_brain()
if brain then
pcall(function()
if ollama_correct then
-- Reinforce the pattern
brain.learn({
type = "correction",
summary = "Ollama verified correct",
detail = string.format(
"Prompt: %s\nAgreement: %.1f%%",
prompt:sub(1, 100),
agreement * 100
),
weight = 0.8,
file = context.file_path,
})
else
-- Learn the correction
brain.learn({
type = "correction",
summary = "Ollama needed correction",
detail = string.format(
"Prompt: %s\nOllama: %s\nCorrect: %s",
prompt:sub(1, 100),
ollama_response:sub(1, 200),
verifier_response:sub(1, 200)
),
weight = 0.9,
file = context.file_path,
})
end
end)
end
callback({
ollama_response = ollama_response,
verifier_response = verifier_response,
agreement_score = agreement,
ollama_correct = ollama_correct,
feedback = feedback,
})
end)
end
--- Smart generate with automatic provider selection and pondering
---@param prompt string User prompt
---@param context table LLM context
---@param callback fun(response: string|nil, error: string|nil, metadata: table|nil) Callback
function M.smart_generate(prompt, context, callback)
-- Select provider
local selection = M.select_provider(prompt, context)
-- Log selection
pcall(function()
local logs = require("codetyper.agent.logs")
logs.add({
type = "info",
message = string.format(
"LLM: %s (confidence: %.1f%%, %s)",
selection.provider,
selection.confidence * 100,
selection.reason
),
})
end)
-- Get the selected client
local client
if selection.provider == "ollama" then
client = require("codetyper.llm.ollama")
else
client = require("codetyper.llm.copilot")
end
-- Generate response
client.generate(prompt, context, function(response, error)
if error then
-- Fallback on error
if selection.provider == "ollama" then
-- Try Copilot as fallback
local copilot = require("codetyper.llm.copilot")
copilot.generate(prompt, context, function(fallback_response, fallback_error)
callback(fallback_response, fallback_error, {
provider = "copilot",
fallback = true,
original_provider = "ollama",
original_error = error,
})
end)
return
end
callback(nil, error, { provider = selection.provider })
return
end
-- Check if we should ponder
if selection.provider == "ollama" and M.should_ponder(selection.confidence) then
M.ponder(prompt, context, response, function(ponder_result)
if ponder_result.ollama_correct then
-- Ollama was correct, use its response
callback(response, nil, {
provider = "ollama",
pondered = true,
agreement = ponder_result.agreement_score,
confidence = selection.confidence,
})
else
-- Use verifier's response instead
callback(ponder_result.verifier_response, nil, {
provider = "copilot",
pondered = true,
agreement = ponder_result.agreement_score,
original_provider = "ollama",
corrected = true,
})
end
end)
else
-- No pondering needed
callback(response, nil, {
provider = selection.provider,
pondered = false,
confidence = selection.confidence,
})
end
end)
end
--- Get current accuracy statistics
---@return table {ollama: {correct, total, accuracy}, copilot: {correct, total, accuracy}}
function M.get_accuracy_stats()
local stats = {
ollama = {
correct = accuracy_cache.ollama.correct,
total = accuracy_cache.ollama.total,
accuracy = accuracy_cache.ollama.total > 0
and (accuracy_cache.ollama.correct / accuracy_cache.ollama.total)
or 0,
},
copilot = {
correct = accuracy_cache.copilot.correct,
total = accuracy_cache.copilot.total,
accuracy = accuracy_cache.copilot.total > 0
and (accuracy_cache.copilot.correct / accuracy_cache.copilot.total)
or 0,
},
}
return stats
end
--- Reset accuracy statistics
function M.reset_accuracy_stats()
accuracy_cache = {
ollama = { correct = 0, total = 0 },
copilot = { correct = 0, total = 0 },
}
save_accuracy_stats()
end
--- Report user feedback on response quality
---@param provider string Which provider generated the response
---@param was_correct boolean Whether the response was good
function M.report_feedback(provider, was_correct)
if accuracy_cache[provider] then
accuracy_cache[provider].total = accuracy_cache[provider].total + 1
if was_correct then
accuracy_cache[provider].correct = accuracy_cache[provider].correct + 1
end
save_accuracy_stats()
end
end
return M
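The feedback hook is meant to be wired to user-facing commands; a hypothetical mapping (keys and descriptions are assumptions, not part of this commit):

vim.keymap.set("n", "<leader>cy", function()
  require("codetyper.llm").report_feedback("ollama", true)
end, { desc = "CodeTyper: mark last response as good" })

vim.keymap.set("n", "<leader>cn", function()
  require("codetyper.llm").report_feedback("ollama", false)
end, { desc = "CodeTyper: mark last response as bad" })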