Add smart LLM selection, Ollama rate-limit fallback, credential-based config, and cost tracking
lua/codetyper/llm/copilot.lua

@@ -14,6 +14,51 @@ local AUTH_URL = "https://api.github.com/copilot_internal/v2/token"
---@field github_token table|nil

M.state = nil

--- Track if we've already suggested Ollama fallback this session
local ollama_fallback_suggested = false

--- Suggest switching to Ollama when rate limits are hit
---@param error_msg string The error message that triggered this
function M.suggest_ollama_fallback(error_msg)
  if ollama_fallback_suggested then
    return
  end

  -- Check if Ollama is available
  local ollama_available = false
  vim.fn.jobstart({ "curl", "-s", "http://localhost:11434/api/tags" }, {
    on_exit = function(_, code)
      if code == 0 then
        ollama_available = true
      end

      vim.schedule(function()
        if ollama_available then
          -- Switch to Ollama automatically
          local codetyper = require("codetyper")
          local config = codetyper.get_config()
          config.llm.provider = "ollama"

          ollama_fallback_suggested = true
          utils.notify(
            "⚠️ Copilot rate limit reached. Switched to Ollama automatically.\n"
              .. "Original error: "
              .. error_msg:sub(1, 100),
            vim.log.levels.WARN
          )
        else
          utils.notify(
            "⚠️ Copilot rate limit reached. Ollama not available.\n"
              .. "Start Ollama with: ollama serve\n"
              .. "Or wait for Copilot limits to reset.",
            vim.log.levels.WARN
          )
        end
      end)
    end,
  })
end

--- Get OAuth token from copilot.lua or copilot.vim config
---@return string|nil OAuth token
local function get_oauth_token()
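
Note: the fallback above probes Ollama by shelling out to `curl` against its `/api/tags` endpoint via `vim.fn.jobstart`. A minimal sketch of the same check as a reusable helper; the `check_ollama_available` name and the timeout flag are assumptions, not part of this commit:

```lua
-- Sketch of the availability probe as a reusable helper (hypothetical
-- name, not in this commit). Calls back with true when the local
-- Ollama server answers /api/tags.
local function check_ollama_available(host, on_done)
  host = host or "http://localhost:11434"
  vim.fn.jobstart({ "curl", "-s", "--max-time", "2", host .. "/api/tags" }, {
    on_exit = function(_, code)
      -- curl exits 0 only when the HTTP request completed
      vim.schedule(function()
        on_done(code == 0)
      end)
    end,
  })
end

-- Usage:
check_ollama_available(nil, function(up)
  print(up and "Ollama is up" or "Ollama is down")
end)
```
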
@@ -51,9 +96,16 @@ local function get_oauth_token()
  return nil
end

--- Get model from config
--- Get model from stored credentials or config
---@return string Model name
local function get_model()
  -- Priority: stored credentials > config
  local credentials = require("codetyper.credentials")
  local stored_model = credentials.get_model("copilot")
  if stored_model then
    return stored_model
  end

  local codetyper = require("codetyper")
  local config = codetyper.get_config()
  return config.llm.copilot.model
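
Note: the same "stored credentials > config" lookup recurs in this commit for copilot, gemini, ollama, and openai. A possible consolidation, sketched with a hypothetical `resolve` helper that is not part of the commit:

```lua
-- Hypothetical consolidation of the repeated lookup (not in this commit):
-- resolve a per-provider setting, preferring stored credentials.
local function resolve(provider, key, config_value)
  local credentials = require("codetyper.credentials")
  local getter = credentials["get_" .. key] -- e.g. get_model, get_api_key
  local stored = getter and getter(provider)
  if stored then
    return stored
  end
  return config_value
end

-- Usage (mirrors get_model above):
-- resolve("copilot", "model", config.llm.copilot.model)
```
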
@@ -204,15 +256,37 @@ local function make_request(token, body, callback)
      local ok, response = pcall(vim.json.decode, response_text)

      if not ok then
        -- Show the actual response text as the error (truncated if too long)
        local error_msg = response_text
        if #error_msg > 200 then
          error_msg = error_msg:sub(1, 200) .. "..."
        end

        -- Clean up common patterns
        if response_text:match("<!DOCTYPE") or response_text:match("<html") then
          error_msg = "Copilot API returned HTML error page. Service may be unavailable."
        end

        -- Check for rate limit and suggest Ollama fallback
        if response_text:match("limit") or response_text:match("Upgrade") or response_text:match("quota") then
          M.suggest_ollama_fallback(error_msg)
        end

        vim.schedule(function()
          callback(nil, "Failed to parse Copilot response", nil)
          callback(nil, error_msg, nil)
        end)
        return
      end

      if response.error then
        local error_msg = response.error.message or "Copilot API error"
        if response.error.code == "rate_limit_exceeded" or (error_msg:match("limit") and error_msg:match("plan")) then
          error_msg = "Copilot rate limit: " .. error_msg
          M.suggest_ollama_fallback(error_msg)
        end

        vim.schedule(function()
          callback(nil, response.error.message or "Copilot API error", nil)
          callback(nil, error_msg, nil)
        end)
        return
      end
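
Note: a quick check of the classification heuristics above on sample payloads, in plain runnable Lua (the payloads are invented). Worth knowing: `:match("limit")` is a substring test, so an unrelated word like "delimiter" would also trigger the fallback path:

```lua
-- Exercise the heuristics on invented sample payloads (illustrative only).
local samples = {
  '<html><body>Bad gateway</body></html>',
  'You have exceeded your quota. Upgrade your plan.',
  '{"choices":[{"message":{"content":"..."}}]}',
}
for _, text in ipairs(samples) do
  local html = text:match("<!DOCTYPE") or text:match("<html")
  local limited = text:match("limit") or text:match("Upgrade") or text:match("quota")
  print(string.format("html=%s rate_limited=%s",
    tostring(html ~= nil), tostring(limited ~= nil)))
end
```
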
@@ -220,6 +294,17 @@ local function make_request(token, body, callback)
      -- Extract usage info
      local usage = response.usage or {}

      -- Record usage for cost tracking
      if usage.prompt_tokens or usage.completion_tokens then
        local cost = require("codetyper.cost")
        cost.record_usage(
          get_model(),
          usage.prompt_tokens or 0,
          usage.completion_tokens or 0,
          usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens or 0
        )
      end

      if response.choices and response.choices[1] and response.choices[1].message then
        local code = llm.extract_code(response.choices[1].message.content)
        vim.schedule(function()
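
Note: the cached-token extraction follows the OpenAI-style usage shape. A tiny illustration of what the call above pulls out of a usage payload (numbers invented):

```lua
-- What the record_usage call extracts from a sample usage payload
-- (numbers invented for illustration).
local usage = {
  prompt_tokens = 1200,
  completion_tokens = 350,
  prompt_tokens_details = { cached_tokens = 800 },
}
local cached = usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens or 0
-- cost.record_usage(get_model(), 1200, 350, 800) is what gets recorded here.
print(usage.prompt_tokens, usage.completion_tokens, cached) --> 1200 350 800
```
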
@@ -362,20 +447,46 @@ function M.generate_with_tools(messages, context, tool_definitions, callback)
  -- Format messages for Copilot (OpenAI-compatible format)
  local copilot_messages = { { role = "system", content = system_prompt } }
  for _, msg in ipairs(messages) do
    if type(msg.content) == "string" then
      table.insert(copilot_messages, { role = msg.role, content = msg.content })
    elseif type(msg.content) == "table" then
      local text_parts = {}
      for _, part in ipairs(msg.content) do
        if part.type == "tool_result" then
          table.insert(text_parts, "[" .. (part.name or "tool") .. " result]: " .. (part.content or ""))
        elseif part.type == "text" then
          table.insert(text_parts, part.text or "")
    if msg.role == "user" then
      -- User messages - handle string or table content
      if type(msg.content) == "string" then
        table.insert(copilot_messages, { role = "user", content = msg.content })
      elseif type(msg.content) == "table" then
        -- Handle complex content (like tool results from user perspective)
        local text_parts = {}
        for _, part in ipairs(msg.content) do
          if part.type == "tool_result" then
            table.insert(text_parts, "[" .. (part.name or "tool") .. " result]: " .. (part.content or ""))
          elseif part.type == "text" then
            table.insert(text_parts, part.text or "")
          end
        end
        if #text_parts > 0 then
          table.insert(copilot_messages, { role = "user", content = table.concat(text_parts, "\n") })
        end
      end
      if #text_parts > 0 then
        table.insert(copilot_messages, { role = msg.role, content = table.concat(text_parts, "\n") })
    elseif msg.role == "assistant" then
      -- Assistant messages - must preserve tool_calls if present
      local assistant_msg = {
        role = "assistant",
        content = type(msg.content) == "string" and msg.content or nil,
      }
      -- Preserve tool_calls for the API
      if msg.tool_calls then
        assistant_msg.tool_calls = msg.tool_calls
        -- Ensure content is not nil when tool_calls present
        if assistant_msg.content == nil then
          assistant_msg.content = ""
        end
      end
      table.insert(copilot_messages, assistant_msg)
    elseif msg.role == "tool" then
      -- Tool result messages - must have tool_call_id
      table.insert(copilot_messages, {
        role = "tool",
        tool_call_id = msg.tool_call_id,
        content = type(msg.content) == "string" and msg.content or vim.json.encode(msg.content),
      })
    end
  end
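
Note: the reworked loop exists because the OpenAI-compatible API rejects conversations where an assistant turn drops its `tool_calls` or a tool result lacks its `tool_call_id`. A hand-written example of the wire shape the loop produces; the IDs and arguments are made up:

```lua
-- Illustrative message sequence in the OpenAI-compatible shape the loop
-- above produces (IDs and arguments invented for the example).
local copilot_messages = {
  { role = "user", content = "Rename foo to bar in utils.lua" },
  {
    role = "assistant",
    content = "", -- must be a string, not nil, when tool_calls is present
    tool_calls = {
      {
        id = "call_abc123",
        type = "function",
        ["function"] = { name = "edit_file", arguments = '{"path":"utils.lua"}' },
      },
    },
  },
  -- every tool result must echo the id of the call it answers
  { role = "tool", tool_call_id = "call_abc123", content = "ok: 3 occurrences renamed" },
}
```
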
@@ -396,6 +507,20 @@ function M.generate_with_tools(messages, context, tool_definitions, callback)
  logs.debug(string.format("Estimated prompt: ~%d tokens", prompt_estimate))
  logs.thinking("Sending to Copilot API...")

  -- Log request to debug file
  local debug_log_path = vim.fn.expand("~/.local/codetyper-debug.log")
  local debug_f = io.open(debug_log_path, "a")
  if debug_f then
    debug_f:write(os.date("[%Y-%m-%d %H:%M:%S] ") .. "COPILOT REQUEST\n")
    debug_f:write("Messages count: " .. #copilot_messages .. "\n")
    for i, m in ipairs(copilot_messages) do
      debug_f:write(string.format("  [%d] role=%s, has_tool_calls=%s, has_tool_call_id=%s\n",
        i, m.role, tostring(m.tool_calls ~= nil), tostring(m.tool_call_id ~= nil)))
    end
    debug_f:write("---\n")
    debug_f:close()
  end

  local headers = build_headers(token)
  local cmd = {
    "curl",
@@ -413,35 +538,97 @@ function M.generate_with_tools(messages, context, tool_definitions, callback)
  table.insert(cmd, "-d")
  table.insert(cmd, json_body)

  -- Debug logging helper
  local function debug_log(msg, data)
    local log_path = vim.fn.expand("~/.local/codetyper-debug.log")
    local f = io.open(log_path, "a")
    if f then
      f:write(os.date("[%Y-%m-%d %H:%M:%S] ") .. msg .. "\n")
      if data then
        f:write("DATA: " .. tostring(data):sub(1, 2000) .. "\n")
      end
      f:write("---\n")
      f:close()
    end
  end

  -- Prevent double callback calls
  local callback_called = false

  vim.fn.jobstart(cmd, {
    stdout_buffered = true,
    on_stdout = function(_, data)
      if callback_called then
        debug_log("on_stdout: callback already called, skipping")
        return
      end

      if not data or #data == 0 or (data[1] == "" and #data == 1) then
        debug_log("on_stdout: empty data")
        return
      end

      local response_text = table.concat(data, "\n")
      debug_log("on_stdout: received response", response_text)

      local ok, response = pcall(vim.json.decode, response_text)

      if not ok then
        debug_log("JSON parse failed", response_text)
        callback_called = true

        -- Show the actual response text as the error (truncated if too long)
        local error_msg = response_text
        if #error_msg > 200 then
          error_msg = error_msg:sub(1, 200) .. "..."
        end

        -- Clean up common patterns
        if response_text:match("<!DOCTYPE") or response_text:match("<html") then
          error_msg = "Copilot API returned HTML error page. Service may be unavailable."
        end

        -- Check for rate limit and suggest Ollama fallback
        if response_text:match("limit") or response_text:match("Upgrade") or response_text:match("quota") then
          M.suggest_ollama_fallback(error_msg)
        end

        vim.schedule(function()
          logs.error("Failed to parse Copilot response")
          callback(nil, "Failed to parse Copilot response")
          logs.error(error_msg)
          callback(nil, error_msg)
        end)
        return
      end

      if response.error then
        callback_called = true
        local error_msg = response.error.message or "Copilot API error"

        -- Check for rate limit in structured error
        if response.error.code == "rate_limit_exceeded" or (error_msg:match("limit") and error_msg:match("plan")) then
          error_msg = "Copilot rate limit: " .. error_msg
          M.suggest_ollama_fallback(error_msg)
        end

        vim.schedule(function()
          logs.error(response.error.message or "Copilot API error")
          callback(nil, response.error.message or "Copilot API error")
          logs.error(error_msg)
          callback(nil, error_msg)
        end)
        return
      end

      -- Log token usage
      -- Log token usage and record cost
      if response.usage then
        logs.response(response.usage.prompt_tokens or 0, response.usage.completion_tokens or 0, "stop")

        -- Record usage for cost tracking
        local cost_tracker = require("codetyper.cost")
        cost_tracker.record_usage(
          get_model(),
          response.usage.prompt_tokens or 0,
          response.usage.completion_tokens or 0,
          response.usage.prompt_tokens_details and response.usage.prompt_tokens_details.cached_tokens or 0
        )
      end

      -- Convert to Claude-like format for parser compatibility
@@ -474,12 +661,19 @@ function M.generate_with_tools(messages, context, tool_definitions, callback)
        end
      end

      callback_called = true
      debug_log("on_stdout: success, calling callback")
      vim.schedule(function()
        callback(converted, nil)
      end)
    end,
    on_stderr = function(_, data)
      if callback_called then
        return
      end
      if data and #data > 0 and data[1] ~= "" then
        debug_log("on_stderr", table.concat(data, "\n"))
        callback_called = true
        vim.schedule(function()
          logs.error("Copilot API request failed: " .. table.concat(data, "\n"))
          callback(nil, "Copilot API request failed: " .. table.concat(data, "\n"))
@@ -487,7 +681,12 @@ function M.generate_with_tools(messages, context, tool_definitions, callback)
      end
    end,
    on_exit = function(_, code)
      debug_log("on_exit: code=" .. code .. ", callback_called=" .. tostring(callback_called))
      if callback_called then
        return
      end
      if code ~= 0 then
        callback_called = true
        vim.schedule(function()
          logs.error("Copilot API request failed with code: " .. code)
          callback(nil, "Copilot API request failed with code: " .. code)

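
Note: every handler above repeats the same `callback_called` check-and-set. A small "call at most once" wrapper could centralize it; this is a hypothetical sketch, not part of the commit:

```lua
-- Hypothetical once-wrapper (not in this commit): returns a function that
-- forwards to `fn` on the first call and ignores the rest, which is what
-- the callback_called flag implements by hand above.
local function once(fn)
  local called = false
  return function(...)
    if called then
      return
    end
    called = true
    return fn(...)
  end
end

-- Usage: local finish = once(callback)
-- Every handler can then call finish(...) without risking double delivery.
```
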
lua/codetyper/llm/gemini.lua

@@ -8,17 +8,31 @@ local llm = require("codetyper.llm")
--- Gemini API endpoint
local API_URL = "https://generativelanguage.googleapis.com/v1beta/models"

--- Get API key from config or environment
--- Get API key from stored credentials, config, or environment
---@return string|nil API key
local function get_api_key()
  -- Priority: stored credentials > config > environment
  local credentials = require("codetyper.credentials")
  local stored_key = credentials.get_api_key("gemini")
  if stored_key then
    return stored_key
  end

  local codetyper = require("codetyper")
  local config = codetyper.get_config()
  return config.llm.gemini.api_key or vim.env.GEMINI_API_KEY
end

--- Get model from config
--- Get model from stored credentials or config
---@return string Model name
local function get_model()
  -- Priority: stored credentials > config
  local credentials = require("codetyper.credentials")
  local stored_model = credentials.get_model("gemini")
  if stored_model then
    return stored_model
  end

  local codetyper = require("codetyper")
  local config = codetyper.get_config()
  return config.llm.gemini.model

@@ -32,6 +32,32 @@ function M.generate(prompt, context, callback)
  client.generate(prompt, context, callback)
end

--- Smart generate with automatic provider selection based on brain memories
--- Prefers Ollama when context is rich, falls back to Copilot otherwise.
--- Implements verification pondering to reinforce Ollama accuracy over time.
---@param prompt string The user's prompt
---@param context table Context information
---@param callback fun(response: string|nil, error: string|nil, metadata: table|nil) Callback
function M.smart_generate(prompt, context, callback)
  local selector = require("codetyper.llm.selector")
  selector.smart_generate(prompt, context, callback)
end

--- Get accuracy statistics for providers
---@return table Statistics for each provider
function M.get_accuracy_stats()
  local selector = require("codetyper.llm.selector")
  return selector.get_accuracy_stats()
end

--- Report user feedback on response quality (for reinforcement learning)
---@param provider string Which provider generated the response
---@param was_correct boolean Whether the response was good
function M.report_feedback(provider, was_correct)
  local selector = require("codetyper.llm.selector")
  selector.report_feedback(provider, was_correct)
end

--- Build the system prompt for code generation
---@param context table Context information
---@return string System prompt

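
Note: a sketch of how a caller might drive the new public entry points. The field names follow this diff; the context contents and prompt are invented for the example:

```lua
-- Driving the new public API (field names per this diff; the context
-- table contents are assumptions for the example).
local llm = require("codetyper.llm")

llm.smart_generate("write a debounce helper", { file_path = "lua/util.lua" },
  function(response, err, metadata)
    if err then
      vim.notify("generation failed: " .. err, vim.log.levels.ERROR)
      return
    end
    print(("provider=%s pondered=%s"):format(metadata.provider, tostring(metadata.pondered)))
    -- Close the learning loop once the user judges the result:
    llm.report_feedback(metadata.provider, true)
  end)
```
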
lua/codetyper/llm/ollama.lua

@@ -5,21 +5,33 @@ local M = {}
local utils = require("codetyper.utils")
local llm = require("codetyper.llm")

--- Get Ollama host from config
--- Get Ollama host from stored credentials or config
---@return string Host URL
local function get_host()
  -- Priority: stored credentials > config
  local credentials = require("codetyper.credentials")
  local stored_host = credentials.get_ollama_host()
  if stored_host then
    return stored_host
  end

  local codetyper = require("codetyper")
  local config = codetyper.get_config()

  return config.llm.ollama.host
end

--- Get model from config
--- Get model from stored credentials or config
---@return string Model name
local function get_model()
  -- Priority: stored credentials > config
  local credentials = require("codetyper.credentials")
  local stored_model = credentials.get_model("ollama")
  if stored_model then
    return stored_model
  end

  local codetyper = require("codetyper")
  local config = codetyper.get_config()

  return config.llm.ollama.model
end

@@ -199,47 +211,41 @@ function M.validate()
  return true
end

--- Build system prompt for agent mode with tool instructions
--- Generate with tool use support for agentic mode (text-based tool calling)
---@param messages table[] Conversation history
---@param context table Context information
---@return string System prompt
local function build_agent_system_prompt(context)
---@param tool_definitions table Tool definitions
---@param callback fun(response: table|nil, error: string|nil) Callback with Claude-like response format
function M.generate_with_tools(messages, context, tool_definitions, callback)
  local logs = require("codetyper.agent.logs")
  local agent_prompts = require("codetyper.prompts.agent")
  local tools_module = require("codetyper.agent.tools")

  local system_prompt = agent_prompts.system .. "\n\n"
  system_prompt = system_prompt .. tools_module.to_prompt_format() .. "\n\n"
  system_prompt = system_prompt .. agent_prompts.tool_instructions
  logs.request("ollama", get_model())
  logs.thinking("Preparing agent request...")

  -- Add context about current file if available
  if context.file_path then
    system_prompt = system_prompt .. "\n\nCurrent working context:\n"
    system_prompt = system_prompt .. "- File: " .. context.file_path .. "\n"
    if context.language then
      system_prompt = system_prompt .. "- Language: " .. context.language .. "\n"
  -- Build system prompt with tool instructions
  local system_prompt = llm.build_system_prompt(context)
  system_prompt = system_prompt .. "\n\n" .. agent_prompts.system
  system_prompt = system_prompt .. "\n\n" .. agent_prompts.tool_instructions

  -- Add tool descriptions
  system_prompt = system_prompt .. "\n\n## Available Tools\n"
  system_prompt = system_prompt .. "Call tools by outputting JSON in this exact format:\n"
  system_prompt = system_prompt .. '```json\n{"tool": "tool_name", "arguments": {...}}\n```\n\n'

  for _, tool in ipairs(tool_definitions) do
    local name = tool.name or (tool["function"] and tool["function"].name)
    local desc = tool.description or (tool["function"] and tool["function"].description)
    if name then
      system_prompt = system_prompt .. string.format("### %s\n%s\n\n", name, desc or "")
    end
  end

  -- Add project root info
  local root = utils.get_project_root()
  if root then
    system_prompt = system_prompt .. "- Project root: " .. root .. "\n"
  end

  return system_prompt
end

--- Build request body for Ollama API with tools (chat format)
---@param messages table[] Conversation messages
---@param context table Context information
---@return table Request body
local function build_tools_request_body(messages, context)
  local system_prompt = build_agent_system_prompt(context)

  -- Convert messages to Ollama chat format
  local ollama_messages = {}
  for _, msg in ipairs(messages) do
    local content = msg.content
    -- Handle complex content (like tool results)
    if type(content) == "table" then
      local text_parts = {}
      for _, part in ipairs(content) do
@@ -251,14 +257,10 @@ local function build_tools_request_body(messages, context)
      end
      content = table.concat(text_parts, "\n")
    end

    table.insert(ollama_messages, {
      role = msg.role,
      content = content,
    })
    table.insert(ollama_messages, { role = msg.role, content = content })
  end

  return {
  local body = {
    model = get_model(),
    messages = ollama_messages,
    system = system_prompt,
@@ -268,16 +270,15 @@ local function build_tools_request_body(messages, context)
      num_predict = 4096,
    },
  }
end

--- Make HTTP request to Ollama chat API
---@param body table Request body
---@param callback fun(response: string|nil, error: string|nil, usage: table|nil) Callback function
local function make_chat_request(body, callback)
  local host = get_host()
  local url = host .. "/api/chat"
  local json_body = vim.json.encode(body)

  local prompt_estimate = logs.estimate_tokens(json_body)
  logs.debug(string.format("Estimated prompt: ~%d tokens", prompt_estimate))
  logs.thinking("Sending to Ollama API...")

  local cmd = {
    "curl",
    "-s",
@@ -302,196 +303,82 @@ local function make_chat_request(body, callback)

      if not ok then
        vim.schedule(function()
          callback(nil, "Failed to parse Ollama response", nil)
          logs.error("Failed to parse Ollama response")
          callback(nil, "Failed to parse Ollama response")
        end)
        return
      end

      if response.error then
        vim.schedule(function()
          callback(nil, response.error or "Ollama API error", nil)
          logs.error(response.error or "Ollama API error")
          callback(nil, response.error or "Ollama API error")
        end)
        return
      end

      -- Extract usage info
      local usage = {
        prompt_tokens = response.prompt_eval_count or 0,
        response_tokens = response.eval_count or 0,
      }
      -- Log token usage and record cost (Ollama is free but we track usage)
      if response.prompt_eval_count or response.eval_count then
        logs.response(response.prompt_eval_count or 0, response.eval_count or 0, "stop")

      -- Return the message content for agent parsing
      if response.message and response.message.content then
        vim.schedule(function()
          callback(response.message.content, nil, usage)
        end)
      else
        vim.schedule(function()
          callback(nil, "No response from Ollama", nil)
        end)
        -- Record usage for cost tracking (free for local models)
        local cost = require("codetyper.cost")
        cost.record_usage(
          get_model(),
          response.prompt_eval_count or 0,
          response.eval_count or 0,
          0 -- No cached tokens for Ollama
        )
      end

      -- Parse the response text for tool calls
      local content_text = response.message and response.message.content or ""
      local converted = { content = {}, stop_reason = "end_turn" }

      -- Try to extract JSON tool calls from response
      local json_match = content_text:match("```json%s*(%b{})%s*```")
      if json_match then
        local ok_json, parsed = pcall(vim.json.decode, json_match)
        if ok_json and parsed.tool then
          table.insert(converted.content, {
            type = "tool_use",
            id = "call_" .. string.format("%x", os.time()) .. "_" .. string.format("%x", math.random(0, 0xFFFF)),
            name = parsed.tool,
            input = parsed.arguments or {},
          })
          logs.thinking("Tool call: " .. parsed.tool)
          content_text = content_text:gsub("```json.-```", ""):gsub("^%s+", ""):gsub("%s+$", "")
          converted.stop_reason = "tool_use"
        end
      end

      -- Add text content
      if content_text and content_text ~= "" then
        table.insert(converted.content, 1, { type = "text", text = content_text })
        logs.thinking("Response contains text")
      end

      vim.schedule(function()
        callback(converted, nil)
      end)
    end,
    on_stderr = function(_, data)
      if data and #data > 0 and data[1] ~= "" then
        vim.schedule(function()
          callback(nil, "Ollama API request failed: " .. table.concat(data, "\n"), nil)
          logs.error("Ollama API request failed: " .. table.concat(data, "\n"))
          callback(nil, "Ollama API request failed: " .. table.concat(data, "\n"))
        end)
      end
    end,
    on_exit = function(_, code)
      if code ~= 0 then
        -- Don't double-report errors
        vim.schedule(function()
          logs.error("Ollama API request failed with code: " .. code)
          callback(nil, "Ollama API request failed with code: " .. code)
        end)
      end
    end,
  })
end

--- Generate response with tools using Ollama API
---@param messages table[] Conversation history
---@param context table Context information
---@param tools table Tool definitions (embedded in prompt for Ollama)
---@param callback fun(response: string|nil, error: string|nil) Callback function
function M.generate_with_tools(messages, context, tools, callback)
  local logs = require("codetyper.agent.logs")

  -- Log the request
  local model = get_model()
  logs.request("ollama", model)
  logs.thinking("Preparing API request...")

  local body = build_tools_request_body(messages, context)

  -- Estimate prompt tokens
  local prompt_estimate = logs.estimate_tokens(vim.json.encode(body))
  logs.debug(string.format("Estimated prompt: ~%d tokens", prompt_estimate))

  make_chat_request(body, function(response, err, usage)
    if err then
      logs.error(err)
      callback(nil, err)
    else
      -- Log token usage
      if usage then
        logs.response(usage.prompt_tokens or 0, usage.response_tokens or 0, "end_turn")
      end

      -- Log if response contains tool calls
      if response then
        local parser = require("codetyper.agent.parser")
        local parsed = parser.parse_ollama_response(response)
        if #parsed.tool_calls > 0 then
          for _, tc in ipairs(parsed.tool_calls) do
            logs.thinking("Tool call: " .. tc.name)
          end
        end
        if parsed.text and parsed.text ~= "" then
          logs.thinking("Response contains text")
        end
      end

      callback(response, nil)
    end
  end)
end

--- Generate with tool use support for agentic mode (simulated via prompts)
---@param messages table[] Conversation history
---@param context table Context information
---@param tool_definitions table Tool definitions
---@param callback fun(response: string|nil, error: string|nil) Callback with response text
function M.generate_with_tools(messages, context, tool_definitions, callback)
  local tools_module = require("codetyper.agent.tools")
  local agent_prompts = require("codetyper.prompts.agent")

  -- Build system prompt with agent instructions and tool definitions
  local system_prompt = llm.build_system_prompt(context)
  system_prompt = system_prompt .. "\n\n" .. agent_prompts.system
  system_prompt = system_prompt .. "\n\n" .. tools_module.to_prompt_format()

  -- Flatten messages to a single prompt (Ollama's generate API)
  local prompt_parts = {}
  for _, msg in ipairs(messages) do
    if type(msg.content) == "string" then
      local role_prefix = msg.role == "user" and "User" or "Assistant"
      table.insert(prompt_parts, role_prefix .. ": " .. msg.content)
    elseif type(msg.content) == "table" then
      -- Handle tool results
      for _, item in ipairs(msg.content) do
        if item.type == "tool_result" then
          table.insert(prompt_parts, "Tool result: " .. item.content)
        end
      end
    end
  end

  local body = {
    model = get_model(),
    system = system_prompt,
    prompt = table.concat(prompt_parts, "\n\n"),
    stream = false,
    options = {
      temperature = 0.2,
      num_predict = 4096,
    },
  }

  local host = get_host()
  local url = host .. "/api/generate"
  local json_body = vim.json.encode(body)

  local cmd = {
    "curl",
    "-s",
    "-X", "POST",
    url,
    "-H", "Content-Type: application/json",
    "-d", json_body,
  }

  vim.fn.jobstart(cmd, {
    stdout_buffered = true,
    on_stdout = function(_, data)
      if not data or #data == 0 or (data[1] == "" and #data == 1) then
        return
      end

      local response_text = table.concat(data, "\n")
      local ok, response = pcall(vim.json.decode, response_text)

      if not ok then
        vim.schedule(function()
          callback(nil, "Failed to parse Ollama response")
        end)
        return
      end

      if response.error then
        vim.schedule(function()
          callback(nil, response.error or "Ollama API error")
        end)
        return
      end

      -- Return raw response text for parser to handle
      vim.schedule(function()
        callback(response.response or "", nil)
      end)
    end,
    on_stderr = function(_, data)
      if data and #data > 0 and data[1] ~= "" then
        vim.schedule(function()
          callback(nil, "Ollama API request failed: " .. table.concat(data, "\n"))
        end)
      end
    end,
    on_exit = function(_, code)
      if code ~= 0 then
        vim.schedule(function()
          callback(nil, "Ollama API request failed with code: " .. code)
        end)
      end
    end,
  })
end

return M

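
Note: `make_chat_request` now recovers tool calls from fenced JSON inside the model's text, since Ollama has no native tool-calling in this path. A plain-Lua illustration of that extraction; the model reply is invented:

```lua
-- Plain-Lua illustration of the fenced-JSON extraction used above
-- (the model reply is invented for the example).
local fence = string.rep("`", 3) -- avoids a literal fence inside this block
local reply = "I will read the file first.\n"
  .. fence .. "json\n"
  .. '{"tool": "read_file", "arguments": {"path": "init.lua"}}\n'
  .. fence

local json_match = reply:match(fence .. "json%s*(%b{})%s*" .. fence)
print(json_match) --> {"tool": "read_file", "arguments": {"path": "init.lua"}}
-- Decoding this JSON yields the tool_use content block built above, and
-- stop_reason flips to "tool_use" so the agent loop executes the tool.
```
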
lua/codetyper/llm/openai.lua

@@ -8,25 +8,46 @@ local llm = require("codetyper.llm")
--- OpenAI API endpoint
local API_URL = "https://api.openai.com/v1/chat/completions"

--- Get API key from config or environment
--- Get API key from stored credentials, config, or environment
---@return string|nil API key
local function get_api_key()
  -- Priority: stored credentials > config > environment
  local credentials = require("codetyper.credentials")
  local stored_key = credentials.get_api_key("openai")
  if stored_key then
    return stored_key
  end

  local codetyper = require("codetyper")
  local config = codetyper.get_config()
  return config.llm.openai.api_key or vim.env.OPENAI_API_KEY
end

--- Get model from config
--- Get model from stored credentials or config
---@return string Model name
local function get_model()
  -- Priority: stored credentials > config
  local credentials = require("codetyper.credentials")
  local stored_model = credentials.get_model("openai")
  if stored_model then
    return stored_model
  end

  local codetyper = require("codetyper")
  local config = codetyper.get_config()
  return config.llm.openai.model
end

--- Get endpoint from config (allows custom endpoints like Azure, OpenRouter)
--- Get endpoint from stored credentials or config (allows custom endpoints like Azure, OpenRouter)
---@return string API endpoint
local function get_endpoint()
  -- Priority: stored credentials > config > default
  local credentials = require("codetyper.credentials")
  local stored_endpoint = credentials.get_endpoint("openai")
  if stored_endpoint then
    return stored_endpoint
  end

  local codetyper = require("codetyper")
  local config = codetyper.get_config()
  return config.llm.openai.endpoint or API_URL
@@ -284,9 +305,18 @@ function M.generate_with_tools(messages, context, tool_definitions, callback)
    return
  end

  -- Log token usage
  -- Log token usage and record cost
  if response.usage then
    logs.response(response.usage.prompt_tokens or 0, response.usage.completion_tokens or 0, "stop")

    -- Record usage for cost tracking
    local cost = require("codetyper.cost")
    cost.record_usage(
      model,
      response.usage.prompt_tokens or 0,
      response.usage.completion_tokens or 0,
      response.usage.prompt_tokens_details and response.usage.prompt_tokens_details.cached_tokens or 0
    )
  end

  -- Convert to Claude-like format for parser compatibility

lua/codetyper/llm/selector.lua (new file, 514 lines)
@@ -0,0 +1,514 @@
---@mod codetyper.llm.selector Smart LLM selection with memory-based confidence
---@brief [[
--- Intelligent LLM provider selection based on brain memories.
--- Prefers local Ollama when context is rich, falls back to Copilot otherwise.
--- Implements verification pondering to reinforce Ollama accuracy over time.
---@brief ]]

local M = {}

---@class SelectionResult
---@field provider string Selected provider name
---@field confidence number Confidence score (0-1)
---@field memory_count number Number of relevant memories found
---@field reason string Human-readable reason for selection

---@class PonderResult
---@field ollama_response string Ollama's response
---@field verifier_response string Verifier's response
---@field agreement_score number How much they agree (0-1)
---@field ollama_correct boolean Whether Ollama was deemed correct
---@field feedback string Feedback for learning

--- Minimum memories required for high confidence
local MIN_MEMORIES_FOR_LOCAL = 3

--- Minimum memory relevance score for local provider
local MIN_RELEVANCE_FOR_LOCAL = 0.6

--- Agreement threshold for Ollama verification
local AGREEMENT_THRESHOLD = 0.7

--- Pondering sample rate (0-1) - how often to verify Ollama
local PONDER_SAMPLE_RATE = 0.2

--- Provider accuracy tracking (persisted in brain)
local accuracy_cache = {
  ollama = { correct = 0, total = 0 },
  copilot = { correct = 0, total = 0 },
}

--- Get the brain module safely
---@return table|nil
local function get_brain()
  local ok, brain = pcall(require, "codetyper.brain")
  if ok and brain.is_initialized and brain.is_initialized() then
    return brain
  end
  return nil
end

--- Load accuracy stats from brain
local function load_accuracy_stats()
  local brain = get_brain()
  if not brain then
    return
  end

  -- Query for accuracy tracking nodes
  pcall(function()
    local result = brain.query({
      query = "provider_accuracy_stats",
      types = { "metric" },
      limit = 1,
    })

    if result and result.nodes and #result.nodes > 0 then
      local node = result.nodes[1]
      if node.c and node.c.d then
        local ok, stats = pcall(vim.json.decode, node.c.d)
        if ok and stats then
          accuracy_cache = stats
        end
      end
    end
  end)
end

--- Save accuracy stats to brain
local function save_accuracy_stats()
  local brain = get_brain()
  if not brain then
    return
  end

  pcall(function()
    brain.learn({
      type = "metric",
      summary = "provider_accuracy_stats",
      detail = vim.json.encode(accuracy_cache),
      weight = 1.0,
    })
  end)
end

--- Calculate Ollama confidence based on historical accuracy
---@return number confidence (0-1)
local function get_ollama_historical_confidence()
  local stats = accuracy_cache.ollama
  if stats.total < 5 then
    -- Not enough data, return neutral confidence
    return 0.5
  end

  local accuracy = stats.correct / stats.total
  -- Boost confidence if accuracy is high
  return math.min(1.0, accuracy * 1.2)
end

--- Query brain for relevant context
---@param prompt string User prompt
---@param file_path string|nil Current file path
---@return table result {memories: table[], relevance: number, count: number}
local function query_brain_context(prompt, file_path)
  local result = {
    memories = {},
    relevance = 0,
    count = 0,
  }

  local brain = get_brain()
  if not brain then
    return result
  end

  -- Query brain with multiple dimensions
  local ok, query_result = pcall(function()
    return brain.query({
      query = prompt,
      file = file_path,
      limit = 10,
      types = { "pattern", "correction", "convention", "fact" },
    })
  end)

  if not ok or not query_result then
    return result
  end

  result.memories = query_result.nodes or {}
  result.count = #result.memories

  -- Calculate average relevance
  if result.count > 0 then
    local total_relevance = 0
    for _, node in ipairs(result.memories) do
      -- Use node weight and success rate as relevance indicators
      local node_relevance = (node.sc and node.sc.w or 0.5) * (node.sc and node.sc.sr or 0.5)
      total_relevance = total_relevance + node_relevance
    end
    result.relevance = total_relevance / result.count
  end

  return result
end

--- Select the best LLM provider based on context
---@param prompt string User prompt
---@param context table LLM context
---@return SelectionResult
function M.select_provider(prompt, context)
  -- Load accuracy stats on first call
  if accuracy_cache.ollama.total == 0 then
    load_accuracy_stats()
  end

  local file_path = context.file_path

  -- Query brain for relevant memories
  local brain_context = query_brain_context(prompt, file_path)

  -- Calculate base confidence from memories
  local memory_confidence = 0
  if brain_context.count >= MIN_MEMORIES_FOR_LOCAL then
    memory_confidence = math.min(1.0, brain_context.count / 10) * brain_context.relevance
  end

  -- Factor in historical Ollama accuracy
  local historical_confidence = get_ollama_historical_confidence()

  -- Combined confidence score
  local combined_confidence = (memory_confidence * 0.6) + (historical_confidence * 0.4)

  -- Decision logic
  local provider = "copilot" -- Default to more capable
  local reason = ""

  if brain_context.count >= MIN_MEMORIES_FOR_LOCAL and combined_confidence >= MIN_RELEVANCE_FOR_LOCAL then
    provider = "ollama"
    reason = string.format(
      "Rich context: %d memories (%.1f%% relevance), historical accuracy: %.1f%%",
      brain_context.count,
      brain_context.relevance * 100,
      historical_confidence * 100
    )
  elseif brain_context.count > 0 and combined_confidence >= 0.4 then
    -- Medium confidence - use Ollama but with pondering
    provider = "ollama"
    reason = string.format(
      "Moderate context: %d memories, will verify with pondering",
      brain_context.count
    )
  else
    reason = string.format(
      "Insufficient context: %d memories (need %d), using capable provider",
      brain_context.count,
      MIN_MEMORIES_FOR_LOCAL
    )
  end

  return {
    provider = provider,
    confidence = combined_confidence,
    memory_count = brain_context.count,
    reason = reason,
    memories = brain_context.memories,
  }
end
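
Note: a worked example of the scoring above, with invented numbers, to make the branch thresholds concrete:

```lua
-- Worked example (numbers invented): 6 memories at average relevance 0.7,
-- historical accuracy 0.8 over enough samples.
local memory_confidence = math.min(1.0, 6 / 10) * 0.7  -- 0.42
local historical_confidence = math.min(1.0, 0.8 * 1.2) -- 0.96
local combined = (memory_confidence * 0.6) + (historical_confidence * 0.4)
print(combined) --> 0.636
-- 6 >= MIN_MEMORIES_FOR_LOCAL (3) and 0.636 >= MIN_RELEVANCE_FOR_LOCAL (0.6),
-- so this request lands on the high-confidence Ollama branch.
```
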

--- Check if we should ponder (verify) this Ollama response
---@param confidence number Current confidence level
---@return boolean
function M.should_ponder(confidence)
  -- Always ponder when confidence is medium
  if confidence >= 0.4 and confidence < 0.7 then
    return true
  end

  -- Random sampling for high confidence to keep learning
  if confidence >= 0.7 then
    return math.random() < PONDER_SAMPLE_RATE
  end

  -- Low confidence shouldn't reach Ollama anyway
  return false
end

--- Calculate agreement score between two responses
---@param response1 string First response
---@param response2 string Second response
---@return number Agreement score (0-1)
local function calculate_agreement(response1, response2)
  -- Normalize responses
  local norm1 = response1:lower():gsub("%s+", " "):gsub("[^%w%s]", "")
  local norm2 = response2:lower():gsub("%s+", " "):gsub("[^%w%s]", "")

  -- Extract words
  local words1 = {}
  for word in norm1:gmatch("%w+") do
    words1[word] = (words1[word] or 0) + 1
  end

  local words2 = {}
  for word in norm2:gmatch("%w+") do
    words2[word] = (words2[word] or 0) + 1
  end

  -- Calculate Jaccard similarity
  local intersection = 0
  local union = 0

  for word, count1 in pairs(words1) do
    local count2 = words2[word] or 0
    intersection = intersection + math.min(count1, count2)
    union = union + math.max(count1, count2)
  end

  for word, count2 in pairs(words2) do
    if not words1[word] then
      union = union + count2
    end
  end

  if union == 0 then
    return 1.0 -- Both empty
  end

  -- Also check structural similarity (code structure)
  local struct_score = 0
  local function_count1 = select(2, response1:gsub("function", ""))
  local function_count2 = select(2, response2:gsub("function", ""))
  if function_count1 > 0 or function_count2 > 0 then
    struct_score = 1 - math.abs(function_count1 - function_count2) / math.max(function_count1, function_count2, 1)
  else
    struct_score = 1.0
  end

  -- Combined score
  local jaccard = intersection / union
  return (jaccard * 0.7) + (struct_score * 0.3)
end
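
Note: `calculate_agreement` blends a multiset Jaccard similarity (weight 0.7) with a function-count heuristic (weight 0.3). A small worked example with invented strings:

```lua
-- Worked example (invented strings): "local x = 1" vs "local y = 1".
-- After normalization the word multisets are {local, x, 1} and {local, y, 1}:
-- intersection = 2 (local, 1), union = 4 (local, x, y, 1) -> jaccard = 0.5.
-- Neither string contains "function", so struct_score = 1.0.
local jaccard, struct_score = 0.5, 1.0
local agreement = (jaccard * 0.7) + (struct_score * 0.3)
print(agreement) --> 0.65, just under AGREEMENT_THRESHOLD (0.7),
-- so two responses this different would be flagged as a disagreement.
```
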

--- Ponder (verify) Ollama's response with another LLM
---@param prompt string Original prompt
---@param context table LLM context
---@param ollama_response string Ollama's response
---@param callback fun(result: PonderResult) Callback with pondering result
function M.ponder(prompt, context, ollama_response, callback)
  -- Use Copilot as verifier
  local copilot = require("codetyper.llm.copilot")

  -- Build verification prompt
  local verify_prompt = prompt

  copilot.generate(verify_prompt, context, function(verifier_response, error)
    if error or not verifier_response then
      -- Verification failed, assume Ollama is correct
      callback({
        ollama_response = ollama_response,
        verifier_response = "",
        agreement_score = 1.0,
        ollama_correct = true,
        feedback = "Verification unavailable, trusting Ollama",
      })
      return
    end

    -- Calculate agreement
    local agreement = calculate_agreement(ollama_response, verifier_response)

    -- Determine if Ollama was correct
    local ollama_correct = agreement >= AGREEMENT_THRESHOLD

    -- Generate feedback
    local feedback
    if ollama_correct then
      feedback = string.format("Agreement: %.1f%% - Ollama response validated", agreement * 100)
    else
      feedback = string.format(
        "Disagreement: %.1f%% - Ollama may need correction",
        (1 - agreement) * 100
      )
    end

    -- Update accuracy tracking
    accuracy_cache.ollama.total = accuracy_cache.ollama.total + 1
    if ollama_correct then
      accuracy_cache.ollama.correct = accuracy_cache.ollama.correct + 1
    end
    save_accuracy_stats()

    -- Learn from this verification
    local brain = get_brain()
    if brain then
      pcall(function()
        if ollama_correct then
          -- Reinforce the pattern
          brain.learn({
            type = "correction",
            summary = "Ollama verified correct",
            detail = string.format(
              "Prompt: %s\nAgreement: %.1f%%",
              prompt:sub(1, 100),
              agreement * 100
            ),
            weight = 0.8,
            file = context.file_path,
          })
        else
          -- Learn the correction
          brain.learn({
            type = "correction",
            summary = "Ollama needed correction",
            detail = string.format(
              "Prompt: %s\nOllama: %s\nCorrect: %s",
              prompt:sub(1, 100),
              ollama_response:sub(1, 200),
              verifier_response:sub(1, 200)
            ),
            weight = 0.9,
            file = context.file_path,
          })
        end
      end)
    end

    callback({
      ollama_response = ollama_response,
      verifier_response = verifier_response,
      agreement_score = agreement,
      ollama_correct = ollama_correct,
      feedback = feedback,
    })
  end)
end

--- Smart generate with automatic provider selection and pondering
---@param prompt string User prompt
---@param context table LLM context
---@param callback fun(response: string|nil, error: string|nil, metadata: table|nil) Callback
function M.smart_generate(prompt, context, callback)
  -- Select provider
  local selection = M.select_provider(prompt, context)

  -- Log selection
  pcall(function()
    local logs = require("codetyper.agent.logs")
    logs.add({
      type = "info",
      message = string.format(
        "LLM: %s (confidence: %.1f%%, %s)",
        selection.provider,
        selection.confidence * 100,
        selection.reason
      ),
    })
  end)

  -- Get the selected client
  local client
  if selection.provider == "ollama" then
    client = require("codetyper.llm.ollama")
  else
    client = require("codetyper.llm.copilot")
  end

  -- Generate response
  client.generate(prompt, context, function(response, error)
    if error then
      -- Fallback on error
      if selection.provider == "ollama" then
        -- Try Copilot as fallback
        local copilot = require("codetyper.llm.copilot")
        copilot.generate(prompt, context, function(fallback_response, fallback_error)
          callback(fallback_response, fallback_error, {
            provider = "copilot",
            fallback = true,
            original_provider = "ollama",
            original_error = error,
          })
        end)
        return
      end
      callback(nil, error, { provider = selection.provider })
      return
    end

    -- Check if we should ponder
    if selection.provider == "ollama" and M.should_ponder(selection.confidence) then
      M.ponder(prompt, context, response, function(ponder_result)
        if ponder_result.ollama_correct then
          -- Ollama was correct, use its response
          callback(response, nil, {
            provider = "ollama",
            pondered = true,
            agreement = ponder_result.agreement_score,
            confidence = selection.confidence,
          })
        else
          -- Use verifier's response instead
          callback(ponder_result.verifier_response, nil, {
            provider = "copilot",
            pondered = true,
            agreement = ponder_result.agreement_score,
            original_provider = "ollama",
            corrected = true,
          })
        end
      end)
    else
      -- No pondering needed
      callback(response, nil, {
        provider = selection.provider,
        pondered = false,
        confidence = selection.confidence,
      })
    end
  end)
end

--- Get current accuracy statistics
---@return table {ollama: {correct, total, accuracy}, copilot: {correct, total, accuracy}}
function M.get_accuracy_stats()
  local stats = {
    ollama = {
      correct = accuracy_cache.ollama.correct,
      total = accuracy_cache.ollama.total,
      accuracy = accuracy_cache.ollama.total > 0
        and (accuracy_cache.ollama.correct / accuracy_cache.ollama.total)
        or 0,
    },
    copilot = {
      correct = accuracy_cache.copilot.correct,
      total = accuracy_cache.copilot.total,
      accuracy = accuracy_cache.copilot.total > 0
        and (accuracy_cache.copilot.correct / accuracy_cache.copilot.total)
        or 0,
    },
  }
  return stats
end

--- Reset accuracy statistics
function M.reset_accuracy_stats()
  accuracy_cache = {
    ollama = { correct = 0, total = 0 },
    copilot = { correct = 0, total = 0 },
  }
  save_accuracy_stats()
end

--- Report user feedback on response quality
---@param provider string Which provider generated the response
---@param was_correct boolean Whether the response was good
function M.report_feedback(provider, was_correct)
  if accuracy_cache[provider] then
    accuracy_cache[provider].total = accuracy_cache[provider].total + 1
    if was_correct then
      accuracy_cache[provider].correct = accuracy_cache[provider].correct + 1
    end
    save_accuracy_stats()
  end
end

return M
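
Note: a sketch of rendering the tracked accuracy, for instance behind a user command; the `:CodetyperStats` name is hypothetical and not part of this commit:

```lua
-- Sketch: printing the tracked accuracy, e.g. from a hypothetical
-- :CodetyperStats command (name invented; API per this diff).
local selector = require("codetyper.llm.selector")
local stats = selector.get_accuracy_stats()
for provider, s in pairs(stats) do
  print(string.format("%s: %d/%d correct (%.0f%%)",
    provider, s.correct, s.total, s.accuracy * 100))
end
```
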