Improve agent autonomy and diff view readability
Agent behavior improvements:
- Add project context detection (tsconfig.json, pom.xml, etc.)
- Enforce validation after changes (tsc --noEmit, mvn compile, etc.)
- Run tests automatically - never ask "do you want me to run tests"
- Complete full loop: create → type-check → test → confirm
- Add command detection for direct execution (run tree, run ls)

Diff view improvements:
- Use darker backgrounds for added/removed lines
- Add diffLineBgAdded, diffLineBgRemoved, diffLineText theme colors
- Improve text visibility with white text on dark backgrounds
- Update both React/Ink and SolidJS diff components

Streaming fixes:
- Fix tool call argument accumulation using OpenAI index field
- Fix streaming content display after tool calls
- Add consecutive error tracking to prevent token waste

Other changes:
- ESC to abort operations, Ctrl+C to exit
- Fix model selection when provider changes in cascade mode
- Add debug logging for troubleshooting
- Move tests to root tests/ folder
- Fix banner test GRADIENT_COLORS reference
This commit is contained in:
@@ -1,203 +0,0 @@
|
||||
/**
|
||||
* Unit tests for Streaming Agent
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from "bun:test";
|
||||
|
||||
import {
|
||||
createInitialStreamingState,
|
||||
createStreamAccumulator,
|
||||
} from "@/types/streaming";
|
||||
|
||||
import type {
|
||||
StreamingState,
|
||||
StreamAccumulator,
|
||||
PartialToolCall,
|
||||
} from "@/types/streaming";
|
||||
|
||||
describe("Streaming Agent Types", () => {
|
||||
describe("createInitialStreamingState", () => {
|
||||
it("should create state with idle status", () => {
|
||||
const state = createInitialStreamingState();
|
||||
|
||||
expect(state.status).toBe("idle");
|
||||
expect(state.content).toBe("");
|
||||
expect(state.pendingToolCalls).toHaveLength(0);
|
||||
expect(state.completedToolCalls).toHaveLength(0);
|
||||
expect(state.error).toBeNull();
|
||||
expect(state.modelSwitched).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe("createStreamAccumulator", () => {
|
||||
it("should create empty accumulator", () => {
|
||||
const accumulator = createStreamAccumulator();
|
||||
|
||||
expect(accumulator.content).toBe("");
|
||||
expect(accumulator.toolCalls.size).toBe(0);
|
||||
expect(accumulator.modelSwitch).toBeNull();
|
||||
});
|
||||
|
||||
it("should accumulate content", () => {
|
||||
const accumulator = createStreamAccumulator();
|
||||
|
||||
accumulator.content += "Hello ";
|
||||
accumulator.content += "World";
|
||||
|
||||
expect(accumulator.content).toBe("Hello World");
|
||||
});
|
||||
|
||||
it("should store partial tool calls", () => {
|
||||
const accumulator = createStreamAccumulator();
|
||||
|
||||
const partial: PartialToolCall = {
|
||||
index: 0,
|
||||
id: "call_123",
|
||||
name: "read",
|
||||
argumentsBuffer: '{"path": "/test',
|
||||
isComplete: false,
|
||||
};
|
||||
|
||||
accumulator.toolCalls.set(0, partial);
|
||||
|
||||
expect(accumulator.toolCalls.size).toBe(1);
|
||||
expect(accumulator.toolCalls.get(0)?.name).toBe("read");
|
||||
});
|
||||
|
||||
it("should accumulate tool call arguments", () => {
|
||||
const accumulator = createStreamAccumulator();
|
||||
|
||||
const partial: PartialToolCall = {
|
||||
index: 0,
|
||||
id: "call_123",
|
||||
name: "read",
|
||||
argumentsBuffer: "",
|
||||
isComplete: false,
|
||||
};
|
||||
|
||||
accumulator.toolCalls.set(0, partial);
|
||||
|
||||
// Simulate streaming arguments
|
||||
partial.argumentsBuffer += '{"path": ';
|
||||
partial.argumentsBuffer += '"/test.ts"}';
|
||||
|
||||
expect(partial.argumentsBuffer).toBe('{"path": "/test.ts"}');
|
||||
|
||||
// Verify JSON is valid
|
||||
const parsed = JSON.parse(partial.argumentsBuffer);
|
||||
expect(parsed.path).toBe("/test.ts");
|
||||
});
|
||||
});
|
||||
|
||||
describe("StreamingState transitions", () => {
|
||||
it("should represent idle to streaming transition", () => {
|
||||
const state: StreamingState = {
|
||||
...createInitialStreamingState(),
|
||||
status: "streaming",
|
||||
content: "Processing your request",
|
||||
};
|
||||
|
||||
expect(state.status).toBe("streaming");
|
||||
expect(state.content).toBe("Processing your request");
|
||||
});
|
||||
|
||||
it("should represent tool call accumulation", () => {
|
||||
const partial: PartialToolCall = {
|
||||
index: 0,
|
||||
id: "call_456",
|
||||
name: "bash",
|
||||
argumentsBuffer: '{"command": "ls -la"}',
|
||||
isComplete: false,
|
||||
};
|
||||
|
||||
const state: StreamingState = {
|
||||
...createInitialStreamingState(),
|
||||
status: "accumulating_tool",
|
||||
pendingToolCalls: [partial],
|
||||
};
|
||||
|
||||
expect(state.status).toBe("accumulating_tool");
|
||||
expect(state.pendingToolCalls).toHaveLength(1);
|
||||
expect(state.pendingToolCalls[0].name).toBe("bash");
|
||||
});
|
||||
|
||||
it("should represent completion state", () => {
|
||||
const state: StreamingState = {
|
||||
...createInitialStreamingState(),
|
||||
status: "complete",
|
||||
content: "Task completed successfully.",
|
||||
completedToolCalls: [
|
||||
{ id: "call_789", name: "write", arguments: { path: "/out.txt" } },
|
||||
],
|
||||
};
|
||||
|
||||
expect(state.status).toBe("complete");
|
||||
expect(state.completedToolCalls).toHaveLength(1);
|
||||
});
|
||||
|
||||
it("should represent error state", () => {
|
||||
const state: StreamingState = {
|
||||
...createInitialStreamingState(),
|
||||
status: "error",
|
||||
error: "Connection timeout",
|
||||
};
|
||||
|
||||
expect(state.status).toBe("error");
|
||||
expect(state.error).toBe("Connection timeout");
|
||||
});
|
||||
|
||||
it("should represent model switch", () => {
|
||||
const state: StreamingState = {
|
||||
...createInitialStreamingState(),
|
||||
status: "streaming",
|
||||
modelSwitched: {
|
||||
from: "gpt-4",
|
||||
to: "gpt-4-unlimited",
|
||||
reason: "Quota exceeded",
|
||||
},
|
||||
};
|
||||
|
||||
expect(state.modelSwitched).not.toBeNull();
|
||||
expect(state.modelSwitched?.from).toBe("gpt-4");
|
||||
expect(state.modelSwitched?.to).toBe("gpt-4-unlimited");
|
||||
});
|
||||
});
|
||||
|
||||
describe("Tool call finalization", () => {
|
||||
it("should convert partial to complete tool call", () => {
|
||||
const partial: PartialToolCall = {
|
||||
index: 0,
|
||||
id: "call_abc",
|
||||
name: "edit",
|
||||
argumentsBuffer:
|
||||
'{"file_path": "/src/app.ts", "old_string": "foo", "new_string": "bar"}',
|
||||
isComplete: true,
|
||||
};
|
||||
|
||||
const args = JSON.parse(partial.argumentsBuffer);
|
||||
|
||||
expect(args.file_path).toBe("/src/app.ts");
|
||||
expect(args.old_string).toBe("foo");
|
||||
expect(args.new_string).toBe("bar");
|
||||
});
|
||||
|
||||
it("should handle malformed JSON gracefully", () => {
|
||||
const partial: PartialToolCall = {
|
||||
index: 0,
|
||||
id: "call_def",
|
||||
name: "read",
|
||||
argumentsBuffer: '{"path": "/incomplete',
|
||||
isComplete: true,
|
||||
};
|
||||
|
||||
let args: Record<string, unknown> = {};
|
||||
try {
|
||||
args = JSON.parse(partial.argumentsBuffer);
|
||||
} catch {
|
||||
args = {};
|
||||
}
|
||||
|
||||
expect(args).toEqual({});
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -23,7 +23,7 @@ import type {
|
||||
import { chatStream } from "@providers/chat";
|
||||
import { getTool, getToolsForApi, refreshMCPTools } from "@tools/index";
|
||||
import { initializePermissions } from "@services/permissions";
|
||||
import { MAX_ITERATIONS } from "@constants/agent";
|
||||
import { MAX_ITERATIONS, MAX_CONSECUTIVE_ERRORS } from "@constants/agent";
|
||||
import { createStreamAccumulator } from "@/types/streaming";
|
||||
|
||||
// =============================================================================
|
||||
@@ -80,33 +80,47 @@ const processStreamChunk = (
|
||||
tool_call: () => {
|
||||
if (!chunk.toolCall) return;
|
||||
|
||||
const tc = chunk.toolCall;
|
||||
const index = tc.id ? getToolCallIndex(tc.id, accumulator) : 0;
|
||||
const tc = chunk.toolCall as {
|
||||
index?: number;
|
||||
id?: string;
|
||||
function?: { name?: string; arguments?: string };
|
||||
};
|
||||
|
||||
// OpenAI streaming format includes index in each chunk
|
||||
// Use index from chunk if available, otherwise find by id or default to 0
|
||||
const chunkIndex = tc.index ?? (tc.id ? getToolCallIndex(tc.id, accumulator) : 0);
|
||||
|
||||
// Get or create partial tool call
|
||||
let partial = accumulator.toolCalls.get(index);
|
||||
if (!partial && tc.id) {
|
||||
let partial = accumulator.toolCalls.get(chunkIndex);
|
||||
if (!partial) {
|
||||
// Create new partial - use id if provided, generate one otherwise
|
||||
partial = {
|
||||
index,
|
||||
id: tc.id,
|
||||
index: chunkIndex,
|
||||
id: tc.id ?? `tool_${chunkIndex}_${Date.now()}`,
|
||||
name: tc.function?.name ?? "",
|
||||
argumentsBuffer: "",
|
||||
isComplete: false,
|
||||
};
|
||||
accumulator.toolCalls.set(index, partial);
|
||||
accumulator.toolCalls.set(chunkIndex, partial);
|
||||
if (tc.id) {
|
||||
callbacks.onToolCallStart?.(partial);
|
||||
}
|
||||
}
|
||||
|
||||
// Update id if provided (first chunk has the real id)
|
||||
if (tc.id && partial.id.startsWith("tool_")) {
|
||||
partial.id = tc.id;
|
||||
callbacks.onToolCallStart?.(partial);
|
||||
}
|
||||
|
||||
if (partial) {
|
||||
// Update name if provided
|
||||
if (tc.function?.name) {
|
||||
partial.name = tc.function.name;
|
||||
}
|
||||
// Update name if provided
|
||||
if (tc.function?.name) {
|
||||
partial.name = tc.function.name;
|
||||
}
|
||||
|
||||
// Accumulate arguments
|
||||
if (tc.function?.arguments) {
|
||||
partial.argumentsBuffer += tc.function.arguments;
|
||||
}
|
||||
// Accumulate arguments
|
||||
if (tc.function?.arguments) {
|
||||
partial.argumentsBuffer += tc.function.arguments;
|
||||
}
|
||||
},
|
||||
|
||||
@@ -165,10 +179,20 @@ const getToolCallIndex = (
|
||||
*/
|
||||
const finalizeToolCall = (partial: PartialToolCall): ToolCall => {
|
||||
let args: Record<string, unknown> = {};
|
||||
try {
|
||||
args = JSON.parse(partial.argumentsBuffer || "{}");
|
||||
} catch {
|
||||
args = {};
|
||||
const rawBuffer = partial.argumentsBuffer || "";
|
||||
|
||||
if (!rawBuffer) {
|
||||
args = { __debug_error: "Empty arguments buffer" };
|
||||
} else {
|
||||
try {
|
||||
args = JSON.parse(rawBuffer);
|
||||
} catch (e) {
|
||||
args = {
|
||||
__debug_error: "JSON parse failed",
|
||||
__debug_buffer: rawBuffer.substring(0, 200),
|
||||
__debug_parseError: e instanceof Error ? e.message : String(e),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
@@ -210,12 +234,13 @@ const executeTool = async (
|
||||
const validatedArgs = tool.parameters.parse(toolCall.arguments);
|
||||
return await tool.execute(validatedArgs, ctx);
|
||||
} catch (error: unknown) {
|
||||
const receivedArgs = JSON.stringify(toolCall.arguments);
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
return {
|
||||
success: false,
|
||||
title: "Tool error",
|
||||
title: "Tool validation error",
|
||||
output: "",
|
||||
error: errorMessage,
|
||||
error: `${toolCall.name}: ${errorMessage}\nReceived: ${receivedArgs}`,
|
||||
};
|
||||
}
|
||||
};
|
||||
@@ -296,6 +321,7 @@ export const runAgentLoopStream = async (
|
||||
const allToolCalls: { call: ToolCall; result: ToolResult }[] = [];
|
||||
let iterations = 0;
|
||||
let finalResponse = "";
|
||||
let consecutiveErrors = 0;
|
||||
|
||||
// Initialize
|
||||
await initializePermissions();
|
||||
@@ -331,6 +357,9 @@ export const runAgentLoopStream = async (
|
||||
state.options.onText?.(response.content);
|
||||
}
|
||||
|
||||
// Track if all tool calls in this iteration failed
|
||||
let allFailed = true;
|
||||
|
||||
// Execute each tool call
|
||||
for (const toolCall of response.toolCalls) {
|
||||
state.options.onToolCall?.(toolCall);
|
||||
@@ -340,6 +369,12 @@ export const runAgentLoopStream = async (
|
||||
|
||||
state.options.onToolResult?.(toolCall.id, result);
|
||||
|
||||
// Track success/failure
|
||||
if (result.success) {
|
||||
allFailed = false;
|
||||
consecutiveErrors = 0;
|
||||
}
|
||||
|
||||
// Add tool result message
|
||||
const toolResultMessage: ToolResultMessage = {
|
||||
role: "tool",
|
||||
@@ -350,6 +385,21 @@ export const runAgentLoopStream = async (
|
||||
};
|
||||
agentMessages.push(toolResultMessage);
|
||||
}
|
||||
|
||||
// Check for repeated failures
|
||||
if (allFailed) {
|
||||
consecutiveErrors++;
|
||||
if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
|
||||
const errorMsg = `Stopping: ${consecutiveErrors} consecutive tool errors. Check model compatibility with tool calling.`;
|
||||
state.options.onError?.(errorMsg);
|
||||
return {
|
||||
success: false,
|
||||
finalResponse: errorMsg,
|
||||
iterations,
|
||||
toolCalls: allToolCalls,
|
||||
};
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// No tool calls - this is the final response
|
||||
finalResponse = response.content || "";
|
||||
|
||||
@@ -24,7 +24,10 @@ export type {
|
||||
export { initializeChatService } from "@services/chat-tui/initialize";
|
||||
|
||||
// Re-export message handling
|
||||
export { handleMessage } from "@services/chat-tui/message-handler";
|
||||
export {
|
||||
handleMessage,
|
||||
abortCurrentOperation,
|
||||
} from "@services/chat-tui/message-handler";
|
||||
|
||||
// Re-export command handling
|
||||
export { executeCommand } from "@services/chat-tui/commands";
|
||||
|
||||
@@ -43,7 +43,7 @@ import {
|
||||
checkOllamaAvailability,
|
||||
checkCopilotAvailability,
|
||||
} from "@services/cascading-provider";
|
||||
import { chat } from "@providers/chat";
|
||||
import { chat, getDefaultModel } from "@providers/chat";
|
||||
import { AUDIT_SYSTEM_PROMPT, createAuditPrompt, parseAuditResponse } from "@prompts/audit-prompt";
|
||||
import { PROVIDER_IDS } from "@constants/provider-quality";
|
||||
import { appStore } from "@tui/index";
|
||||
@@ -55,6 +55,12 @@ import type {
|
||||
ToolCallInfo,
|
||||
} from "@/types/chat-service";
|
||||
import { addDebugLog } from "@tui-solid/components/debug-log-panel";
|
||||
import { FILE_MODIFYING_TOOLS } from "@constants/tools";
|
||||
import type { StreamCallbacksWithState } from "@interfaces/StreamCallbacksWithState";
|
||||
import {
|
||||
detectCommand,
|
||||
executeDetectedCommand,
|
||||
} from "@services/command-detection";
|
||||
|
||||
// Track last response for feedback learning
|
||||
let lastResponseContext: {
|
||||
@@ -63,7 +69,25 @@ let lastResponseContext: {
|
||||
response: string;
|
||||
} | null = null;
|
||||
|
||||
const FILE_MODIFYING_TOOLS = ["write", "edit"];
|
||||
// Track current running agent for abort capability
|
||||
let currentAgent: { stop: () => void } | null = null;
|
||||
|
||||
/**
|
||||
* Abort the currently running agent operation
|
||||
* @returns true if an operation was aborted, false if nothing was running
|
||||
*/
|
||||
export const abortCurrentOperation = (): boolean => {
|
||||
if (currentAgent) {
|
||||
currentAgent.stop();
|
||||
currentAgent = null;
|
||||
appStore.cancelStreaming();
|
||||
appStore.stopThinking();
|
||||
appStore.setMode("idle");
|
||||
addDebugLog("state", "Operation aborted by user");
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
const createToolCallHandler =
|
||||
(
|
||||
@@ -72,7 +96,7 @@ const createToolCallHandler =
|
||||
) =>
|
||||
(call: { id: string; name: string; arguments?: Record<string, unknown> }) => {
|
||||
const args = call.arguments;
|
||||
if (FILE_MODIFYING_TOOLS.includes(call.name) && args?.path) {
|
||||
if ((FILE_MODIFYING_TOOLS as readonly string[]).includes(call.name) && args?.path) {
|
||||
toolCallRef.current = { name: call.name, path: String(args.path) };
|
||||
} else {
|
||||
toolCallRef.current = { name: call.name };
|
||||
@@ -117,10 +141,10 @@ const createToolResultHandler =
|
||||
/**
|
||||
* Create streaming callbacks for TUI integration
|
||||
*/
|
||||
const createStreamCallbacks = (): StreamCallbacks => {
|
||||
const createStreamCallbacks = (): StreamCallbacksWithState => {
|
||||
let chunkCount = 0;
|
||||
|
||||
return {
|
||||
const callbacks: StreamCallbacks = {
|
||||
onContentChunk: (content: string) => {
|
||||
chunkCount++;
|
||||
addDebugLog("stream", `Chunk #${chunkCount}: "${content.substring(0, 30)}${content.length > 30 ? "..." : ""}"`);
|
||||
@@ -155,8 +179,10 @@ const createStreamCallbacks = (): StreamCallbacks => {
|
||||
},
|
||||
|
||||
onComplete: () => {
|
||||
addDebugLog("stream", `Stream complete (${chunkCount} chunks)`);
|
||||
appStore.completeStreaming();
|
||||
// Note: Don't call completeStreaming() here!
|
||||
// The agent loop may have multiple iterations (tool calls + final response)
|
||||
// Streaming will be completed manually after the entire agent finishes
|
||||
addDebugLog("stream", `Stream iteration done (${chunkCount} chunks total)`);
|
||||
},
|
||||
|
||||
onError: (error: string) => {
|
||||
@@ -168,6 +194,11 @@ const createStreamCallbacks = (): StreamCallbacks => {
|
||||
});
|
||||
},
|
||||
};
|
||||
|
||||
return {
|
||||
callbacks,
|
||||
hasReceivedContent: () => chunkCount > 0,
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -245,6 +276,50 @@ export const handleMessage = async (
|
||||
// Check for feedback on previous response
|
||||
await checkUserFeedback(message, callbacks);
|
||||
|
||||
// Detect explicit command requests and execute directly
|
||||
const detected = detectCommand(message);
|
||||
if (detected.detected && detected.command) {
|
||||
addDebugLog("info", `Detected command: ${detected.command}`);
|
||||
|
||||
// Show the user's request
|
||||
appStore.addLog({
|
||||
type: "user",
|
||||
content: message,
|
||||
});
|
||||
|
||||
// Show what we're running
|
||||
appStore.addLog({
|
||||
type: "tool",
|
||||
content: detected.command,
|
||||
metadata: {
|
||||
toolName: "bash",
|
||||
toolStatus: "running",
|
||||
toolDescription: `Running: ${detected.command}`,
|
||||
},
|
||||
});
|
||||
|
||||
appStore.setMode("tool_execution");
|
||||
const result = await executeDetectedCommand(detected.command, process.cwd());
|
||||
appStore.setMode("idle");
|
||||
|
||||
// Show result
|
||||
if (result.success && result.output) {
|
||||
appStore.addLog({
|
||||
type: "assistant",
|
||||
content: result.output,
|
||||
});
|
||||
} else if (!result.success) {
|
||||
appStore.addLog({
|
||||
type: "error",
|
||||
content: result.error || "Command failed",
|
||||
});
|
||||
}
|
||||
|
||||
// Save to session (for persistence only, not UI)
|
||||
await saveSession();
|
||||
return;
|
||||
}
|
||||
|
||||
// Get interaction mode and cascade setting from app store
|
||||
const { interactionMode, cascadeEnabled } = appStore.getState();
|
||||
const isReadOnlyMode = interactionMode === "ask" || interactionMode === "code-review";
|
||||
@@ -397,23 +472,34 @@ export const handleMessage = async (
|
||||
}
|
||||
}
|
||||
|
||||
// Determine the correct model for the provider
|
||||
// If provider changed, use the provider's default model instead of state.model
|
||||
const effectiveModel =
|
||||
effectiveProvider === state.provider
|
||||
? state.model
|
||||
: getDefaultModel(effectiveProvider);
|
||||
|
||||
// Start streaming UI
|
||||
addDebugLog("state", `Starting request: provider=${effectiveProvider}, model=${state.model}`);
|
||||
addDebugLog("state", `Starting request: provider=${effectiveProvider}, model=${effectiveModel}`);
|
||||
addDebugLog("state", `Mode: ${appStore.getState().interactionMode}, Cascade: ${cascadeEnabled}`);
|
||||
appStore.setMode("thinking");
|
||||
appStore.startThinking();
|
||||
appStore.startStreaming();
|
||||
addDebugLog("state", "Streaming started");
|
||||
|
||||
const streamCallbacks = createStreamCallbacks();
|
||||
const streamState = createStreamCallbacks();
|
||||
const agent = createStreamingAgent(
|
||||
process.cwd(),
|
||||
{
|
||||
provider: effectiveProvider,
|
||||
model: state.model,
|
||||
model: effectiveModel,
|
||||
verbose: state.verbose,
|
||||
autoApprove: state.autoApprove,
|
||||
chatMode: isReadOnlyMode,
|
||||
onText: (text: string) => {
|
||||
addDebugLog("info", `onText callback: "${text.substring(0, 50)}..."`);
|
||||
appStore.appendStreamContent(text);
|
||||
},
|
||||
onToolCall: createToolCallHandler(callbacks, toolCallRef),
|
||||
onToolResult: createToolResultHandler(callbacks, toolCallRef),
|
||||
onError: (error) => {
|
||||
@@ -423,9 +509,12 @@ export const handleMessage = async (
|
||||
callbacks.onLog("system", warning);
|
||||
},
|
||||
},
|
||||
streamCallbacks,
|
||||
streamState.callbacks,
|
||||
);
|
||||
|
||||
// Store agent reference for abort capability
|
||||
currentAgent = agent;
|
||||
|
||||
try {
|
||||
addDebugLog("api", `Agent.run() started with ${state.messages.length} messages`);
|
||||
const result = await agent.run(state.messages);
|
||||
@@ -471,14 +560,18 @@ export const handleMessage = async (
|
||||
|
||||
// Check if streaming content was received - if not, add the response as a log
|
||||
// This handles cases where streaming didn't work or content was all in final response
|
||||
const streamingState = appStore.getState().streamingLog;
|
||||
if (!streamingState.content && finalResponse) {
|
||||
if (!streamState.hasReceivedContent() && finalResponse) {
|
||||
addDebugLog("info", "No streaming content received, adding fallback log");
|
||||
// Streaming didn't receive content, manually add the response
|
||||
appStore.cancelStreaming(); // Remove empty streaming log
|
||||
appStore.addLog({
|
||||
type: "assistant",
|
||||
content: finalResponse,
|
||||
});
|
||||
} else {
|
||||
// Streaming received content - finalize the streaming log
|
||||
addDebugLog("info", "Completing streaming with received content");
|
||||
appStore.completeStreaming();
|
||||
}
|
||||
|
||||
addMessage("user", message);
|
||||
@@ -501,5 +594,8 @@ export const handleMessage = async (
|
||||
appStore.cancelStreaming();
|
||||
appStore.stopThinking();
|
||||
callbacks.onLog("error", String(error));
|
||||
} finally {
|
||||
// Clear agent reference when done
|
||||
currentAgent = null;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
import type { Message } from "@/types/providers";
|
||||
import type { AgentOptions } from "@interfaces/AgentOptions";
|
||||
import type { AgentResult } from "@interfaces/AgentResult";
|
||||
import type { StreamingChatOptions } from "@interfaces/StreamingChatOptions";
|
||||
import type {
|
||||
StreamCallbacks,
|
||||
PartialToolCall,
|
||||
@@ -16,13 +17,8 @@ import type { ToolCall, ToolResult } from "@/types/tools";
|
||||
import { createStreamingAgent } from "@services/agent-stream";
|
||||
import { appStore } from "@tui/index";
|
||||
|
||||
// =============================================================================
|
||||
// Types
|
||||
// =============================================================================
|
||||
|
||||
export interface StreamingChatOptions extends AgentOptions {
|
||||
onModelSwitch?: (info: ModelSwitchInfo) => void;
|
||||
}
|
||||
// Re-export for convenience
|
||||
export type { StreamingChatOptions } from "@interfaces/StreamingChatOptions";
|
||||
|
||||
// =============================================================================
|
||||
// TUI Streaming Callbacks
|
||||
|
||||
@@ -5,16 +5,13 @@
|
||||
import { usageStore } from "@stores/usage-store";
|
||||
import { getUserInfo } from "@providers/copilot/credentials";
|
||||
import { getCopilotUsage } from "@providers/copilot/usage";
|
||||
import { PROGRESS_BAR } from "@constants/ui";
|
||||
import type {
|
||||
ChatServiceState,
|
||||
ChatServiceCallbacks,
|
||||
} from "@/types/chat-service";
|
||||
import type { CopilotQuotaDetail } from "@/types/copilot-usage";
|
||||
|
||||
const BAR_WIDTH = 40;
|
||||
const FILLED_CHAR = "█";
|
||||
const EMPTY_CHAR = "░";
|
||||
|
||||
const formatNumber = (num: number): string => {
|
||||
return num.toLocaleString();
|
||||
};
|
||||
@@ -35,9 +32,12 @@ const formatDuration = (ms: number): string => {
|
||||
|
||||
const renderBar = (percent: number): string => {
|
||||
const clampedPercent = Math.max(0, Math.min(100, percent));
|
||||
const filledWidth = Math.round((clampedPercent / 100) * BAR_WIDTH);
|
||||
const emptyWidth = BAR_WIDTH - filledWidth;
|
||||
return FILLED_CHAR.repeat(filledWidth) + EMPTY_CHAR.repeat(emptyWidth);
|
||||
const filledWidth = Math.round((clampedPercent / 100) * PROGRESS_BAR.WIDTH);
|
||||
const emptyWidth = PROGRESS_BAR.WIDTH - filledWidth;
|
||||
return (
|
||||
PROGRESS_BAR.FILLED_CHAR.repeat(filledWidth) +
|
||||
PROGRESS_BAR.EMPTY_CHAR.repeat(emptyWidth)
|
||||
);
|
||||
};
|
||||
|
||||
const formatQuotaBar = (
|
||||
@@ -55,7 +55,7 @@ const formatQuotaBar = (
|
||||
|
||||
if (quota.unlimited) {
|
||||
lines.push(name);
|
||||
lines.push(FILLED_CHAR.repeat(BAR_WIDTH) + " Unlimited");
|
||||
lines.push(PROGRESS_BAR.FILLED_CHAR.repeat(PROGRESS_BAR.WIDTH) + " Unlimited");
|
||||
return lines;
|
||||
}
|
||||
|
||||
|
||||
158
src/services/command-detection.ts
Normal file
158
src/services/command-detection.ts
Normal file
@@ -0,0 +1,158 @@
|
||||
/**
|
||||
* Command Detection Service
|
||||
*
|
||||
* Detects when user explicitly requests to run a command
|
||||
* and executes it directly without relying on LLM decision-making.
|
||||
*/
|
||||
|
||||
import { executeBash } from "@tools/bash/execute";
|
||||
import type { ToolContext } from "@/types/tools";
|
||||
import { v4 as uuidv4 } from "uuid";
|
||||
|
||||
/**
 * Patterns that indicate an explicit command request.
 *
 * Every pattern captures the requested command text in group 1. The consumer
 * takes the FIRST pattern that matches, so the list is ordered from most to
 * least specific; otherwise the generic `run <command>` form would swallow
 * phrasings such as "run ls for me" and capture "ls for me" as the command.
 */
const COMMAND_PATTERNS: readonly RegExp[] = [
  // "run a/the <command> command"
  /^run\s+(?:a\s+|the\s+)?(.+?)\s+command$/i,
  // "[can you] [please] run <command> [for me]"
  /^(?:can\s+you\s+)?(?:please\s+)?run\s+(.+?)(?:\s+for\s+me)?$/i,
  // Plain "run|execute|exec <command>"
  /^run\s+(.+)$/i,
  /^execute\s+(.+)$/i,
  /^exec\s+(.+)$/i,
  // "use <command> to ..." (captures only the command word)
  /^use\s+(\S+)\s+to\s+/i,
  // "show me [the] [output of] <command>"
  /^show\s+me\s+(?:the\s+)?(?:output\s+of\s+)?(.+)$/i,
];
|
||||
|
||||
/**
 * Common shell commands that should be executed directly.
 *
 * A message whose first word (lower-cased) is in this set is treated as a
 * command line in its own right.
 *
 * NOTE(review): some entries are interactive (`top`) or read stdin when given
 * no file argument (`cat`, `grep`, `head`, `tail`, `wc`) — confirm the bash
 * executor enforces a timeout so these cannot block the UI.
 */
const DIRECT_COMMANDS: ReadonlySet<string> = new Set([
  "ls",
  "tree",
  "pwd",
  "cat",
  "head",
  "tail",
  "find",
  "grep",
  "wc",
  "du",
  "df",
  "ps",
  "top",
  "which",
  "whoami",
  "date",
  "echo",
  "env",
  "printenv",
  "uname",
]);
|
||||
|
||||
/**
 * Result of inspecting a user message for an explicit command request.
 */
export interface DetectedCommand {
  /** True when the message was recognised as a command request. */
  detected: boolean;
  /** The command line to run; set when `detected` is true. */
  command?: string;
  /** The user message exactly as it was passed in. */
  originalMessage: string;
}
|
||||
|
||||
/**
 * Detect if the user message is an explicit command request.
 *
 * Two strategies are tried in order:
 *  1. The trimmed message is matched against `COMMAND_PATTERNS`; the first
 *     pattern whose capture normalizes to a non-empty command wins.
 *  2. Otherwise, if the first word of the message (lower-cased) is one of
 *     `DIRECT_COMMANDS`, the whole trimmed message is used as the command.
 *
 * @param message - Raw user input.
 * @returns A result with `detected: true` and a non-empty `command` when a
 *   request was recognised, otherwise `detected: false`. `originalMessage`
 *   always carries the untrimmed input.
 */
export const detectCommand = (message: string): DetectedCommand => {
  const trimmed = message.trim();

  // Strategy 1: phrasing patterns ("run X", "execute X", ...)
  for (const pattern of COMMAND_PATTERNS) {
    const captured = pattern.exec(trimmed)?.[1]?.trim();

    // Reject missing/empty captures and implausibly long "commands"
    if (!captured || captured.length >= 500) {
      continue;
    }

    const command = normalizeCommand(captured);

    // Normalization can strip everything (e.g. `run ""`); never report a
    // detection that carries an empty command.
    if (command) {
      return {
        detected: true,
        command,
        originalMessage: message,
      };
    }
  }

  // Strategy 2: message starts with a known shell command
  const firstWord = trimmed.split(/\s+/)[0]?.toLowerCase() ?? "";
  if (DIRECT_COMMANDS.has(firstWord)) {
    return {
      detected: true,
      command: trimmed,
      originalMessage: message,
    };
  }

  return {
    detected: false,
    originalMessage: message,
  };
};
|
||||
|
||||
/**
 * Normalize command - handle common variations in how users phrase it.
 *
 * Steps, applied in order:
 *  - strip one pair of matching wrapping quotes (`"ls -la"` -> `ls -la`)
 *  - drop a trailing " command" (`tree command` -> `tree`)
 *  - drop a leading article "the" and then "a" (`the tree` -> `tree`)
 *
 * Matching is case-insensitive so that it agrees with the case-insensitive
 * detection patterns ("Run The tree Command" -> "tree").
 *
 * @param command - Command text captured from the user message.
 * @returns The cleaned command; may be empty if nothing remains.
 */
const normalizeCommand = (command: string): string => {
  let normalized = command.trim();

  // Remove quotes if the whole command is wrapped in a matching pair
  const quote = normalized.charAt(0);
  if ((quote === '"' || quote === "'") && normalized.endsWith(quote)) {
    normalized = normalized.slice(1, -1).trim();
  }

  return normalized
    .replace(/\s+command$/i, "") // "tree command" -> "tree"
    .replace(/^the\s+/i, "") // "the tree" -> "tree"
    .replace(/^a\s+/i, "") // "a ls" -> "ls"
    .trim();
};
|
||||
|
||||
/**
 * Execute a detected command directly through the bash tool.
 *
 * Builds a one-off tool context (fresh session/message ids) and runs the
 * command with `executeBash`. Failures are reported through the returned
 * object rather than by throwing, so callers can always restore their own
 * state after awaiting this function.
 *
 * NOTE(review): the context is created with `autoApprove: true`, so the
 * command runs without a permission prompt. The command text comes straight
 * from pattern-matching user input — confirm this bypass is intended.
 *
 * @param command - Command line to run.
 * @param workingDir - Directory the command is executed in.
 * @param abortController - Optional controller used to cancel the command;
 *   a fresh one is created when omitted.
 * @returns `success`, the command `output`, and an `error` message on failure.
 */
export const executeDetectedCommand = async (
  command: string,
  workingDir: string,
  abortController?: AbortController,
): Promise<{
  success: boolean;
  output: string;
  error?: string;
}> => {
  // Nothing to run: report it instead of spawning a shell for an empty line
  if (!command.trim()) {
    return {
      success: false,
      output: "",
      error: "No command to execute",
    };
  }

  const ctx: ToolContext = {
    sessionId: uuidv4(),
    messageId: uuidv4(),
    workingDir,
    abort: abortController ?? new AbortController(),
    autoApprove: true, // Direct command requests are auto-approved
    onMetadata: () => {},
  };

  try {
    const result = await executeBash({ command }, ctx);

    return {
      success: result.success,
      output: result.output,
      error: result.error,
    };
  } catch (error: unknown) {
    // Surface unexpected executor failures through the result contract
    return {
      success: false,
      output: "",
      error: error instanceof Error ? error.message : String(error),
    };
  }
};
|
||||
@@ -1,231 +0,0 @@
|
||||
/**
|
||||
* Unit tests for Vector Store
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from "bun:test";
|
||||
|
||||
import {
|
||||
cosineSimilarity,
|
||||
euclideanDistance,
|
||||
upsertEmbedding,
|
||||
removeEmbedding,
|
||||
hasEmbedding,
|
||||
getEmbedding,
|
||||
findSimilar,
|
||||
findAboveThreshold,
|
||||
getIndexStats,
|
||||
} from "@services/learning/vector-store";
|
||||
|
||||
import { createEmptyIndex } from "@/types/embeddings";
|
||||
|
||||
describe("Vector Store", () => {
|
||||
describe("cosineSimilarity", () => {
|
||||
it("should return 1 for identical vectors", () => {
|
||||
const a = [1, 0, 0];
|
||||
const b = [1, 0, 0];
|
||||
|
||||
expect(cosineSimilarity(a, b)).toBeCloseTo(1);
|
||||
});
|
||||
|
||||
it("should return 0 for orthogonal vectors", () => {
|
||||
const a = [1, 0, 0];
|
||||
const b = [0, 1, 0];
|
||||
|
||||
expect(cosineSimilarity(a, b)).toBeCloseTo(0);
|
||||
});
|
||||
|
||||
it("should return -1 for opposite vectors", () => {
|
||||
const a = [1, 0, 0];
|
||||
const b = [-1, 0, 0];
|
||||
|
||||
expect(cosineSimilarity(a, b)).toBeCloseTo(-1);
|
||||
});
|
||||
|
||||
it("should handle normalized vectors", () => {
|
||||
const a = [0.6, 0.8, 0];
|
||||
const b = [0.8, 0.6, 0];
|
||||
|
||||
const similarity = cosineSimilarity(a, b);
|
||||
expect(similarity).toBeGreaterThan(0);
|
||||
expect(similarity).toBeLessThan(1);
|
||||
});
|
||||
|
||||
it("should return 0 for mismatched lengths", () => {
|
||||
const a = [1, 0, 0];
|
||||
const b = [1, 0];
|
||||
|
||||
expect(cosineSimilarity(a, b)).toBe(0);
|
||||
});
|
||||
|
||||
it("should handle zero vectors", () => {
|
||||
const a = [0, 0, 0];
|
||||
const b = [1, 0, 0];
|
||||
|
||||
expect(cosineSimilarity(a, b)).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("euclideanDistance", () => {
|
||||
it("should return 0 for identical vectors", () => {
|
||||
const a = [1, 2, 3];
|
||||
const b = [1, 2, 3];
|
||||
|
||||
expect(euclideanDistance(a, b)).toBe(0);
|
||||
});
|
||||
|
||||
it("should compute correct distance", () => {
|
||||
const a = [0, 0, 0];
|
||||
const b = [3, 4, 0];
|
||||
|
||||
expect(euclideanDistance(a, b)).toBe(5);
|
||||
});
|
||||
|
||||
it("should return Infinity for mismatched lengths", () => {
|
||||
const a = [1, 0, 0];
|
||||
const b = [1, 0];
|
||||
|
||||
expect(euclideanDistance(a, b)).toBe(Infinity);
|
||||
});
|
||||
});
|
||||
|
||||
// Covers creating an embedding index and inserting, replacing, removing,
// looking up and counting entries in it.
describe("Index Operations", () => {
  it("should create empty index", () => {
    const fresh = createEmptyIndex("test-model");

    expect(fresh.version).toBe(1);
    expect(fresh.model).toBe("test-model");
    expect(Object.keys(fresh.embeddings)).toHaveLength(0);
  });

  it("should upsert embedding", () => {
    const empty = createEmptyIndex("test-model");
    const vector = [0.1, 0.2, 0.3];

    const populated = upsertEmbedding(empty, "learn_1", vector);

    expect(hasEmbedding(populated, "learn_1")).toBe(true);
    expect(getEmbedding(populated, "learn_1")?.embedding).toEqual(vector);
  });

  it("should update existing embedding", () => {
    const empty = createEmptyIndex("test-model");
    const original = [0.1, 0.2, 0.3];
    const replacement = [0.4, 0.5, 0.6];

    // Writing the same id twice must leave the second vector in place.
    const afterFirst = upsertEmbedding(empty, "learn_1", original);
    const afterSecond = upsertEmbedding(afterFirst, "learn_1", replacement);

    expect(getEmbedding(afterSecond, "learn_1")?.embedding).toEqual(
      replacement,
    );
  });

  it("should remove embedding", () => {
    const empty = createEmptyIndex("test-model");
    const withOne = upsertEmbedding(empty, "learn_1", [0.1, 0.2, 0.3]);
    const withTwo = upsertEmbedding(withOne, "learn_2", [0.4, 0.5, 0.6]);

    const afterRemoval = removeEmbedding(withTwo, "learn_1");

    // Only the removed id disappears; its sibling stays.
    expect(hasEmbedding(afterRemoval, "learn_1")).toBe(false);
    expect(hasEmbedding(afterRemoval, "learn_2")).toBe(true);
  });

  it("should return null for missing embedding", () => {
    const empty = createEmptyIndex("test-model");
    const lookup = getEmbedding(empty, "nonexistent");

    expect(lookup).toBeNull();
  });

  it("should track index stats", () => {
    const empty = createEmptyIndex("test-model");
    const withOne = upsertEmbedding(empty, "learn_1", [0.1, 0.2, 0.3]);
    const withTwo = upsertEmbedding(withOne, "learn_2", [0.4, 0.5, 0.6]);

    const { count, model } = getIndexStats(withTwo);

    expect(count).toBe(2);
    expect(model).toBe("test-model");
  });
});
|
||||
|
||||
// Covers findSimilar (top-K search with a minimum-similarity cut-off) and
// findAboveThreshold on small hand-built indexes.
describe("Similarity Search", () => {
  it("should find similar embeddings", () => {
    let index = createEmptyIndex("test-model");

    // Add embeddings with known similarities
    index = upsertEmbedding(index, "a", [1, 0, 0]);
    index = upsertEmbedding(index, "b", [0.9, 0.1, 0]);
    index = upsertEmbedding(index, "c", [0, 1, 0]);

    const query = [1, 0, 0];
    const results = findSimilar(index, query, 2, 0);

    // "a" equals the query, "b" is the next closest; "c" is orthogonal.
    expect(results).toHaveLength(2);
    expect(results[0].id).toBe("a");
    expect(results[0].score).toBeCloseTo(1);
    expect(results[1].id).toBe("b");
  });

  it("should respect minSimilarity threshold", () => {
    let index = createEmptyIndex("test-model");

    index = upsertEmbedding(index, "a", [1, 0, 0]);
    index = upsertEmbedding(index, "b", [0, 1, 0]);

    const query = [1, 0, 0];
    const results = findSimilar(index, query, 10, 0.5);

    // "b" is orthogonal to the query, so only "a" clears the 0.5 cut-off.
    expect(results).toHaveLength(1);
    expect(results[0].id).toBe("a");
  });

  it("should limit results to topK", () => {
    let index = createEmptyIndex("test-model");

    // Deterministic vectors (no Math.random) so the test cannot flake.
    // Every component is strictly positive, so each entry has a strictly
    // positive cosine similarity with the all-positive query below.
    for (let i = 0; i < 10; i++) {
      const embedding = [1, (i + 1) / 10, (10 - i) / 10];
      index = upsertEmbedding(index, `learn_${i}`, embedding);
    }

    const query = [0.5, 0.5, 0.5];
    const results = findSimilar(index, query, 3, 0);

    // Ten candidates clear the threshold and topK is 3, so exactly three
    // results are expected; "<= 3" would also accept an empty result.
    expect(results).toHaveLength(3);
  });

  it("should find all above threshold", () => {
    let index = createEmptyIndex("test-model");

    index = upsertEmbedding(index, "a", [1, 0, 0]);
    index = upsertEmbedding(index, "b", [0.95, 0.05, 0]);
    index = upsertEmbedding(index, "c", [0.9, 0.1, 0]);
    index = upsertEmbedding(index, "d", [0, 1, 0]);

    const query = [1, 0, 0];
    const results = findAboveThreshold(index, query, 0.85);
    const ids = results.map((r) => r.id);

    // "d" is orthogonal to the query and must be the only one left out.
    expect(results.length).toBe(3);
    expect(ids).toContain("a");
    expect(ids).toContain("b");
    expect(ids).toContain("c");
  });

  it("should return empty array for no matches", () => {
    let index = createEmptyIndex("test-model");

    index = upsertEmbedding(index, "a", [1, 0, 0]);

    // The query points the opposite way from the only stored vector.
    const query = [-1, 0, 0];
    const results = findSimilar(index, query, 10, 0.5);

    expect(results).toHaveLength(0);
  });

  it("should handle empty index", () => {
    const index = createEmptyIndex("test-model");
    const query = [1, 0, 0];
    const results = findSimilar(index, query, 10, 0);

    expect(results).toHaveLength(0);
  });
});
|
||||
});
|
||||
@@ -1,152 +0,0 @@
|
||||
/**
|
||||
* Unit tests for Bash Pattern Matcher
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from "bun:test";
|
||||
|
||||
import {
|
||||
matchesBashPattern,
|
||||
isBashAllowedByIndex,
|
||||
findMatchingBashPatterns,
|
||||
generateBashPattern,
|
||||
extractCommandPrefix,
|
||||
} from "@services/permissions/matchers/bash";
|
||||
import { buildPatternIndex } from "@services/permissions/pattern-index";
|
||||
import type { PermissionPattern } from "@/types/permissions";
|
||||
|
||||
describe("Bash Pattern Matcher", () => {
|
||||
// Checks matchesBashPattern against wildcard, prefix and exact argument
// patterns, and against a pattern that belongs to another tool.
describe("matchesBashPattern", () => {
  // Builds a fresh Bash pattern object for each test.
  const bashPattern = (command: string, args: string): PermissionPattern => ({
    tool: "Bash",
    command,
    args,
  });

  it("should match exact command with wildcard args", () => {
    const anyGit = bashPattern("git", "*");
    const accepted = ["git", "git status", "git commit -m 'msg'"];

    for (const commandLine of accepted) {
      expect(matchesBashPattern(commandLine, anyGit)).toBe(true);
    }
  });

  it("should not match different command", () => {
    const anyGit = bashPattern("git", "*");
    // "gitx" shares the prefix "git" but is a different command.
    const rejected = ["npm install", "gitx status"];

    for (const commandLine of rejected) {
      expect(matchesBashPattern(commandLine, anyGit)).toBe(false);
    }
  });

  it("should match command with specific args prefix", () => {
    const gitStatus = bashPattern("git", "status*");
    const cases: Array<[string, boolean]> = [
      ["git status", true],
      ["git status --short", true],
      ["git commit", false],
    ];

    for (const [commandLine, expected] of cases) {
      expect(matchesBashPattern(commandLine, gitStatus)).toBe(expected);
    }
  });

  it("should match exact args", () => {
    const npmInstall = bashPattern("npm", "install");
    const cases: Array<[string, boolean]> = [
      ["npm install", true],
      ["npm install lodash", false],
    ];

    for (const [commandLine, expected] of cases) {
      expect(matchesBashPattern(commandLine, npmInstall)).toBe(expected);
    }
  });

  it("should reject non-Bash patterns", () => {
    const readAnything: PermissionPattern = {
      tool: "Read",
      path: "*",
    };
    const matched = matchesBashPattern("ls", readAnything);

    expect(matched).toBe(false);
  });
});
|
||||
|
||||
// Checks isBashAllowedByIndex against a populated and an empty pattern index.
describe("isBashAllowedByIndex", () => {
  it("should check against index patterns", () => {
    const allowList = buildPatternIndex(["Bash(git:*)", "Bash(npm install:*)"]);
    const cases: Array<[string, boolean]> = [
      ["git status", true],
      ["git commit", true],
      ["npm install lodash", true],
      ["npm run build", false],
      ["rm -rf /", false],
    ];

    for (const [commandLine, expected] of cases) {
      expect(isBashAllowedByIndex(commandLine, allowList)).toBe(expected);
    }
  });

  it("should return false for empty index", () => {
    const emptyIndex = buildPatternIndex([]);
    const allowed = isBashAllowedByIndex("git status", emptyIndex);

    expect(allowed).toBe(false);
  });
});
|
||||
|
||||
// Checks that findMatchingBashPatterns returns every indexed pattern that
// covers a command, and nothing when none does.
describe("findMatchingBashPatterns", () => {
  it("should find all matching patterns", () => {
    const patternIndex = buildPatternIndex([
      "Bash(git:*)",
      "Bash(git status:*)",
      "Bash(npm:*)",
    ]);

    const found = findMatchingBashPatterns("git status", patternIndex);
    const rawPatterns = found.map((m) => m.raw);

    // Both the broad and the narrow git pattern apply; the npm one does not.
    expect(found.length).toBe(2);
    expect(rawPatterns).toContain("Bash(git:*)");
    expect(rawPatterns).toContain("Bash(git status:*)");
  });

  it("should return empty for no matches", () => {
    const patternIndex = buildPatternIndex(["Bash(git:*)"]);
    const found = findMatchingBashPatterns("npm install", patternIndex);

    expect(found).toHaveLength(0);
  });
});
|
||||
|
||||
// Checks the pattern string generateBashPattern produces for a command line.
describe("generateBashPattern", () => {
  // Runs every [command line, expected pattern] pair through the generator.
  const expectPatterns = (cases: Array<[string, string]>): void => {
    for (const [commandLine, expected] of cases) {
      expect(generateBashPattern(commandLine)).toBe(expected);
    }
  };

  it("should generate pattern for multi-word commands", () => {
    expectPatterns([
      ["git status", "Bash(git status:*)"],
      ["npm install lodash", "Bash(npm install:*)"],
      ["docker run nginx", "Bash(docker run:*)"],
    ]);
  });

  it("should generate pattern for single commands", () => {
    expectPatterns([
      ["ls", "Bash(ls:*)"],
      ["pwd", "Bash(pwd:*)"],
    ]);
  });

  it("should handle commands with many args", () => {
    expectPatterns([["git commit -m 'message'", "Bash(git commit:*)"]]);
  });
});
|
||||
|
||||
// Checks which leading words extractCommandPrefix keeps from a command line.
describe("extractCommandPrefix", () => {
  it("should extract multi-word prefix", () => {
    const cases: Array<[string, string]> = [
      ["git status", "git status"],
      ["npm install lodash", "npm install"],
      ["bun test --watch", "bun test"],
    ];

    for (const [commandLine, prefix] of cases) {
      expect(extractCommandPrefix(commandLine)).toBe(prefix);
    }
  });

  it("should extract single word for non-recognized commands", () => {
    const cases: Array<[string, string]> = [
      ["ls -la", "ls"],
      ["cat file.txt", "cat"],
    ];

    for (const [commandLine, prefix] of cases) {
      expect(extractCommandPrefix(commandLine)).toBe(prefix);
    }
  });
});
|
||||
});
|
||||
@@ -1,158 +0,0 @@
|
||||
/**
|
||||
* Unit tests for Path Pattern Matcher
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from "bun:test";
|
||||
|
||||
import {
|
||||
matchesPathPattern,
|
||||
matchesFilePattern,
|
||||
isFileOpAllowedByIndex,
|
||||
findMatchingFilePatterns,
|
||||
generateFilePattern,
|
||||
normalizePath,
|
||||
isPathInDirectory,
|
||||
} from "@services/permissions/matchers/path";
|
||||
import { buildPatternIndex } from "@services/permissions/pattern-index";
|
||||
import type { PermissionPattern } from "@/types/permissions";
|
||||
|
||||
describe("Path Pattern Matcher", () => {
|
||||
// Checks matchesPathPattern for wildcard, directory-prefix, extension,
// exact-path and substring patterns.
describe("matchesPathPattern", () => {
  // Runs every [path, pattern, expected] triple through the matcher.
  const expectMatches = (cases: Array<[string, string, boolean]>): void => {
    for (const [path, pattern, expected] of cases) {
      expect(matchesPathPattern(path, pattern)).toBe(expected);
    }
  };

  it("should match wildcard pattern", () => {
    expectMatches([
      ["/any/path/file.ts", "*", true],
      ["relative/file.js", "*", true],
    ]);
  });

  it("should match directory prefix pattern", () => {
    expectMatches([
      ["src/file.ts", "src/*", true],
      ["src/nested/file.ts", "src/*", true],
      ["tests/file.ts", "src/*", false],
    ]);
  });

  it("should match extension pattern", () => {
    expectMatches([
      ["file.ts", "*.ts", true],
      ["src/nested/file.ts", "*.ts", true],
      ["file.js", "*.ts", false],
    ]);
  });

  it("should match exact path", () => {
    expectMatches([
      ["src/file.ts", "src/file.ts", true],
      ["src/other.ts", "src/file.ts", false],
    ]);
  });

  it("should match substring", () => {
    expectMatches([["/path/to/config/settings.json", "config", true]]);
  });
});
|
||||
|
||||
// Checks matchesFilePattern with a parsed path pattern and with a pattern
// that carries no path at all.
describe("matchesFilePattern", () => {
  it("should match with parsed pattern", () => {
    const readTypeScript: PermissionPattern = {
      tool: "Read",
      path: "*.ts",
    };

    const tsMatched = matchesFilePattern("file.ts", readTypeScript);
    expect(tsMatched).toBe(true);

    const jsMatched = matchesFilePattern("file.js", readTypeScript);
    expect(jsMatched).toBe(false);
  });

  it("should return false for pattern without path", () => {
    // A Bash pattern has a command but no path field.
    const gitCommand: PermissionPattern = {
      tool: "Bash",
      command: "git",
    };
    const matched = matchesFilePattern("file.ts", gitCommand);

    expect(matched).toBe(false);
  });
});
|
||||
|
||||
// Checks isFileOpAllowedByIndex per tool: Read and Write patterns are
// looked up separately, and an empty index allows nothing.
describe("isFileOpAllowedByIndex", () => {
  it("should check Read operations", () => {
    const patternIndex = buildPatternIndex(["Read(*.ts)", "Read(src/*)"]);
    const cases: Array<[string, boolean]> = [
      ["file.ts", true],
      ["src/nested.js", true],
      ["tests/file.js", false],
    ];

    for (const [path, expected] of cases) {
      expect(isFileOpAllowedByIndex("Read", path, patternIndex)).toBe(expected);
    }
  });

  it("should check Write operations separately", () => {
    const patternIndex = buildPatternIndex(["Read(*)", "Write(src/*)"]);

    // The blanket Read pattern must not grant Write access.
    const readAnywhere = isFileOpAllowedByIndex(
      "Read",
      "any/file.ts",
      patternIndex,
    );
    expect(readAnywhere).toBe(true);

    const writeAnywhere = isFileOpAllowedByIndex(
      "Write",
      "any/file.ts",
      patternIndex,
    );
    expect(writeAnywhere).toBe(false);

    const writeInSrc = isFileOpAllowedByIndex(
      "Write",
      "src/file.ts",
      patternIndex,
    );
    expect(writeInSrc).toBe(true);
  });

  it("should return false for empty index", () => {
    const emptyIndex = buildPatternIndex([]);
    const allowed = isFileOpAllowedByIndex("Read", "file.ts", emptyIndex);

    expect(allowed).toBe(false);
  });
});
|
||||
|
||||
// Checks that findMatchingFilePatterns returns every pattern covering a
// path for the given tool, and nothing when none does.
describe("findMatchingFilePatterns", () => {
  it("should find all matching patterns", () => {
    // Wildcard, extension and directory patterns all cover src/file.ts.
    const readPatterns = ["Read(*)", "Read(*.ts)", "Read(src/*)"];
    const patternIndex = buildPatternIndex(readPatterns);

    const found = findMatchingFilePatterns("Read", "src/file.ts", patternIndex);

    expect(found.length).toBe(3);
  });

  it("should return empty for no matches", () => {
    const patternIndex = buildPatternIndex(["Read(src/*)"]);

    const found = findMatchingFilePatterns(
      "Read",
      "tests/file.ts",
      patternIndex,
    );

    expect(found).toHaveLength(0);
  });
});
|
||||
|
||||
// Checks which pattern string generateFilePattern derives from a tool name
// and a file path.
describe("generateFilePattern", () => {
  it("should generate extension-based pattern for common extensions", () => {
    const fromTs = generateFilePattern("Read", "file.ts");
    expect(fromTs).toBe("Read(*.ts)");

    const fromJson = generateFilePattern("Write", "file.json");
    expect(fromJson).toBe("Write(*.json)");

    const fromTsx = generateFilePattern("Edit", "file.tsx");
    expect(fromTsx).toBe("Edit(*.tsx)");
  });

  it("should generate directory-based pattern when appropriate", () => {
    // ".xyz" is expected to fall through to the directory form.
    const fromUnknownExtension = generateFilePattern("Read", "src/file.xyz");

    expect(fromUnknownExtension).toBe("Read(src/*)");
  });

  it("should fall back to basename", () => {
    // No extension and no directory component.
    const fromBareName = generateFilePattern("Read", "Makefile");

    expect(fromBareName).toBe("Read(Makefile)");
  });
});
|
||||
|
||||
// Checks normalizePath on a clean path, a doubled separator and a leading "./".
describe("normalizePath", () => {
  it("should normalize path separators", () => {
    const inputs = ["src/file.ts", "src//file.ts", "./src/file.ts"];

    // All three spellings are expected to collapse to the same path.
    for (const input of inputs) {
      expect(normalizePath(input)).toBe("src/file.ts");
    }
  });
});
|
||||
|
||||
// Checks isPathInDirectory for direct children, nested children, siblings
// and directories that merely share a name prefix.
describe("isPathInDirectory", () => {
  it("should check if path is in directory", () => {
    const cases: Array<[string, boolean]> = [
      ["/project/src/file.ts", true],
      ["/project/src/nested/file.ts", true],
      ["/project/tests/file.ts", false],
    ];

    for (const [path, expected] of cases) {
      expect(isPathInDirectory(path, "/project/src")).toBe(expected);
    }
  });

  it("should not match partial directory names", () => {
    // "/project/src-backup" starts with "/project/src" as a string but is
    // a different directory.
    const inside = isPathInDirectory(
      "/project/src-backup/file.ts",
      "/project/src",
    );

    expect(inside).toBe(false);
  });
});
|
||||
});
|
||||
@@ -1,186 +0,0 @@
|
||||
/**
|
||||
* Unit tests for Permission Pattern Index
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from "bun:test";
|
||||
|
||||
import {
|
||||
createPatternIndex,
|
||||
buildPatternIndex,
|
||||
addToIndex,
|
||||
removeFromIndex,
|
||||
getPatternsForTool,
|
||||
hasPattern,
|
||||
getRawPatterns,
|
||||
mergeIndexes,
|
||||
getIndexStats,
|
||||
} from "@services/permissions/pattern-index";
|
||||
|
||||
describe("Permission Pattern Index", () => {
|
||||
// Checks that createPatternIndex starts with no patterns and no tool buckets.
describe("createPatternIndex", () => {
  it("should create empty index", () => {
    const { all, byTool } = createPatternIndex();

    expect(all).toHaveLength(0);
    expect(byTool.size).toBe(0);
  });
});
|
||||
|
||||
// Checks buildPatternIndex on a mixed list, a list with an unparsable
// entry and an empty list.
describe("buildPatternIndex", () => {
  it("should build index from patterns", () => {
    const built = buildPatternIndex([
      "Bash(git:*)",
      "Bash(npm install:*)",
      "Read(*)",
      "Write(src/*)",
    ]);

    expect(built.all).toHaveLength(4);

    // Patterns are grouped per tool.
    const expectedPerTool: Array<[string, number]> = [
      ["Bash", 2],
      ["Read", 1],
      ["Write", 1],
    ];
    for (const [tool, count] of expectedPerTool) {
      expect(built.byTool.get(tool)).toHaveLength(count);
    }
  });

  it("should skip invalid patterns", () => {
    // The middle entry is not in Tool(...) form.
    const built = buildPatternIndex([
      "Bash(git:*)",
      "invalid pattern",
      "Read(*)",
    ]);

    expect(built.all).toHaveLength(2);
  });

  it("should handle empty array", () => {
    const built = buildPatternIndex([]);

    expect(built.all).toHaveLength(0);
  });
});
|
||||
|
||||
// Checks addToIndex: insertion, de-duplication and per-tool bucketing.
describe("addToIndex", () => {
  it("should add pattern to index", () => {
    const empty = createPatternIndex();
    const updated = addToIndex(empty, "Bash(git:*)");

    expect(updated.all).toHaveLength(1);
    expect(hasPattern(updated, "Bash(git:*)")).toBe(true);
  });

  it("should not duplicate patterns", () => {
    const seeded = buildPatternIndex(["Bash(git:*)"]);
    // Adding a pattern that is already present must not grow the index.
    const updated = addToIndex(seeded, "Bash(git:*)");

    expect(updated.all).toHaveLength(1);
  });

  it("should add to correct tool bucket", () => {
    const empty = createPatternIndex();
    const updated = addToIndex(empty, "Read(src/*)");

    expect(getPatternsForTool(updated, "Read")).toHaveLength(1);
    expect(getPatternsForTool(updated, "Bash")).toHaveLength(0);
  });
});
|
||||
|
||||
// Checks removeFromIndex for a present pattern and for one that was never added.
describe("removeFromIndex", () => {
  it("should remove pattern from index", () => {
    const seeded = buildPatternIndex(["Bash(git:*)", "Read(*)"]);
    const trimmed = removeFromIndex(seeded, "Bash(git:*)");

    expect(trimmed.all).toHaveLength(1);
    // Only the requested pattern is gone.
    expect(hasPattern(trimmed, "Bash(git:*)")).toBe(false);
    expect(hasPattern(trimmed, "Read(*)")).toBe(true);
  });

  it("should handle non-existent pattern", () => {
    const seeded = buildPatternIndex(["Bash(git:*)"]);
    const untouched = removeFromIndex(seeded, "Read(*)");

    expect(untouched.all).toHaveLength(1);
  });
});
|
||||
|
||||
// Checks that getPatternsForTool returns only the requested tool's patterns,
// and an empty list for a tool with none.
describe("getPatternsForTool", () => {
  it("should return patterns for specific tool", () => {
    const patternIndex = buildPatternIndex([
      "Bash(git:*)",
      "Bash(npm:*)",
      "Read(*)",
    ]);

    const forBash = getPatternsForTool(patternIndex, "Bash");
    const forRead = getPatternsForTool(patternIndex, "Read");
    // No Write pattern was indexed.
    const forWrite = getPatternsForTool(patternIndex, "Write");

    expect(forBash).toHaveLength(2);
    expect(forRead).toHaveLength(1);
    expect(forWrite).toHaveLength(0);
  });
});
|
||||
|
||||
// Checks that getRawPatterns hands back the strings the index was built from.
describe("getRawPatterns", () => {
  it("should return all raw pattern strings", () => {
    const source = ["Bash(git:*)", "Read(*)"];

    const roundTripped = getRawPatterns(buildPatternIndex(source));

    expect(roundTripped).toEqual(source);
  });
});
|
||||
|
||||
// Checks mergeIndexes on disjoint indexes, duplicated patterns and an
// empty input index.
describe("mergeIndexes", () => {
  it("should merge multiple indexes", () => {
    const bashOnly = buildPatternIndex(["Bash(git:*)"]);
    const readOnly = buildPatternIndex(["Read(*)"]);
    const writeOnly = buildPatternIndex(["Write(src/*)"]);

    const combined = mergeIndexes(bashOnly, readOnly, writeOnly);

    expect(combined.all).toHaveLength(3);
    for (const tool of ["Bash", "Read", "Write"]) {
      expect(getPatternsForTool(combined, tool)).toHaveLength(1);
    }
  });

  it("should preserve duplicates from different indexes", () => {
    const first = buildPatternIndex(["Bash(git:*)"]);
    const second = buildPatternIndex(["Bash(git:*)"]);

    const combined = mergeIndexes(first, second);

    // Duplicates preserved (session might override global)
    expect(combined.all).toHaveLength(2);
  });

  it("should handle empty indexes", () => {
    const nothing = createPatternIndex();
    const readOnly = buildPatternIndex(["Read(*)"]);

    const combined = mergeIndexes(nothing, readOnly);

    expect(combined.all).toHaveLength(1);
  });
});
|
||||
|
||||
// Checks the total and per-tool counts reported by getIndexStats.
describe("getIndexStats", () => {
  it("should return correct statistics", () => {
    const patternIndex = buildPatternIndex([
      "Bash(git:*)",
      "Bash(npm:*)",
      "Read(*)",
      "Write(src/*)",
      "Edit(*.ts)",
    ]);

    const { total, byTool } = getIndexStats(patternIndex);

    expect(total).toBe(5);
    expect(byTool["Bash"]).toBe(2);
    expect(byTool["Read"]).toBe(1);
    expect(byTool["Write"]).toBe(1);
    expect(byTool["Edit"]).toBe(1);
  });
});
|
||||
});
|
||||
@@ -1,427 +0,0 @@
|
||||
/**
|
||||
* Unit tests for Memory Selection Layer
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from "bun:test";
|
||||
|
||||
import {
|
||||
selectRelevantMemories,
|
||||
computeRelevance,
|
||||
computeMandatoryItems,
|
||||
createMemoryItem,
|
||||
createQueryContext,
|
||||
createMemoryStore,
|
||||
addMemory,
|
||||
findMemoriesByType,
|
||||
findMemoriesByPath,
|
||||
pruneOldMemories,
|
||||
} from "../memory-selection";
|
||||
|
||||
import type {
|
||||
MemoryItem,
|
||||
QueryContext,
|
||||
SelectionInput,
|
||||
} from "@src/types/reasoning";
|
||||
|
||||
describe("Memory Selection Layer", () => {
|
||||
/**
 * Builds a MemoryItem fixture for the tests in this suite.
 *
 * @param content - Text of the memory; also the source of `tokens`
 *   (lower-cased, split on whitespace) and `tokenCount` (a quarter of the
 *   character count, rounded up).
 * @param type - Memory type; defaults to "CONVERSATION".
 * @param options - Field overrides applied last, so they win over every
 *   generated default, including `id` and `timestamp`.
 * @returns A new item with a random `mem_`-prefixed id and the current time
 *   as timestamp unless overridden.
 */
const createTestMemory = (
  content: string,
  type: MemoryItem["type"] = "CONVERSATION",
  options: Partial<MemoryItem> = {},
): MemoryItem => {
  const randomSuffix = Math.random().toString(36).slice(2);
  const words = content.toLowerCase().split(/\s+/);
  const createdAt = Date.now();
  const estimatedTokens = Math.ceil(content.length * 0.25);

  return {
    id: `mem_${randomSuffix}`,
    content,
    tokens: words,
    entities: [],
    timestamp: createdAt,
    type,
    causalLinks: [],
    tokenCount: estimatedTokens,
    ...options,
  };
};
|
||||
|
||||
// Checks the individual components of the score computeRelevance returns:
// keyword overlap, recency, type bonus, causal links and path overlap.
describe("computeRelevance", () => {
  it("should score higher for keyword overlap", () => {
    const item = createTestMemory(
      "The function handles database queries efficiently",
    );
    const overlappingQuery = createQueryContext(
      "database query optimization",
      {},
    );
    const unrelatedQuery = createQueryContext("user interface design", {});

    const overlappingScore = computeRelevance(item, overlappingQuery);
    const unrelatedScore = computeRelevance(item, unrelatedQuery);

    expect(overlappingScore.total).toBeGreaterThan(unrelatedScore.total);
  });

  it("should score higher for recent memories", () => {
    const fresh = createTestMemory("Recent content", "CONVERSATION", {
      timestamp: Date.now(),
    });
    const stale = createTestMemory("Old content", "CONVERSATION", {
      timestamp: Date.now() - 3600000, // 1 hour ago
    });

    const query = createQueryContext("content search", {});

    const freshScore = computeRelevance(fresh, query);
    const staleScore = computeRelevance(stale, query);

    expect(freshScore.breakdown.recency).toBeGreaterThan(
      staleScore.breakdown.recency,
    );
  });

  it("should give type bonus to ERROR type", () => {
    // Same text, different type, so only the type bonus can differ.
    const text = "Error: connection failed";
    const asError = createTestMemory(text, "ERROR");
    const asConversation = createTestMemory(text, "CONVERSATION");

    const query = createQueryContext("error handling", {});

    const errorScore = computeRelevance(asError, query);
    const conversationScore = computeRelevance(asConversation, query);

    expect(errorScore.breakdown.typeBonus).toBeGreaterThan(
      conversationScore.breakdown.typeBonus,
    );
  });

  it("should score causal links", () => {
    const linked = createTestMemory("Linked memory", "CONVERSATION", {
      causalLinks: ["active_item_1"],
    });
    const unlinked = createTestMemory("Unlinked memory", "CONVERSATION", {
      causalLinks: [],
    });

    // The query's active item is the one the linked memory points to.
    const query = createQueryContext("test", {
      activeItems: ["active_item_1"],
    });

    const linkedScore = computeRelevance(linked, query);
    const unlinkedScore = computeRelevance(unlinked, query);

    expect(linkedScore.breakdown.causalLink).toBe(1);
    expect(unlinkedScore.breakdown.causalLink).toBe(0);
  });

  it("should score path overlap", () => {
    const fileMemory = createTestMemory("File content", "FILE_CONTENT", {
      filePaths: ["/src/services/agent.ts"],
    });

    const samePathQuery = createQueryContext("agent implementation", {
      activePaths: ["/src/services/agent.ts"],
    });
    const otherPathQuery = createQueryContext("agent implementation", {
      activePaths: ["/src/utils/helpers.ts"],
    });

    const samePathScore = computeRelevance(fileMemory, samePathQuery);
    const otherPathScore = computeRelevance(fileMemory, otherPathQuery);

    expect(samePathScore.breakdown.pathOverlap).toBeGreaterThan(
      otherPathScore.breakdown.pathOverlap,
    );
  });
});
|
||||
|
||||
// Checks selectRelevantMemories: token budget, mandatory items, inclusion
// of relevant items and presence of a score for each selected item.
describe("selectRelevantMemories", () => {
  it("should select memories within token budget", () => {
    // Three 100-token memories cannot all fit into a 250-token budget.
    const contents = [
      "First memory content here",
      "Second memory content here",
      "Third memory content here",
    ];
    const candidates = contents.map((text) =>
      createTestMemory(text, "CONVERSATION", { tokenCount: 100 }),
    );

    const input: SelectionInput = {
      memories: candidates,
      query: createQueryContext("memory content", {}),
      tokenBudget: 250,
      mandatoryItems: [],
    };

    const outcome = selectRelevantMemories(input);

    expect(outcome.tokenUsage).toBeLessThanOrEqual(250);
  });

  it("should always include mandatory items", () => {
    const pinned = createTestMemory("Important memory", "CONVERSATION", {
      id: "mandatory_1",
    });
    const filler = createTestMemory(
      "Irrelevant memory about cooking",
      "CONVERSATION",
    );

    // The query shares no words with the pinned memory.
    const input: SelectionInput = {
      memories: [pinned, filler],
      query: createQueryContext("completely unrelated topic", {}),
      tokenBudget: 1000,
      mandatoryItems: ["mandatory_1"],
    };

    const outcome = selectRelevantMemories(input);
    const pinnedSelected = outcome.selected.some((m) => m.id === "mandatory_1");

    expect(pinnedSelected).toBe(true);
  });

  // NOTE(review): the test name promises exclusion, but the assertions only
  // check that the relevant memory is selected; nothing verifies that the
  // unrelated one is left out.
  it("should exclude low relevance items", () => {
    const relevant = createTestMemory(
      "Highly relevant database query optimization",
      "CONVERSATION",
    );
    const unrelated = createTestMemory(
      "xyz abc def completely unrelated topic",
      "CONVERSATION",
    );

    const input: SelectionInput = {
      memories: [relevant, unrelated],
      query: createQueryContext("database query optimization", {}),
      tokenBudget: 1000,
      mandatoryItems: [],
    };

    const outcome = selectRelevantMemories(input);

    // At least one memory should be selected (the relevant one)
    expect(outcome.selected.length).toBeGreaterThanOrEqual(1);
    // The first (relevant) memory should be selected
    const relevantSelected = outcome.selected.some((m) =>
      m.content.includes("database"),
    );
    expect(relevantSelected).toBe(true);
  });

  it("should return scores for all selected items", () => {
    const first = createTestMemory("First memory", "CONVERSATION", {
      id: "mem_1",
    });
    const second = createTestMemory("Second memory", "CONVERSATION", {
      id: "mem_2",
    });

    const input: SelectionInput = {
      memories: [first, second],
      query: createQueryContext("memory", {}),
      tokenBudget: 1000,
      mandatoryItems: [],
    };

    const outcome = selectRelevantMemories(input);

    for (const chosen of outcome.selected) {
      expect(outcome.scores.has(chosen.id)).toBe(true);
    }
  });
});
|
||||
|
||||
// Checks which memory ids computeMandatoryItems reports as mandatory for
// recent items, recent errors and decisions.
describe("computeMandatoryItems", () => {
  it("should include recent memories", () => {
    const now = Date.now();
    const justNow = createTestMemory("Recent", "CONVERSATION", {
      id: "recent",
      timestamp: now,
    });
    // 600000 ms = 10 minutes before `now`.
    const tenMinutesAgo = createTestMemory("Old", "CONVERSATION", {
      id: "old",
      timestamp: now - 600000,
    });

    const mandatoryIds = computeMandatoryItems([justNow, tenMinutesAgo], now);

    expect(mandatoryIds).toContain("recent");
  });

  it("should include recent error memories", () => {
    const now = Date.now();
    const recentError = createTestMemory("Error occurred", "ERROR", {
      id: "error_1",
      timestamp: now - 300000, // 5 minutes ago
    });

    const mandatoryIds = computeMandatoryItems([recentError], now);

    expect(mandatoryIds).toContain("error_1");
  });

  it("should include decision memories", () => {
    const now = Date.now();
    const decisions: Array<[string, string]> = [
      ["decision_1", "Decided to use TypeScript"],
      ["decision_2", "Decided to use React"],
      ["decision_3", "Decided to use Bun"],
      ["decision_4", "Decided to use Zustand"],
    ];
    const memories = decisions.map(([id, text]) =>
      createTestMemory(text, "DECISION", { id }),
    );

    const mandatoryIds = computeMandatoryItems(memories, now);

    // Should include last 3 decisions
    for (const id of ["decision_2", "decision_3", "decision_4"]) {
      expect(mandatoryIds).toContain(id);
    }
  });
});
|
||||
|
||||
describe("Memory Store Operations", () => {
|
||||
// Checks that createMemoryStore starts empty and records the given capacity.
describe("createMemoryStore", () => {
  it("should create empty store with max items", () => {
    const { items, maxItems } = createMemoryStore(500);

    expect(items).toHaveLength(0);
    expect(maxItems).toBe(500);
  });
});
|
||||
|
||||
// Checks that addMemory stores an item and keeps the store within its capacity.
describe("addMemory", () => {
  it("should add memory to store", () => {
    const emptyStore = createMemoryStore(100);
    const item = createMemoryItem("Test content", "CONVERSATION");

    const filledStore = addMemory(emptyStore, item);

    expect(filledStore.items).toHaveLength(1);
    expect(filledStore.items[0].content).toBe("Test content");
  });

  // NOTE(review): only the size bound is asserted here; which items survive
  // the pruning is not checked.
  it("should prune oldest items when exceeding max", () => {
    let store = createMemoryStore(3);

    // Add five items to a store that holds at most three.
    for (const n of [0, 1, 2, 3, 4]) {
      store = addMemory(store, createMemoryItem(`Memory ${n}`, "CONVERSATION"));
    }

    expect(store.items.length).toBeLessThanOrEqual(3);
  });
});
|
||||
|
||||
// Seeds one memory of each of three types and expects only the ERROR one back.
describe("findMemoriesByType", () => {
  it("should filter by type", () => {
    const seeds = [
      ["Conversation", "CONVERSATION"],
      ["Error", "ERROR"],
      ["Tool result", "TOOL_RESULT"],
    ] as const;

    let store = createMemoryStore(100);
    for (const [content, type] of seeds) {
      store = addMemory(store, createMemoryItem(content, type));
    }

    const errorMemories = findMemoriesByType(store, "ERROR");

    expect(errorMemories).toHaveLength(1);
    expect(errorMemories[0].content).toBe("Error");
  });
});
|
||||
|
||||
// Two FILE_CONTENT memories carry different paths; querying "agent.ts" must
// return only the one whose path is "/src/services/agent.ts".
describe("findMemoriesByPath", () => {
  it("should find memories by file path", () => {
    // Builds a FILE_CONTENT memory whose filePaths holds exactly one path.
    const fileMemory = (content: string, path: string) => ({
      ...createMemoryItem(content, "FILE_CONTENT"),
      filePaths: [path],
    });

    let store = createMemoryStore(100);
    store = addMemory(store, fileMemory("File content", "/src/services/agent.ts"));
    store = addMemory(store, fileMemory("Other file", "/src/utils/helpers.ts"));

    const matches = findMemoriesByPath(store, "agent.ts");

    expect(matches).toHaveLength(1);
    expect(matches[0].content).toBe("File content");
  });
});
|
||||
|
||||
// One memory is stamped "now", another two hours earlier; pruning with a
// one-hour threshold must keep only the recent one.
describe("pruneOldMemories", () => {
  it("should remove memories older than threshold", () => {
    const referenceTime = Date.now();
    // Creates a CONVERSATION memory with an explicit timestamp.
    const stamped = (content: string, timestamp: number) => ({
      ...createMemoryItem(content, "CONVERSATION"),
      timestamp,
    });

    let store = createMemoryStore(100);
    store = addMemory(store, stamped("Recent", referenceTime));
    store = addMemory(store, stamped("Old", referenceTime - 7200000)); // 2 hours ago

    const remaining = pruneOldMemories(store, 3600000); // 1 hour threshold

    expect(remaining.items).toHaveLength(1);
    expect(remaining.items[0].content).toBe("Recent");
  });
});
|
||||
});
|
||||
|
||||
// Checks the fields of a created memory item: content, type, optional links,
// a positive token count, an id starting with "mem_", and a non-empty token list.
describe("createMemoryItem", () => {
  it("should create memory with correct structure", () => {
    const options = {
      filePaths: ["/test.ts"],
      causalLinks: ["prev_memory"],
    };

    const item = createMemoryItem("Test content", "CONVERSATION", options);

    expect(item.content).toBe("Test content");
    expect(item.type).toBe("CONVERSATION");
    expect(item.filePaths).toContain("/test.ts");
    expect(item.causalLinks).toContain("prev_memory");
    expect(item.tokenCount).toBeGreaterThan(0);
    expect(item.id).toMatch(/^mem_/);
  });

  it("should tokenize content", () => {
    const { tokens } = createMemoryItem("Hello world test", "CONVERSATION");

    expect(tokens.length).toBeGreaterThan(0);
  });
});
|
||||
|
||||
// A query context must expose tokens for the query text, echo the active
// paths/items it was given, and carry a defined timestamp.
describe("createQueryContext", () => {
  it("should create query context with tokens", () => {
    const queryText = "database query optimization";

    const queryContext = createQueryContext(queryText, {
      activePaths: ["/src/db.ts"],
      activeItems: ["item_1"],
    });

    expect(queryContext.tokens.length).toBeGreaterThan(0);
    expect(queryContext.activePaths).toContain("/src/db.ts");
    expect(queryContext.activeItems).toContain("item_1");
    expect(queryContext.timestamp).toBeDefined();
  });
});
|
||||
});
|
||||
@@ -1,276 +0,0 @@
|
||||
/**
|
||||
* Unit tests for Quality Evaluation Layer
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from "bun:test";
|
||||
|
||||
import {
|
||||
evaluateQuality,
|
||||
computeVerdict,
|
||||
hasHallucinationMarkers,
|
||||
hasContradiction,
|
||||
} from "../quality-evaluation";
|
||||
|
||||
import type {
|
||||
QualityEvalInput,
|
||||
TaskConstraints,
|
||||
AttemptRecord,
|
||||
} from "@src/types/reasoning";
|
||||
|
||||
describe("Quality Evaluation Layer", () => {
|
||||
// Builds a QualityEvalInput from baseline values. Fields present in `overrides`
// replace the baseline values wholesale (shallow spread, applied last).
const createDefaultInput = (
  overrides: Partial<QualityEvalInput> = {},
): QualityEvalInput => {
  const baseline: QualityEvalInput = {
    responseText: "Here is the solution to your problem.",
    responseToolCalls: [],
    expectedType: "text",
    queryTokens: ["solution", "problem"],
    queryEntities: [],
    previousAttempts: [],
    taskConstraints: {
      requiredOutputs: [],
      expectedToolCalls: [],
      maxResponseTokens: 4000,
      requiresCode: false,
    },
  };

  return { ...baseline, ...overrides };
};
|
||||
|
||||
// End-to-end checks of evaluateQuality: score/verdict for good and empty
// responses, tool-call expectations, relevance, and required-code handling.
describe("evaluateQuality", () => {
  // Baseline task constraints for the cases below; `changes` overrides single fields.
  const constraintsWith = (
    changes: Partial<TaskConstraints> = {},
  ): TaskConstraints => ({
    requiredOutputs: [],
    expectedToolCalls: [],
    maxResponseTokens: 4000,
    requiresCode: false,
    ...changes,
  });

  it("should accept a high-quality text response", () => {
    const evaluation = evaluateQuality(
      createDefaultInput({
        responseText:
          "Here is the solution to your problem. I've analyzed the issue and found the root cause.",
        queryTokens: ["solution", "problem", "analyze", "issue"],
      }),
    );

    expect(evaluation.score).toBeGreaterThan(0.5);
    expect(evaluation.verdict).toBe("ACCEPT");
    expect(evaluation.deficiencies).toHaveLength(0);
  });

  it("should reject an empty response", () => {
    const evaluation = evaluateQuality(
      createDefaultInput({ responseText: "", responseToolCalls: [] }),
    );

    expect(evaluation.verdict).not.toBe("ACCEPT");
    expect(evaluation.deficiencies).toContain("EMPTY_RESPONSE");
  });

  it("should detect missing tool calls when expected", () => {
    const evaluation = evaluateQuality(
      createDefaultInput({
        responseText: "I will read the file now.",
        responseToolCalls: [],
        expectedType: "tool_call",
        taskConstraints: constraintsWith({ expectedToolCalls: ["read"] }),
      }),
    );

    expect(evaluation.deficiencies).toContain("MISSING_TOOL_CALL");
  });

  it("should accept response with tool calls when expected", () => {
    const readCall = { id: "1", name: "read", arguments: { path: "/test.ts" } };

    const evaluation = evaluateQuality(
      createDefaultInput({
        responseText: "Let me read that file.",
        responseToolCalls: [readCall],
        expectedType: "tool_call",
        taskConstraints: constraintsWith({ expectedToolCalls: ["read"] }),
      }),
    );

    expect(evaluation.score).toBeGreaterThan(0.5);
  });

  it("should detect query mismatch", () => {
    const evaluation = evaluateQuality(
      createDefaultInput({
        responseText: "The weather today is sunny and warm.",
        queryTokens: ["database", "migration", "schema", "postgresql"],
      }),
    );

    // With no token overlap, relevance should be lower than perfect match
    expect(evaluation.metrics.relevance).toBeLessThan(1);
  });

  it("should detect incomplete code when required", () => {
    const evaluation = evaluateQuality(
      createDefaultInput({
        responseText: "Here is some text without any code.",
        taskConstraints: constraintsWith({
          requiresCode: true,
          codeLanguage: "typescript",
        }),
      }),
    );

    expect(evaluation.deficiencies).toContain("INCOMPLETE_CODE");
  });

  it("should accept valid code block when required", () => {
    const evaluation = evaluateQuality(
      createDefaultInput({
        responseText:
          "Here is the function:\n\n```typescript\nfunction add(a: number, b: number): number {\n return a + b;\n}\n```",
        taskConstraints: constraintsWith({
          requiresCode: true,
          codeLanguage: "typescript",
        }),
      }),
    );

    for (const tag of ["INCOMPLETE_CODE", "WRONG_LANGUAGE"]) {
      expect(evaluation.deficiencies).not.toContain(tag);
    }
  });
});
|
||||
|
||||
// Boundary table for computeVerdict: each case lists sample scores that must
// all map to the same verdict, including the lower bound of each band.
describe("computeVerdict", () => {
  it("should return ACCEPT for score >= 0.70", () => {
    for (const score of [0.7, 0.85, 1.0]) {
      expect(computeVerdict(score)).toBe("ACCEPT");
    }
  });

  it("should return RETRY for score between 0.40 and 0.70", () => {
    for (const score of [0.69, 0.55, 0.4]) {
      expect(computeVerdict(score)).toBe("RETRY");
    }
  });

  it("should return ESCALATE for score between 0.20 and 0.40", () => {
    for (const score of [0.39, 0.3, 0.2]) {
      expect(computeVerdict(score)).toBe("ESCALATE");
    }
  });

  it("should return ABORT for score < 0.20", () => {
    for (const score of [0.19, 0.1, 0]) {
      expect(computeVerdict(score)).toBe("ABORT");
    }
  });
});
|
||||
|
||||
// Three phrasings that must be flagged, plus one ordinary sentence that must not.
describe("hasHallucinationMarkers", () => {
  it("should detect 'I don't have access' pattern", () => {
    const text = "I don't have access to the file but I'll assume...";

    expect(hasHallucinationMarkers(text)).toBe(true);
  });

  it("should detect 'assuming exists' pattern", () => {
    const text = "Assuming the function exists, here's how to use it";

    expect(hasHallucinationMarkers(text)).toBe(true);
  });

  it("should detect placeholder pattern", () => {
    const text = "Replace [placeholder] with your value";

    expect(hasHallucinationMarkers(text)).toBe(true);
  });

  it("should not flag normal responses", () => {
    const text = "Here is the implementation you requested.";

    expect(hasHallucinationMarkers(text)).toBe(false);
  });
});
|
||||
|
||||
// Three self-correcting phrasings that must be flagged, plus one plain
// statement that must not.
describe("hasContradiction", () => {
  it("should detect 'but actually' pattern", () => {
    const text = "The function returns true, but actually it returns false";

    expect(hasContradiction(text)).toBe(true);
  });

  it("should detect 'wait, no' pattern", () => {
    const text = "It's in the utils folder. Wait, no, it's in helpers.";

    expect(hasContradiction(text)).toBe(true);
  });

  it("should detect 'on second thought' pattern", () => {
    const text = "Let me use forEach. On second thought, I'll use map.";

    expect(hasContradiction(text)).toBe(true);
  });

  it("should not flag normal responses", () => {
    const text = "The function takes two parameters and returns their sum.";

    expect(hasContradiction(text)).toBe(false);
  });
});
|
||||
|
||||
// Checks the `structural` metric reported by evaluateQuality for unterminated
// fences, well-formed fences, and unbalanced braces.
describe("structural validation", () => {
  // Evaluates a default input carrying `responseText` and returns the structural metric.
  const structuralScoreOf = (responseText: string) =>
    evaluateQuality(createDefaultInput({ responseText })).metrics.structural;

  it("should detect malformed code blocks", () => {
    // Missing closing ```
    const unterminated =
      "Here is the code:\n```typescript\nfunction test() {\n return 1;\n";

    expect(structuralScoreOf(unterminated)).toBeLessThan(1);
  });

  it("should accept well-formed code blocks", () => {
    const wellFormed =
      "Here is the code:\n```typescript\nfunction test() {\n return 1;\n}\n```";

    expect(structuralScoreOf(wellFormed)).toBeGreaterThan(0.5);
  });

  it("should detect unbalanced braces", () => {
    const unbalanced = "The object is { name: 'test', value: { nested: true }";

    expect(structuralScoreOf(unbalanced)).toBeLessThan(1);
  });
});
|
||||
});
|
||||
@@ -1,312 +0,0 @@
|
||||
/**
|
||||
* Unit tests for Retry Policy Layer
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from "bun:test";
|
||||
|
||||
import {
|
||||
createInitialRetryState,
|
||||
createRetryBudget,
|
||||
computeRetryTransition,
|
||||
splitTaskDescription,
|
||||
isRetryable,
|
||||
getCurrentTier,
|
||||
getRemainingAttempts,
|
||||
} from "../retry-policy";
|
||||
|
||||
import type {
|
||||
RetryPolicyInput,
|
||||
RetryTrigger,
|
||||
DeficiencyTag,
|
||||
} from "@src/types/reasoning";
|
||||
|
||||
describe("Retry Policy Layer", () => {
|
||||
// The initial retry state starts in the INITIAL tier with no attempts or
// history, and carries the default budget limits asserted below.
describe("createInitialRetryState", () => {
  it("should create state with INITIAL kind", () => {
    const { currentState, totalAttempts, history } = createInitialRetryState();

    expect(currentState.kind).toBe("INITIAL");
    expect(totalAttempts).toBe(0);
    expect(history).toHaveLength(0);
  });

  it("should create budget with default limits", () => {
    const { budget } = createInitialRetryState();

    expect(budget.maxTotalAttempts).toBe(12);
    expect(budget.maxPerTier).toBe(2);
    expect(budget.maxTimeMs).toBe(60000);
  });
});
|
||||
|
||||
// Overridden fields take the supplied values; the field that is not overridden
// (maxTimeMs) is expected to be 60000.
describe("createRetryBudget", () => {
  it("should allow overriding defaults", () => {
    const overrides = { maxTotalAttempts: 20, maxPerTier: 3 };

    const customBudget = createRetryBudget(overrides);

    expect(customBudget.maxTotalAttempts).toBe(20);
    expect(customBudget.maxPerTier).toBe(3);
    expect(customBudget.maxTimeMs).toBe(60000);
  });
});
|
||||
|
||||
describe("computeRetryTransition", () => {
|
||||
// A RETRY verdict applied to a fresh state must move it to RETRY_SAME, count
// one attempt, and yield a RETRY action.
it("should transition from INITIAL to RETRY_SAME on first retry", () => {
  const initialState = createInitialRetryState();
  const policyInput: RetryPolicyInput = {
    currentState: initialState,
    trigger: {
      event: "QUALITY_VERDICT",
      verdict: "RETRY",
      deficiencies: ["QUERY_MISMATCH"],
    },
    availableTools: ["read", "write"],
    contextBudget: 8000,
  };

  const { nextState, action } = computeRetryTransition(policyInput);

  expect(nextState.currentState.kind).toBe("RETRY_SAME");
  expect(nextState.totalAttempts).toBe(1);
  expect(action.kind).toBe("RETRY");
});
|
||||
|
||||
// Feeds the same RETRY verdict through eight consecutive transitions and
// requires that the state's tier kind changes at least once to something
// other than "INITIAL".
it("should eventually advance to next tier after repeated failures", () => {
  let state = createInitialRetryState();
  // Annotated rather than asserted, so the literal is checked against
  // RetryTrigger instead of being widened by `as const` / `as string[]`.
  const trigger: RetryTrigger = {
    event: "QUALITY_VERDICT",
    verdict: "RETRY",
    deficiencies: [],
  };

  // Run multiple iterations and verify tiers eventually change
  let sawTierChange = false;
  let lastKind = state.currentState.kind;

  for (let i = 0; i < 8; i++) {
    const result = computeRetryTransition({
      currentState: state,
      trigger,
      availableTools: ["read"],
      contextBudget: 8000,
    });
    state = result.nextState;

    // A change back to "INITIAL" is deliberately not counted as a tier change.
    if (
      state.currentState.kind !== lastKind &&
      state.currentState.kind !== "INITIAL"
    ) {
      sawTierChange = true;
      lastKind = state.currentState.kind;
    }
  }

  // Should have seen at least one tier change
  expect(sawTierChange).toBe(true);
});
|
||||
|
||||
// With totalAttempts already equal to a lowered maxTotalAttempts (2), the next
// RETRY verdict must move the state to EXHAUSTED and produce an ABORT action.
it("should exhaust after exceeding max total attempts", () => {
  const spentState = createInitialRetryState();
  spentState.budget.maxTotalAttempts = 2;
  spentState.totalAttempts = 2;

  const { nextState, action } = computeRetryTransition({
    currentState: spentState,
    trigger: {
      event: "QUALITY_VERDICT",
      verdict: "RETRY",
      deficiencies: [],
    },
    availableTools: ["read"],
    contextBudget: 8000,
  });

  expect(nextState.currentState.kind).toBe("EXHAUSTED");
  expect(action.kind).toBe("ABORT");
});
|
||||
|
||||
// Starts from RETRY_SAME with two tier attempts and applies a RETRY verdict.
// NOTE(review): the only assertion sits behind a condition on the action and
// transform kinds, so this test passes without asserting anything when the
// transition yields a different action — confirm whether that is intended.
it("should return REDUCE_CONTEXT transform when simplifying", () => {
  const state = createInitialRetryState();
  state.currentState = { kind: "RETRY_SAME", attempts: 2, tierAttempts: 2 };

  const { action } = computeRetryTransition({
    currentState: state,
    trigger: {
      event: "QUALITY_VERDICT",
      verdict: "RETRY",
      deficiencies: [],
    },
    availableTools: ["read"],
    contextBudget: 8000,
  });

  if (action.kind === "RETRY" && action.transform.kind === "REDUCE_CONTEXT") {
    expect(action.transform.delta).toBeDefined();
  }
});
|
||||
|
||||
// A PERMISSION_DENIED tool failure, arriving while in RETRY_ALTERNATIVE, must
// yield an ESCALATE_TO_USER action.
it("should escalate to user on permission denied errors", () => {
  const state = createInitialRetryState();
  state.currentState = {
    kind: "RETRY_ALTERNATIVE",
    attempts: 10,
    tierAttempts: 2,
  };
  const permissionFailure = {
    toolName: "bash",
    errorType: "PERMISSION_DENIED",
    message: "Permission denied",
  };

  const { action } = computeRetryTransition({
    currentState: state,
    trigger: {
      event: "TOOL_EXECUTION_FAILED",
      error: permissionFailure,
    },
    availableTools: ["read"],
    contextBudget: 8000,
  });

  expect(action.kind).toBe("ESCALATE_TO_USER");
});
|
||||
});
|
||||
|
||||
// Exercises splitTaskDescription on sequenced, numbered, atomic and bulleted
// descriptions. The numbered and bulleted cases only require at least one part.
describe("splitTaskDescription", () => {
  it("should split 'first...then' pattern", () => {
    const parts = splitTaskDescription(
      "First, read the file. Then, update the content.",
    );

    expect(parts.length).toBeGreaterThanOrEqual(2);
  });

  it("should split numbered list pattern", () => {
    const parts = splitTaskDescription(
      "1. Read file 2. Parse content 3. Write output",
    );

    expect(parts.length).toBeGreaterThanOrEqual(1);
  });

  it("should return single item for atomic tasks", () => {
    const atomicTask = "Read the configuration file";

    const parts = splitTaskDescription(atomicTask);

    expect(parts).toHaveLength(1);
    expect(parts[0]).toBe("Read the configuration file");
  });

  it("should split bulleted list pattern", () => {
    const parts = splitTaskDescription(
      "- Create file\n- Add content\n- Save changes",
    );

    expect(parts.length).toBeGreaterThanOrEqual(1);
  });
});
|
||||
|
||||
// isRetryable must be true for INITIAL and RETRY_SAME and false for the
// EXHAUSTED and COMPLETE states.
describe("isRetryable", () => {
  type TierState = ReturnType<typeof createInitialRetryState>["currentState"];

  // Creates an initial retry state and then replaces its current tier state.
  const retryStateIn = (tier: TierState) => {
    const state = createInitialRetryState();
    state.currentState = tier;
    return state;
  };

  it("should return true for INITIAL state", () => {
    const untouched = createInitialRetryState();

    expect(isRetryable(untouched)).toBe(true);
  });

  it("should return true for RETRY_SAME state", () => {
    const state = retryStateIn({
      kind: "RETRY_SAME",
      attempts: 1,
      tierAttempts: 1,
    });

    expect(isRetryable(state)).toBe(true);
  });

  it("should return false for EXHAUSTED state", () => {
    const state = retryStateIn({
      kind: "EXHAUSTED",
      attempts: 12,
      tierAttempts: 0,
      exhaustionReason: "MAX_TIERS_EXCEEDED",
    });

    expect(isRetryable(state)).toBe(false);
  });

  it("should return false for COMPLETE state", () => {
    const state = retryStateIn({
      kind: "COMPLETE",
      attempts: 5,
      tierAttempts: 0,
    });

    expect(isRetryable(state)).toBe(false);
  });
});
|
||||
|
||||
// getCurrentTier reports the kind of the state's current tier, both for the
// initial state and after the tier has been replaced.
describe("getCurrentTier", () => {
  it("should return current tier kind", () => {
    const retryState = createInitialRetryState();
    expect(getCurrentTier(retryState)).toBe("INITIAL");

    retryState.currentState = {
      kind: "RETRY_DECOMPOSED",
      attempts: 5,
      tierAttempts: 1,
    };
    expect(getCurrentTier(retryState)).toBe("RETRY_DECOMPOSED");
  });
});
|
||||
|
||||
// Expected values assume the default budget of 12 total attempts.
describe("getRemainingAttempts", () => {
  it("should calculate remaining attempts correctly", () => {
    const retryState = createInitialRetryState();
    const cases = [
      { used: 4, remaining: 8 },
      { used: 12, remaining: 0 },
    ];

    for (const { used, remaining } of cases) {
      retryState.totalAttempts = used;
      expect(getRemainingAttempts(retryState)).toBe(remaining);
    }
  });
});
|
||||
|
||||
// Drives the retry state machine with an unchanging RETRY verdict for at most
// 15 steps and requires that it ends EXHAUSTED after visiting more than one
// distinct state kind.
describe("state machine progression", () => {
  it("should progress through tiers and eventually exhaust", () => {
    const trigger: RetryTrigger = {
      event: "QUALITY_VERDICT",
      verdict: "RETRY",
      deficiencies: [],
    };
    const maxIterations = 15;

    // Track which tiers we've seen
    const seenTiers = new Set<string>();
    let state = createInitialRetryState();

    for (
      let step = 0;
      step < maxIterations && state.currentState.kind !== "EXHAUSTED";
      step++
    ) {
      const { nextState } = computeRetryTransition({
        currentState: state,
        trigger,
        availableTools: ["read", "write"],
        contextBudget: 8000,
      });

      seenTiers.add(nextState.currentState.kind);
      state = nextState;
    }

    // Should have reached EXHAUSTED
    expect(state.currentState.kind).toBe("EXHAUSTED");

    // Should have seen multiple tiers along the way
    expect(seenTiers.size).toBeGreaterThan(1);
  });
});
|
||||
});
|
||||
@@ -1,504 +0,0 @@
|
||||
/**
|
||||
* Unit tests for Termination Detection Layer
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from "bun:test";
|
||||
|
||||
import {
|
||||
createInitialTerminationState,
|
||||
processTerminationTrigger,
|
||||
computeTerminationConfidence,
|
||||
extractValidationFailures,
|
||||
isComplete,
|
||||
isFailed,
|
||||
isTerminal,
|
||||
requiresValidation,
|
||||
getConfidencePercentage,
|
||||
} from "../termination-detection";
|
||||
|
||||
import type {
|
||||
TerminationState,
|
||||
TerminationTrigger,
|
||||
CompletionSignal,
|
||||
ValidationResult,
|
||||
} from "@src/types/reasoning";
|
||||
|
||||
describe("Termination Detection Layer", () => {
|
||||
// The initial termination state is RUNNING, has no signals or validation
// results, and a confidence score of zero.
describe("createInitialTerminationState", () => {
  it("should create state with RUNNING status", () => {
    const initial = createInitialTerminationState();

    expect(initial.status).toBe("RUNNING");
    expect(initial.completionSignals).toHaveLength(0);
    expect(initial.validationResults).toHaveLength(0);
    expect(initial.confidenceScore).toBe(0);
  });
});
|
||||
|
||||
describe("processTerminationTrigger", () => {
|
||||
// Checks which completion-signal sources are reported after a MODEL_OUTPUT
// trigger is applied to a fresh termination state.
describe("MODEL_OUTPUT trigger", () => {
  // Applies a MODEL_OUTPUT trigger to a new state and lists the signal sources found.
  const signalSourcesAfter = (content: string, hasToolCalls: boolean) => {
    const state = createInitialTerminationState();
    const trigger: TerminationTrigger = {
      event: "MODEL_OUTPUT",
      content,
      hasToolCalls,
    };

    const outcome = processTerminationTrigger(state, trigger);
    return outcome.evidence.signals.map((signal) => signal.source);
  };

  it("should detect completion signals from model text", () => {
    const sources = signalSourcesAfter(
      "I've completed the task successfully.",
      false,
    );

    expect(sources.length).toBeGreaterThan(0);
    expect(sources.includes("MODEL_STATEMENT")).toBe(true);
  });

  it("should detect no pending actions when no tool calls", () => {
    const sources = signalSourcesAfter("Here is the answer.", false);

    expect(sources.includes("NO_PENDING_ACTIONS")).toBe(true);
  });

  it("should not add NO_PENDING_ACTIONS when tool calls present", () => {
    const sources = signalSourcesAfter("Let me read that file.", true);

    expect(sources.includes("NO_PENDING_ACTIONS")).toBe(false);
  });
});
|
||||
|
||||
// A TOOL_SUCCESS signal must be reported for a successful "write" tool run and
// must be absent for a failed one.
describe("TOOL_COMPLETED trigger", () => {
  // Reports whether a TOOL_SUCCESS signal exists after a "write" tool completes.
  const toolSuccessSignalled = (success: boolean): boolean => {
    const state = createInitialTerminationState();
    const trigger: TerminationTrigger = {
      event: "TOOL_COMPLETED",
      toolName: "write",
      success,
    };

    const outcome = processTerminationTrigger(state, trigger);
    return outcome.evidence.signals.some(
      (signal) => signal.source === "TOOL_SUCCESS",
    );
  };

  it("should add TOOL_SUCCESS signal on successful tool execution", () => {
    expect(toolSuccessSignalled(true)).toBe(true);
  });

  it("should not add signal on failed tool execution", () => {
    expect(toolSuccessSignalled(false)).toBe(false);
  });
});
|
||||
|
||||
// An accepting USER_INPUT must produce CONFIRMED_COMPLETE in a single step and
// record a USER_ACCEPT signal.
describe("USER_INPUT trigger", () => {
  it("should immediately confirm completion on user acceptance", () => {
    const freshState = createInitialTerminationState();
    const acceptance: TerminationTrigger = {
      event: "USER_INPUT",
      isAcceptance: true,
    };

    const outcome = processTerminationTrigger(freshState, acceptance);
    const userAccepted = outcome.evidence.signals.some(
      (signal) => signal.source === "USER_ACCEPT",
    );

    expect(outcome.status).toBe("CONFIRMED_COMPLETE");
    expect(userAccepted).toBe(true);
  });
});
|
||||
|
||||
// Validation results are keyed by checkId: a new id adds an entry, a repeated
// id replaces the stored entry instead of adding a second one.
describe("VALIDATION_RESULT trigger", () => {
  it("should update validation results", () => {
    const state = createInitialTerminationState();
    state.status = "AWAITING_VALIDATION";
    const trigger: TerminationTrigger = {
      event: "VALIDATION_RESULT",
      result: {
        checkId: "file_exists_check",
        passed: true,
        details: "All files exist",
        duration: 100,
      },
    };

    const { evidence } = processTerminationTrigger(state, trigger);

    expect(evidence.validationResults).toHaveLength(1);
    expect(evidence.validationResults[0].passed).toBe(true);
  });

  it("should update existing validation result", () => {
    const earlierFailure = {
      checkId: "file_exists_check",
      passed: false,
      details: "File missing",
      duration: 50,
    };
    const state = createInitialTerminationState();
    state.status = "AWAITING_VALIDATION";
    state.validationResults = [earlierFailure];
    const trigger: TerminationTrigger = {
      event: "VALIDATION_RESULT",
      result: {
        checkId: "file_exists_check",
        passed: true,
        details: "File now exists",
        duration: 100,
      },
    };

    const { evidence } = processTerminationTrigger(state, trigger);

    expect(evidence.validationResults).toHaveLength(1);
    expect(evidence.validationResults[0].passed).toBe(true);
  });
});
|
||||
|
||||
// Covers signal accumulation on a pre-seeded state and the
// POTENTIALLY_COMPLETE -> AWAITING_VALIDATION transition.
describe("status transitions", () => {
  it("should accumulate signals and increase confidence over time", () => {
    const state = createInitialTerminationState();
    state.completionSignals = [
      { source: "MODEL_STATEMENT", timestamp: Date.now(), confidence: 0.3 },
      { source: "TOOL_SUCCESS", timestamp: Date.now(), confidence: 0.5 },
      { source: "TOOL_SUCCESS", timestamp: Date.now(), confidence: 0.5 },
    ];
    const modelOutput: TerminationTrigger = {
      event: "MODEL_OUTPUT",
      content: "I've completed the task successfully.",
      hasToolCalls: false,
    };

    const outcome = processTerminationTrigger(state, modelOutput);

    // Confidence should increase with more signals
    expect(outcome.confidence).toBeGreaterThan(0);
    // The seeded length is read only after processing, as a post-call comparison.
    expect(outcome.evidence.signals.length).toBeGreaterThan(
      state.completionSignals.length,
    );
  });

  it("should transition from POTENTIALLY_COMPLETE to AWAITING_VALIDATION", () => {
    const state = createInitialTerminationState();
    state.status = "POTENTIALLY_COMPLETE";
    const toolCompleted: TerminationTrigger = {
      event: "TOOL_COMPLETED",
      toolName: "write",
      success: true,
    };

    const outcome = processTerminationTrigger(state, toolCompleted);

    expect(outcome.status).toBe("AWAITING_VALIDATION");
  });
});
|
||||
});
|
||||
|
||||
// Confidence is checked for empty input, signals only, validation results
// only, and both together (which must exceed either one alone).
describe("computeTerminationConfidence", () => {
  // Builds a passing validation result for the given check id.
  const passingCheck = (checkId: string): ValidationResult => ({
    checkId,
    passed: true,
    details: "OK",
    duration: 100,
  });

  it("should compute low confidence with no signals or results", () => {
    expect(computeTerminationConfidence([], [])).toBe(0);
  });

  it("should compute confidence from signals", () => {
    const signals: CompletionSignal[] = [
      { source: "MODEL_STATEMENT", timestamp: Date.now(), confidence: 0.3 },
      { source: "TOOL_SUCCESS", timestamp: Date.now(), confidence: 0.5 },
    ];

    const score = computeTerminationConfidence(signals, []);

    expect(score).toBeGreaterThan(0);
    expect(score).toBeLessThanOrEqual(0.4); // Signal max is 0.4
  });

  it("should compute confidence from validation results", () => {
    const results: ValidationResult[] = [
      passingCheck("file_exists_check"),
      passingCheck("syntax_valid_check"),
    ];

    const score = computeTerminationConfidence([], results);

    expect(score).toBeGreaterThan(0);
  });

  it("should compute combined confidence", () => {
    const signals: CompletionSignal[] = [
      { source: "TOOL_SUCCESS", timestamp: Date.now(), confidence: 0.5 },
    ];
    const results: ValidationResult[] = [passingCheck("file_exists_check")];

    const combined = computeTerminationConfidence(signals, results);
    const signalsOnly = computeTerminationConfidence(signals, []);
    const resultsOnly = computeTerminationConfidence([], results);

    expect(combined).toBeGreaterThan(signalsOnly);
    expect(combined).toBeGreaterThan(resultsOnly);
  });
});
|
||||
|
||||
// Only failed checks are extracted; a "Permission denied" failure must be
// flagged non-recoverable, while a timeout failure must be recoverable.
describe("extractValidationFailures", () => {
  // Builds a failed validation result with the given id, details and duration.
  const failedCheck = (
    checkId: string,
    details: string,
    duration: number,
  ): ValidationResult => ({ checkId, passed: false, details, duration });

  it("should extract failed validations", () => {
    const results: ValidationResult[] = [
      { checkId: "check_1", passed: true, details: "OK", duration: 100 },
      failedCheck("check_2", "File not found", 50),
      failedCheck("check_3", "Syntax error", 75),
    ];

    const failures = extractValidationFailures(results);
    const failedIds = failures.map((failure) => failure.checkId);

    expect(failures).toHaveLength(2);
    expect(failedIds).toContain("check_2");
    expect(failedIds).toContain("check_3");
  });

  it("should mark permission errors as non-recoverable", () => {
    const results: ValidationResult[] = [
      failedCheck("check_1", "Permission denied", 100),
    ];

    const [firstFailure] = extractValidationFailures(results);

    expect(firstFailure.recoverable).toBe(false);
  });

  it("should mark other errors as recoverable", () => {
    const results: ValidationResult[] = [
      failedCheck("check_1", "Timeout occurred", 100),
    ];

    const [firstFailure] = extractValidationFailures(results);

    expect(firstFailure.recoverable).toBe(true);
  });
});
|
||||
|
||||
describe("state query functions", () => {
|
||||
// isComplete: true for CONFIRMED_COMPLETE, false for RUNNING.
describe("isComplete", () => {
  // Fresh initial state with only the status overridden.
  const stateWith = (status: TerminationState["status"]): TerminationState => ({
    ...createInitialTerminationState(),
    status,
  });

  it("should return true only for CONFIRMED_COMPLETE", () => {
    const confirmed = stateWith("CONFIRMED_COMPLETE");
    const running = stateWith("RUNNING");

    expect(isComplete(confirmed)).toBe(true);
    expect(isComplete(running)).toBe(false);
  });
});
|
||||
|
||||
// isFailed: true for FAILED, false for RUNNING.
describe("isFailed", () => {
  // Fresh initial state with only the status overridden.
  const stateWith = (status: TerminationState["status"]): TerminationState => ({
    ...createInitialTerminationState(),
    status,
  });

  it("should return true only for FAILED", () => {
    const failed = stateWith("FAILED");
    const running = stateWith("RUNNING");

    expect(isFailed(failed)).toBe(true);
    expect(isFailed(running)).toBe(false);
  });
});
|
||||
|
||||
// isTerminal: the two end states are terminal; in-progress states are not.
describe("isTerminal", () => {
  it("should return true for CONFIRMED_COMPLETE or FAILED", () => {
    // [status, expected isTerminal result], checked in this order.
    const expectations: Array<[TerminationState["status"], boolean]> = [
      ["CONFIRMED_COMPLETE", true],
      ["FAILED", true],
      ["RUNNING", false],
      ["AWAITING_VALIDATION", false],
    ];

    for (const [status, expected] of expectations) {
      const state: TerminationState = {
        ...createInitialTerminationState(),
        status,
      };

      expect(isTerminal(state)).toBe(expected);
    }
  });
});
|
||||
|
||||
// requiresValidation: only the two "maybe done" states ask for validation.
describe("requiresValidation", () => {
  it("should return true for POTENTIALLY_COMPLETE and AWAITING_VALIDATION", () => {
    // [status, expected requiresValidation result], checked in this order.
    const expectations: Array<[TerminationState["status"], boolean]> = [
      ["POTENTIALLY_COMPLETE", true],
      ["AWAITING_VALIDATION", true],
      ["RUNNING", false],
      ["CONFIRMED_COMPLETE", false],
    ];

    for (const [status, expected] of expectations) {
      const state: TerminationState = {
        ...createInitialTerminationState(),
        status,
      };

      expect(requiresValidation(state)).toBe(expected);
    }
  });
});
|
||||
|
||||
// getConfidencePercentage: renders confidenceScore as a one-decimal percent string.
describe("getConfidencePercentage", () => {
  // Fresh initial state with only the confidence score overridden.
  const stateWithScore = (confidenceScore: number): TerminationState => ({
    ...createInitialTerminationState(),
    confidenceScore,
  });

  it("should format confidence as percentage", () => {
    const formatted = getConfidencePercentage(stateWithScore(0.756));

    expect(formatted).toBe("75.6%");
  });

  it("should handle zero confidence", () => {
    // The untouched initial state is expected to render as zero.
    const initial = createInitialTerminationState();

    expect(getConfidencePercentage(initial)).toBe("0.0%");
  });

  it("should handle 100% confidence", () => {
    const formatted = getConfidencePercentage(stateWithScore(1.0));

    expect(formatted).toBe("100.0%");
  });
});
|
||||
});
|
||||
|
||||
// Decision kinds produced by processTerminationTrigger for three
// representative state/trigger pairs.
describe("decision computation", () => {
  it("should return CONTINUE for low confidence", () => {
    const initial = createInitialTerminationState();
    const modelOutput: TerminationTrigger = {
      event: "MODEL_OUTPUT",
      content: "Working on it...",
      hasToolCalls: true,
    };

    const { decision } = processTerminationTrigger(initial, modelOutput);

    expect(decision.kind).toBe("CONTINUE");
  });

  it("should return VALIDATE for potentially complete state", () => {
    const almostDone: TerminationState = {
      ...createInitialTerminationState(),
      status: "POTENTIALLY_COMPLETE",
      confidenceScore: 0.6,
    };
    const toolCompleted: TerminationTrigger = {
      event: "TOOL_COMPLETED",
      toolName: "write",
      success: true,
    };

    const { decision } = processTerminationTrigger(almostDone, toolCompleted);

    expect(decision.kind).toBe("VALIDATE");
  });

  it("should return COMPLETE for confirmed completion", () => {
    const initial = createInitialTerminationState();
    const userAccepted: TerminationTrigger = {
      event: "USER_INPUT",
      isAcceptance: true,
    };

    const { decision } = processTerminationTrigger(initial, userAccepted);

    expect(decision.kind).toBe("COMPLETE");
  });
});
|
||||
});
|
||||
@@ -1,435 +0,0 @@
|
||||
/**
|
||||
* Unit tests for Reasoning Utilities
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from "bun:test";
|
||||
|
||||
import {
|
||||
estimateTokens,
|
||||
tokenize,
|
||||
jaccardSimilarity,
|
||||
weightedSum,
|
||||
extractEntities,
|
||||
createEntityTable,
|
||||
truncateMiddle,
|
||||
foldCode,
|
||||
extractCodeBlocks,
|
||||
recencyDecay,
|
||||
generateId,
|
||||
isValidJson,
|
||||
hasBalancedBraces,
|
||||
countMatches,
|
||||
sum,
|
||||
unique,
|
||||
groupBy,
|
||||
} from "../utils";
|
||||
|
||||
describe("Reasoning Utilities", () => {
|
||||
// estimateTokens: positive but smaller than the character count; zero for "".
describe("estimateTokens", () => {
  it("should estimate tokens based on character count", () => {
    const sample = "Hello world"; // 11 characters
    const estimate = estimateTokens(sample);

    expect(estimate).toBeGreaterThan(0);
    expect(estimate).toBeLessThan(sample.length);
  });

  it("should handle empty string", () => {
    const estimate = estimateTokens("");

    expect(estimate).toBe(0);
  });
});
|
||||
|
||||
// tokenize: lowercases, drops stop words and short tokens, strips punctuation.
describe("tokenize", () => {
  it("should split text into lowercase tokens", () => {
    const words = tokenize("Hello World Test");
    const hasUppercase = words.some((word) => word !== word.toLowerCase());

    expect(!hasUppercase).toBe(true);
  });

  it("should filter stop words", () => {
    const words = tokenize("the quick brown fox jumps over the lazy dog");

    expect(words).not.toContain("the");
    // "over" is deliberately unchecked: it may or may not be a stop word.
    for (const kept of ["quick", "brown"]) {
      expect(words).toContain(kept);
    }
  });

  it("should filter short tokens", () => {
    const words = tokenize("I am a test");

    for (const dropped of ["i", "am", "a"]) {
      expect(words).not.toContain(dropped);
    }
  });

  it("should handle punctuation", () => {
    const words = tokenize("Hello, world! How are you?");
    const hasPunctuation = words.some((word) => /[,!?]/.test(word));

    expect(!hasPunctuation).toBe(true);
  });
});
|
||||
|
||||
// jaccardSimilarity: |intersection| / |union| over two token lists.
describe("jaccardSimilarity", () => {
  it("should return 1 for identical sets", () => {
    const score = jaccardSimilarity(["a", "b", "c"], ["a", "b", "c"]);

    expect(score).toBe(1);
  });

  it("should return 0 for disjoint sets", () => {
    const score = jaccardSimilarity(["a", "b", "c"], ["d", "e", "f"]);

    expect(score).toBe(0);
  });

  it("should return correct value for partial overlap", () => {
    const score = jaccardSimilarity(["a", "b", "c"], ["b", "c", "d"]);

    // Shared {b, c} has 2 members; combined {a, b, c, d} has 4.
    expect(score).toBe(0.5);
  });

  it("should handle empty sets", () => {
    const emptyCases: Array<[string[], string[]]> = [
      [[], []],
      [["a"], []],
      [[], ["a"]],
    ];

    for (const [left, right] of emptyCases) {
      expect(jaccardSimilarity(left, right)).toBe(0);
    }
  });
});
|
||||
|
||||
// weightedSum: dot product of values and weights; rejects length mismatch.
describe("weightedSum", () => {
  it("should compute weighted sum correctly", () => {
    const expected = 1 * 0.5 + 2 * 0.3 + 3 * 0.2;
    const actual = weightedSum([1, 2, 3], [0.5, 0.3, 0.2]);

    expect(actual).toBeCloseTo(expected);
  });

  it("should throw for mismatched lengths", () => {
    const callWithMismatch = () => weightedSum([1, 2], [0.5]);

    expect(callWithMismatch).toThrow();
  });

  it("should handle empty arrays", () => {
    const actual = weightedSum([], []);

    expect(actual).toBe(0);
  });
});
|
||||
|
||||
// extractEntities: pulls typed entities (FILE, FUNCTION, URL) out of free
// text and tags each with the ID of the message it came from.
describe("extractEntities", () => {
  it("should extract file paths", () => {
    const entities = extractEntities(
      "Check the file src/index.ts for details",
      "msg_1",
    );

    expect(
      entities.some((e) => e.type === "FILE" && e.value.includes("index.ts")),
    ).toBe(true);
  });

  it("should extract function names", () => {
    const entities = extractEntities(
      "function handleClick() { return 1; }",
      "msg_1",
    );

    expect(entities.some((e) => e.type === "FUNCTION")).toBe(true);
  });

  it("should extract URLs", () => {
    const entities = extractEntities(
      "Visit https://example.com for more info",
      "msg_1",
    );

    expect(
      entities.some(
        (e) => e.type === "URL" && e.value.includes("example.com"),
      ),
    ).toBe(true);
  });

  it("should set source message ID", () => {
    // Use the sentence the file-path test above already requires to yield an
    // entity, so an empty result fails here instead of silently skipping the
    // assertion (the previous `if (entities.length > 0)` guard allowed that).
    const entities = extractEntities(
      "Check the file src/index.ts for details",
      "test_msg",
    );

    expect(entities.length).toBeGreaterThan(0);
    for (const entity of entities) {
      expect(entity.sourceMessageId).toBe("test_msg");
    }
  });
});
|
||||
|
||||
// createEntityTable: indexes entities by their type and by source message ID.
describe("createEntityTable", () => {
  // FILE entity fixture; frequency is always 1 in these tests.
  const fileEntity = (value: string, sourceMessageId: string) => ({
    type: "FILE" as const,
    value,
    sourceMessageId,
    frequency: 1,
  });

  it("should organize entities by type", () => {
    const urlEntity = {
      type: "URL" as const,
      value: "https://test.com",
      sourceMessageId: "msg_1",
      frequency: 1,
    };
    const entities = [
      fileEntity("test.ts", "msg_1"),
      fileEntity("other.ts", "msg_1"),
      urlEntity,
    ];

    const { byType } = createEntityTable(entities);

    expect(byType.FILE).toHaveLength(2);
    expect(byType.URL).toHaveLength(1);
  });

  it("should organize entities by source", () => {
    const entities = [
      fileEntity("test.ts", "msg_1"),
      fileEntity("other.ts", "msg_2"),
    ];

    const { bySource } = createEntityTable(entities);

    expect(bySource["msg_1"]).toHaveLength(1);
    expect(bySource["msg_2"]).toHaveLength(1);
  });
});
|
||||
|
||||
// truncateMiddle: keeps the head and tail of long text and marks the cut.
describe("truncateMiddle", () => {
  it("should truncate long text", () => {
    const longText = "a".repeat(200);
    const shortened = truncateMiddle(longText, 50, 50);

    expect(shortened.length).toBeLessThan(longText.length);
    expect(shortened).toContain("truncated");
  });

  it("should not truncate short text", () => {
    const shortText = "short text";

    expect(truncateMiddle(shortText, 50, 50)).toBe(shortText);
  });

  it("should preserve head and tail", () => {
    const shortened = truncateMiddle(
      "HEAD_CONTENT_MIDDLE_STUFF_TAIL_CONTENT",
      12,
      12,
    );
    const keepsHead = shortened.startsWith("HEAD_CONTENT");
    const keepsTail = shortened.endsWith("TAIL_CONTENT");

    expect(keepsHead).toBe(true);
    expect(keepsTail).toBe(true);
  });
});
|
||||
|
||||
// foldCode: collapses the middle of long code, keeping leading/trailing lines.
describe("foldCode", () => {
  // "line 1" .. "line <count>" joined by newlines.
  const numberedLines = (count: number): string => {
    const lines: string[] = [];
    for (let n = 1; n <= count; n++) {
      lines.push(`line ${n}`);
    }
    return lines.join("\n");
  };

  it("should fold long code blocks", () => {
    const folded = foldCode(numberedLines(50), { keepLines: 5, tailLines: 3 });
    const foldedLineCount = folded.split("\n").length;

    expect(foldedLineCount).toBeLessThan(50);
    expect(folded).toContain("folded");
  });

  it("should not fold short code blocks", () => {
    const shortCode = "line 1\nline 2\nline 3";

    expect(foldCode(shortCode, { keepLines: 5, tailLines: 3 })).toBe(shortCode);
  });

  it("should preserve first and last lines", () => {
    const folded = foldCode(numberedLines(50), { keepLines: 2, tailLines: 2 });

    for (const kept of ["line 1", "line 2", "line 49", "line 50"]) {
      expect(folded).toContain(kept);
    }
  });
});
|
||||
|
||||
// extractCodeBlocks: finds fenced code blocks and reports language, content
// and start/end offsets.
describe("extractCodeBlocks", () => {
  it("should extract code blocks with language", () => {
    const markdown =
      "Here is code:\n```typescript\nconst x = 1;\n```\nMore text.";
    const found = extractCodeBlocks(markdown);

    expect(found).toHaveLength(1);
    expect(found[0].language).toBe("typescript");
    expect(found[0].content).toContain("const x = 1");
  });

  it("should extract multiple code blocks", () => {
    const found = extractCodeBlocks(
      "```js\ncode1\n```\n\n```python\ncode2\n```",
    );

    expect(found).toHaveLength(2);
    expect(found[0].language).toBe("js");
    expect(found[1].language).toBe("python");
  });

  it("should handle code blocks without language", () => {
    const found = extractCodeBlocks("```\nsome code\n```");

    expect(found).toHaveLength(1);
    // A fence with no language tag is reported as "unknown".
    expect(found[0].language).toBe("unknown");
  });

  it("should track positions", () => {
    const found = extractCodeBlocks("Start\n```ts\ncode\n```\nEnd");

    expect(found[0].startIndex).toBeGreaterThan(0);
    expect(found[0].endIndex).toBeGreaterThan(found[0].startIndex);
  });
});
|
||||
|
||||
// recencyDecay(timestamp, now, halfLifeMinutes): 1 at age zero, 0.5 after one
// half-life, decreasing as the timestamp gets older.
describe("recencyDecay", () => {
  const MINUTE_MS = 60 * 1000;

  it("should return 1 for current time", () => {
    const now = Date.now();

    expect(recencyDecay(now, now, 30)).toBe(1);
  });

  it("should return 0.5 at half-life", () => {
    const now = Date.now();
    const oneHalfLifeAgo = now - 30 * MINUTE_MS; // 30 minutes back

    expect(recencyDecay(oneHalfLifeAgo, now, 30)).toBeCloseTo(0.5, 2);
  });

  it("should decrease with age", () => {
    const now = Date.now();
    const minuteOld = recencyDecay(now - 60000, now, 30);
    const hourOld = recencyDecay(now - 3600000, now, 30);

    expect(minuteOld).toBeGreaterThan(hourOld);
  });
});
|
||||
|
||||
// generateId: no repeats across 100 calls; optional prefix joined with "_".
describe("generateId", () => {
  it("should generate unique IDs", () => {
    const generated = Array.from({ length: 100 }, () => generateId());
    const distinct = new Set<string>(generated);

    expect(distinct.size).toBe(100);
  });

  it("should include prefix when provided", () => {
    const prefixed = generateId("test");

    expect(prefixed.startsWith("test_")).toBe(true);
  });
});
|
||||
|
||||
// isValidJson: true for parseable JSON text, false otherwise.
describe("isValidJson", () => {
  it("should return true for valid JSON", () => {
    const validSamples = ['{"key": "value"}', "[1, 2, 3]", '"string"'];

    for (const sample of validSamples) {
      expect(isValidJson(sample)).toBe(true);
    }
  });

  it("should return false for invalid JSON", () => {
    const invalidSamples = ["{key: value}", "not json", "{incomplete"];

    for (const sample of invalidSamples) {
      expect(isValidJson(sample)).toBe(false);
    }
  });
});
|
||||
|
||||
// hasBalancedBraces: checks that (), [] and {} are closed and correctly nested.
describe("hasBalancedBraces", () => {
  it("should return true for balanced braces", () => {
    const balanced = ["{ foo: { bar: [] } }", "function() { return (a + b); }"];

    for (const sample of balanced) {
      expect(hasBalancedBraces(sample)).toBe(true);
    }
  });

  it("should return false for unbalanced braces", () => {
    const unbalanced = [
      "{ foo: { bar }",
      "function() { return (a + b); ",
      "{ ] }",
    ];

    for (const sample of unbalanced) {
      expect(hasBalancedBraces(sample)).toBe(false);
    }
  });

  it("should handle empty string", () => {
    const result = hasBalancedBraces("");

    expect(result).toBe(true);
  });
});
|
||||
|
||||
// countMatches: number of times a global regex matches within the text.
describe("countMatches", () => {
  it("should count pattern matches", () => {
    const tripleA = countMatches("aaa", /a/g);
    const lettersO = countMatches("hello world", /o/g);

    expect(tripleA).toBe(3);
    expect(lettersO).toBe(2);
  });

  it("should handle no matches", () => {
    const none = countMatches("hello", /z/g);

    expect(none).toBe(0);
  });

  it("should handle case-insensitive patterns", () => {
    const anyCase = countMatches("Hello HELLO hello", /hello/gi);

    expect(anyCase).toBe(3);
  });
});
|
||||
|
||||
// sum: adds up a list of numbers; an empty list totals 0.
describe("sum", () => {
  it("should sum numbers", () => {
    const integerTotal = sum([1, 2, 3]);
    const fractionTotal = sum([0.1, 0.2, 0.3]);

    expect(integerTotal).toBe(6);
    // Floating-point addition: compare approximately.
    expect(fractionTotal).toBeCloseTo(0.6);
  });

  it("should return 0 for empty array", () => {
    const emptyTotal = sum([]);

    expect(emptyTotal).toBe(0);
  });
});
|
||||
|
||||
// unique: drops repeated values while keeping first-seen order.
describe("unique", () => {
  it("should remove duplicates", () => {
    const numbers = unique([1, 2, 2, 3, 3, 3]);
    const letters = unique(["a", "b", "a"]);

    expect(numbers).toEqual([1, 2, 3]);
    expect(letters).toEqual(["a", "b"]);
  });

  it("should handle empty array", () => {
    const nothing = unique([]);

    expect(nothing).toEqual([]);
  });
});
|
||||
|
||||
// groupBy: buckets items under the key returned by the key function.
describe("groupBy", () => {
  it("should group by key function", () => {
    const records = [
      { type: "a", value: 1 },
      { type: "b", value: 2 },
      { type: "a", value: 3 },
    ];

    const buckets = groupBy(records, (record) => record.type);

    expect(buckets.a).toHaveLength(2);
    expect(buckets.b).toHaveLength(1);
  });

  it("should handle empty array", () => {
    const buckets = groupBy([], (x: string) => x);
    const bucketNames = Object.keys(buckets);

    expect(bucketNames).toHaveLength(0);
  });
});
|
||||
});
|
||||
Reference in New Issue
Block a user