Improve agent autonomy and diff view readability

Agent behavior improvements:
  - Add project context detection (tsconfig.json, pom.xml, etc.)
  - Enforce validation after changes (tsc --noEmit, mvn compile, etc.)
  - Run tests automatically - never ask "do you want me to run tests"
  - Complete full loop: create → type-check → test → confirm
  - Add command detection for direct execution (run tree, run ls)

  Diff view improvements:
  - Use darker backgrounds for added/removed lines
  - Add diffLineBgAdded, diffLineBgRemoved, diffLineText theme colors
  - Improve text visibility with white text on dark backgrounds
  - Update both React/Ink and SolidJS diff components

  Streaming fixes:
  - Fix tool call argument accumulation using OpenAI index field
  - Fix streaming content display after tool calls
  - Add consecutive error tracking to prevent token waste

  Other changes:
  - ESC to abort operations, Ctrl+C to exit
  - Fix model selection when provider changes in cascade mode
  - Add debug logging for troubleshooting
  - Move tests to root tests/ folder
  - Fix banner test GRADIENT_COLORS reference
This commit is contained in:
2026-01-29 07:33:30 -05:00
parent ad02852489
commit 187cc68304
62 changed files with 2005 additions and 2075 deletions

54
src/api/copilot/auth.ts Normal file
View File

@@ -0,0 +1,54 @@
/**
* Copilot Authentication API
*
* Low-level API calls for GitHub OAuth device flow
*/
import got from "got";
import {
GITHUB_CLIENT_ID,
GITHUB_DEVICE_CODE_URL,
GITHUB_ACCESS_TOKEN_URL,
} from "@constants/copilot";
import type { DeviceCodeResponse, AccessTokenResponse } from "@/types/copilot";
/**
 * Start the GitHub OAuth device flow.
 *
 * Sends a form-encoded POST carrying the client id and the `read:user`
 * scope to GITHUB_DEVICE_CODE_URL and resolves with the JSON-decoded reply.
 *
 * @returns The parsed device-code response.
 */
export const requestDeviceCode = async (): Promise<DeviceCodeResponse> => {
  const pendingRequest = got.post(GITHUB_DEVICE_CODE_URL, {
    headers: { Accept: "application/json" },
    form: { client_id: GITHUB_CLIENT_ID, scope: "read:user" },
  });
  const deviceCodeInfo = await pendingRequest.json<DeviceCodeResponse>();
  return deviceCodeInfo;
};
/**
 * Ask GitHub for an access token for a previously issued device code.
 *
 * Issues one form-encoded POST to GITHUB_ACCESS_TOKEN_URL using the
 * device-code grant type and resolves with the JSON-decoded reply. There is
 * no loop in this function; repeated polling is left to the caller.
 *
 * @param deviceCode - Value sent as the `device_code` form field.
 * @returns The parsed access-token response.
 */
export const requestAccessToken = async (
  deviceCode: string,
): Promise<AccessTokenResponse> => {
  const formFields = {
    client_id: GITHUB_CLIENT_ID,
    device_code: deviceCode,
    grant_type: "urn:ietf:params:oauth:grant-type:device_code",
  };
  const pendingRequest = got.post(GITHUB_ACCESS_TOKEN_URL, {
    headers: { Accept: "application/json" },
    form: formFields,
  });
  const tokenInfo = await pendingRequest.json<AccessTokenResponse>();
  return tokenInfo;
};

197
src/api/copilot/chat.ts Normal file
View File

@@ -0,0 +1,197 @@
/**
* Copilot Chat API
*
* Low-level API calls for chat completions
*/
import got from "got";
import type { CopilotToken } from "@/types/copilot";
import type {
Message,
ChatCompletionOptions,
ChatCompletionResponse,
StreamChunk,
} from "@/types/providers";
import { buildCopilotHeaders } from "@api/copilot/token";
/**
* Wire-format chat message: the element type of ChatRequestBody.messages,
* produced from a Message by formatMessages.
*/
interface FormattedMessage {
role: string;
content: string;
// Copied from the source message only when its tool_call_id is truthy.
tool_call_id?: string;
// Copied from the source message only when its tool_calls is truthy.
tool_calls?: Message["tool_calls"];
}
/**
* JSON body sent to the chat completions endpoint; assembled by
* buildRequestBody and posted by executeChatRequest / executeStreamRequest.
*/
interface ChatRequestBody {
model: string;
messages: FormattedMessage[];
// buildRequestBody defaults this to 4096 when no maxTokens option is given.
max_tokens: number;
// buildRequestBody defaults this to 0.3 when no temperature option is given.
temperature: number;
stream: boolean;
// Set by buildRequestBody only when a non-empty tools list is supplied.
tools?: ChatCompletionOptions["tools"];
// Set to "auto" by buildRequestBody whenever tools is set.
tool_choice?: string;
}
/**
* The fields of the chat completions JSON reply that executeChatRequest reads.
* Everything is optional because the reply is not validated before use.
*/
interface ChatApiResponse {
// When present, executeChatRequest throws with this message.
error?: { message?: string };
// Only the first entry is consumed by executeChatRequest.
choices?: Array<{
message?: { content?: string; tool_calls?: Message["tool_calls"] };
finish_reason?: ChatCompletionResponse["finishReason"];
}>;
// Token counts; missing counters are reported as 0 by executeChatRequest.
usage?: {
prompt_tokens?: number;
completion_tokens?: number;
total_tokens?: number;
};
}
/**
 * Project each Message onto the wire shape expected by the chat endpoint.
 *
 * `role` and `content` are always copied. `tool_call_id` and `tool_calls`
 * are copied only when truthy, so absent/empty values never appear as keys.
 * Any other property on the input message is dropped.
 */
const formatMessages = (messages: Message[]): FormattedMessage[] =>
  messages.map((source) => {
    const { role, content, tool_call_id: toolCallId, tool_calls: toolCalls } =
      source;
    return {
      role,
      content,
      ...(toolCallId ? { tool_call_id: toolCallId } : {}),
      ...(toolCalls ? { tool_calls: toolCalls } : {}),
    };
  });
/**
 * Resolve the chat completions URL for a Copilot token.
 *
 * Uses `token.endpoints.api` as the base when it is present and non-empty,
 * otherwise falls back to the public Copilot API host. Trailing slashes on
 * the base are removed so the result never contains `//chat/completions`.
 *
 * @param token - Copilot token, optionally carrying an `endpoints.api` base URL.
 * @returns Absolute URL of the chat completions endpoint.
 */
export const getEndpoint = (token: CopilotToken): string => {
  const defaultBase = "https://api.githubcopilot.com";
  // Strip trailing slashes first so that "" and "/" both count as "no base".
  const configuredBase = (token.endpoints?.api ?? "").replace(/\/+$/, "");
  const base = configuredBase.length > 0 ? configuredBase : defaultBase;
  return `${base}/chat/completions`;
};
/**
 * Assemble the JSON payload for a chat completions call.
 *
 * @param messages - Conversation so far; converted with formatMessages.
 * @param model - Model identifier placed in the body as-is.
 * @param options - Optional overrides; `maxTokens` defaults to 4096 and
 *   `temperature` to 0.3. A non-empty `tools` list is forwarded together
 *   with `tool_choice: "auto"`.
 * @param stream - Whether to request a streamed reply (default false).
 * @returns The request body ready to be posted.
 */
export const buildRequestBody = (
  messages: Message[],
  model: string,
  options?: ChatCompletionOptions,
  stream = false,
): ChatRequestBody => {
  const requestedTools = options?.tools;
  // Tool fields are added only for a non-empty list, never as empty keys.
  const toolFields =
    requestedTools && requestedTools.length > 0
      ? { tools: requestedTools, tool_choice: "auto" }
      : {};

  return {
    model,
    messages: formatMessages(messages),
    max_tokens: options?.maxTokens ?? 4096,
    temperature: options?.temperature ?? 0.3,
    stream,
    ...toolFields,
  };
};
/**
 * Perform a single (non-streaming) chat completions call.
 *
 * Posts `body` as JSON with the standard Copilot headers, then maps the
 * first returned choice onto a ChatCompletionResponse.
 *
 * @param endpoint - Full chat completions URL.
 * @param token - Copilot token used to build the request headers.
 * @param body - Request payload.
 * @returns Content (or null), finish reason, and, when present in the
 *   reply, tool calls and token usage (missing counters reported as 0).
 * @throws Error when the reply carries an `error` object or has no choices.
 */
export const executeChatRequest = async (
  endpoint: string,
  token: CopilotToken,
  body: ChatRequestBody,
): Promise<ChatCompletionResponse> => {
  const payload = await got
    .post(endpoint, { headers: buildCopilotHeaders(token), json: body })
    .json<ChatApiResponse>();

  if (payload.error) {
    throw new Error(payload.error.message ?? "Copilot API error");
  }

  const firstChoice = payload.choices?.[0];
  if (!firstChoice) {
    throw new Error("No response from Copilot");
  }

  const { message, finish_reason: finishReason } = firstChoice;
  const toolCalls = message?.tool_calls;
  const { usage } = payload;

  return {
    content: message?.content ?? null,
    finishReason,
    ...(toolCalls ? { toolCalls } : {}),
    ...(usage
      ? {
          usage: {
            promptTokens: usage.prompt_tokens ?? 0,
            completionTokens: usage.completion_tokens ?? 0,
            totalTokens: usage.total_tokens ?? 0,
          },
        }
      : {}),
  };
};
/**
 * Execute streaming chat request
 *
 * Posts `body` and parses the reply as server-sent events: every line that
 * starts with `data: ` carries either a JSON delta or the `[DONE]` sentinel.
 * Content deltas, tool-call deltas and the final `done` marker are forwarded
 * to `onChunk` in arrival order.
 *
 * @param endpoint - Full chat completions URL.
 * @param token - Copilot token used to build the request headers.
 * @param body - Request payload (expected to have `stream: true`).
 * @param onChunk - Receives `content`, `tool_call`, `done` and `error` chunks.
 * @returns Resolves when the HTTP stream ends; rejects on a stream error
 *   (after an `error` chunk has been delivered to `onChunk`).
 */
export const executeStreamRequest = (
  endpoint: string,
  token: CopilotToken,
  body: ChatRequestBody,
  onChunk: (chunk: StreamChunk) => void,
): Promise<void> =>
  new Promise((resolve, reject) => {
    const stream = got.stream.post(endpoint, {
      headers: buildCopilotHeaders(token),
      json: body,
    });

    // A streaming decoder keeps multi-byte UTF-8 characters intact when the
    // network splits them across two chunks (Buffer#toString per chunk would
    // emit replacement characters instead).
    const decoder = new TextDecoder("utf-8");
    let buffer = "";
    let sawDone = false;

    // Handles one complete line of the event stream.
    const handleLine = (line: string): void => {
      if (sawDone || !line.startsWith("data: ")) return;

      const jsonStr = line.slice(6).trim();
      if (jsonStr === "[DONE]") {
        // Terminal sentinel: report it once and ignore anything after it.
        sawDone = true;
        onChunk({ type: "done" });
        return;
      }

      try {
        const parsed = JSON.parse(jsonStr);
        const delta = parsed.choices?.[0]?.delta;
        if (delta?.content) {
          onChunk({ type: "content", content: delta.content });
        }
        if (delta?.tool_calls) {
          for (const tc of delta.tool_calls) {
            onChunk({ type: "tool_call", toolCall: tc });
          }
        }
      } catch {
        // Best-effort: a malformed payload is skipped rather than aborting
        // the stream. NOTE: this also swallows exceptions thrown by onChunk.
      }
    };

    stream.on("data", (data: Buffer) => {
      buffer += decoder.decode(data, { stream: true });
      const lines = buffer.split("\n");
      // The last element is an incomplete line (or ""); keep it for later.
      buffer = lines.pop() ?? "";
      for (const line of lines) {
        handleLine(line);
      }
    });

    stream.on("error", (error: Error) => {
      onChunk({ type: "error", error: error.message });
      reject(error);
    });

    stream.on("end", () => {
      // Flush a final line that was not terminated by a newline.
      const tail = buffer + decoder.decode();
      buffer = "";
      handleLine(tail);
      resolve();
    });
  });

22
src/api/copilot/index.ts Normal file
View File

@@ -0,0 +1,22 @@
/**
* Copilot API exports
*/
export {
fetchCopilotToken,
buildCopilotHeaders,
} from "@api/copilot/token";
export {
requestDeviceCode,
requestAccessToken,
} from "@api/copilot/auth";
export { fetchModels } from "@api/copilot/models";
export {
getEndpoint,
buildRequestBody,
executeChatRequest,
executeStreamRequest,
} from "@api/copilot/chat";

31
src/api/copilot/models.ts Normal file
View File

@@ -0,0 +1,31 @@
/**
* Copilot Models API
*
* Low-level API calls for fetching available models
*/
import got from "got";
import { COPILOT_MODELS_URL } from "@constants/copilot";
import type { CopilotToken } from "@/types/copilot";
import type { ModelsApiResponse } from "@interfaces/CopilotModels";
/**
 * Retrieve the model catalogue from the Copilot API.
 *
 * Issues a GET to COPILOT_MODELS_URL authenticated with the token's bearer
 * value and the editor identification headers, and resolves with the
 * JSON-decoded reply.
 *
 * @param token - Copilot token whose `token` field is sent as the bearer.
 * @returns The parsed models response.
 */
export const fetchModels = async (
  token: CopilotToken,
): Promise<ModelsApiResponse> => {
  const requestHeaders = {
    Authorization: `Bearer ${token.token}`,
    Accept: "application/json",
    "User-Agent": "GitHubCopilotChat/0.26.7",
    "Editor-Version": "vscode/1.105.1",
    "Editor-Plugin-Version": "copilot-chat/0.26.7",
  };
  const catalogue = await got
    .get(COPILOT_MODELS_URL, { headers: requestHeaders })
    .json<ModelsApiResponse>();
  return catalogue;
};

46
src/api/copilot/token.ts Normal file
View File

@@ -0,0 +1,46 @@
/**
* Copilot Token API
*
* Low-level API calls for Copilot token management
*/
import got from "got";
import { COPILOT_AUTH_URL } from "@constants/copilot";
import type { CopilotToken } from "@/types/copilot";
/**
 * Obtain a Copilot token in exchange for a GitHub OAuth token.
 *
 * Issues a GET to COPILOT_AUTH_URL with `Authorization: token <oauthToken>`
 * and resolves with the JSON-decoded reply.
 *
 * @param oauthToken - GitHub OAuth token placed in the Authorization header.
 * @returns The parsed Copilot token.
 * @throws Error when the reply has no (or an empty) `token` field.
 */
export const fetchCopilotToken = async (
  oauthToken: string,
): Promise<CopilotToken> => {
  const requestHeaders = {
    Authorization: `token ${oauthToken}`,
    Accept: "application/json",
  };
  const copilotToken = await got
    .get(COPILOT_AUTH_URL, { headers: requestHeaders })
    .json<CopilotToken>();

  if (copilotToken.token) {
    return copilotToken;
  }
  throw new Error("Failed to refresh Copilot token");
};
/**
 * Produce the header set attached to Copilot API requests.
 *
 * @param token - Copilot token whose `token` field becomes the bearer value.
 * @returns A fresh header map: bearer authorization, JSON content type, and
 *   the fixed editor/integration identification headers.
 */
export const buildCopilotHeaders = (
  token: CopilotToken,
): Record<string, string> => {
  const { token: bearerValue } = token;
  const headers: Record<string, string> = {
    Authorization: `Bearer ${bearerValue}`,
    "Content-Type": "application/json",
    "User-Agent": "GitHubCopilotChat/0.26.7",
    "Editor-Version": "vscode/1.105.1",
    "Editor-Plugin-Version": "copilot-chat/0.26.7",
    "Copilot-Integration-Id": "vscode-chat",
    "Openai-Intent": "conversation-edits",
  };
  return headers;
};

9
src/api/index.ts Normal file
View File

@@ -0,0 +1,9 @@
/**
* API Layer
*
* Low-level HTTP API calls for external services.
* Business logic should remain in providers/services.
*/
export * as copilotApi from "@api/copilot";
export * as ollamaApi from "@api/ollama";

105
src/api/ollama/chat.ts Normal file
View File

@@ -0,0 +1,105 @@
/**
* Ollama Chat API
*
* Low-level API calls for chat completions
*/
import got from "got";
import { OLLAMA_ENDPOINTS, OLLAMA_TIMEOUTS } from "@constants/ollama";
import type {
OllamaChatRequest,
OllamaChatResponse,
} from "@/types/ollama";
import type { StreamChunk } from "@/types/providers";
/**
 * Perform a single (non-streaming) chat call against an Ollama server.
 *
 * @param baseUrl - Server base URL; OLLAMA_ENDPOINTS.CHAT is appended to it.
 * @param body - Request payload, sent as JSON.
 * @returns The JSON-decoded reply.
 * @throws Error carrying the reply's `error` text when that field is set.
 */
export const executeChatRequest = async (
  baseUrl: string,
  body: OllamaChatRequest,
): Promise<OllamaChatResponse> => {
  const chatUrl = `${baseUrl}${OLLAMA_ENDPOINTS.CHAT}`;
  const reply = await got
    .post(chatUrl, {
      json: body,
      timeout: { request: OLLAMA_TIMEOUTS.CHAT },
    })
    .json<OllamaChatResponse>();

  if (!reply.error) {
    return reply;
  }
  throw new Error(reply.error);
};
/**
 * Execute streaming chat request to Ollama
 *
 * Posts `body` and parses the reply as newline-delimited JSON. Each line is
 * decoded as an OllamaChatResponse and translated into `content`,
 * `tool_call`, `done` or `error` chunks for `onChunk`.
 *
 * Tool calls that arrive without an id get a generated one that is unique
 * within this stream, so several id-less calls in the same message stay
 * distinguishable. Object-valued arguments are serialised to a JSON string.
 *
 * @param baseUrl - Server base URL; OLLAMA_ENDPOINTS.CHAT is appended to it.
 * @param body - Request payload, sent as JSON.
 * @param onChunk - Receives the translated stream chunks in arrival order.
 * @returns Resolves when the HTTP stream ends; rejects on a stream error
 *   (after an `error` chunk has been delivered to `onChunk`).
 */
export const executeStreamRequest = (
  baseUrl: string,
  body: OllamaChatRequest,
  onChunk: (chunk: StreamChunk) => void,
): Promise<void> =>
  new Promise((resolve, reject) => {
    const stream = got.stream.post(`${baseUrl}${OLLAMA_ENDPOINTS.CHAT}`, {
      json: body,
      timeout: { request: OLLAMA_TIMEOUTS.CHAT },
    });

    // A streaming decoder keeps multi-byte UTF-8 characters intact when the
    // network splits them across two chunks.
    const decoder = new TextDecoder("utf-8");
    let buffer = "";
    // Disambiguates generated ids: Date.now() alone repeats within one tick.
    let generatedIdCount = 0;

    // Handles one complete NDJSON line.
    const handleLine = (line: string): void => {
      if (!line.trim()) return;
      try {
        const parsed = JSON.parse(line) as OllamaChatResponse;
        if (parsed.error) {
          onChunk({ type: "error", error: parsed.error });
          return;
        }
        if (parsed.message?.content) {
          onChunk({ type: "content", content: parsed.message.content });
        }
        if (parsed.message?.tool_calls) {
          for (const tc of parsed.message.tool_calls) {
            const rawArguments = tc.function.arguments;
            onChunk({
              type: "tool_call",
              toolCall: {
                id: tc.id ?? `call_${Date.now()}_${generatedIdCount++}`,
                function: {
                  name: tc.function.name,
                  arguments:
                    typeof rawArguments === "string"
                      ? rawArguments
                      : JSON.stringify(rawArguments),
                },
              },
            });
          }
        }
        if (parsed.done) {
          onChunk({ type: "done" });
        }
      } catch {
        // Best-effort: a malformed line is skipped rather than aborting the
        // stream. NOTE: this also swallows exceptions thrown by onChunk.
      }
    };

    stream.on("data", (data: Buffer) => {
      buffer += decoder.decode(data, { stream: true });
      const lines = buffer.split("\n");
      // The last element is an incomplete line (or ""); keep it for later.
      buffer = lines.pop() ?? "";
      for (const line of lines) {
        handleLine(line);
      }
    });

    stream.on("error", (error: Error) => {
      onChunk({ type: "error", error: error.message });
      reject(error);
    });

    stream.on("end", () => {
      // Flush a final line that was not terminated by a newline.
      const tail = buffer + decoder.decode();
      buffer = "";
      handleLine(tail);
      resolve();
    });
  });

13
src/api/ollama/index.ts Normal file
View File

@@ -0,0 +1,13 @@
/**
* Ollama API exports
*/
export {
executeChatRequest,
executeStreamRequest,
} from "@api/ollama/chat";
export {
fetchModels,
checkHealth,
} from "@api/ollama/models";

38
src/api/ollama/models.ts Normal file
View File

@@ -0,0 +1,38 @@
/**
* Ollama Models API
*
* Low-level API calls for model management
*/
import got from "got";
import { OLLAMA_ENDPOINTS, OLLAMA_TIMEOUTS } from "@constants/ollama";
import type { OllamaTagsResponse } from "@/types/ollama";
/**
 * Retrieve the list of models known to an Ollama server.
 *
 * @param baseUrl - Server base URL; OLLAMA_ENDPOINTS.TAGS is appended to it.
 * @returns The JSON-decoded tags response.
 */
export const fetchModels = async (
  baseUrl: string,
): Promise<OllamaTagsResponse> => {
  const tagsUrl = `${baseUrl}${OLLAMA_ENDPOINTS.TAGS}`;
  const tags = await got
    .get(tagsUrl, { timeout: { request: OLLAMA_TIMEOUTS.TAGS } })
    .json<OllamaTagsResponse>();
  return tags;
};
/**
 * Probe whether an Ollama server answers on its tags endpoint.
 *
 * Any failure of the GET (including exceeding the VALIDATION timeout) is
 * reported as `false`; this function never throws.
 *
 * @param baseUrl - Server base URL; OLLAMA_ENDPOINTS.TAGS is appended to it.
 * @returns `true` when the request completes successfully, else `false`.
 */
export const checkHealth = async (baseUrl: string): Promise<boolean> => {
  let reachable = true;
  try {
    const probeUrl = `${baseUrl}${OLLAMA_ENDPOINTS.TAGS}`;
    await got.get(probeUrl, {
      timeout: { request: OLLAMA_TIMEOUTS.VALIDATION },
    });
  } catch {
    reachable = false;
  }
  return reachable;
};

View File

@@ -3,3 +3,9 @@
*/
export const MAX_ITERATIONS = 50;
/**
* Maximum consecutive tool errors before stopping the agent loop
* Prevents wasting tokens on repeated validation failures
*/
export const MAX_CONSECUTIVE_ERRORS = 3;

View File

@@ -18,6 +18,7 @@ export const OLLAMA_ENDPOINTS = {
// Per-request timeouts handed to got as `timeout.request`
// (got interprets these as milliseconds).
export const OLLAMA_TIMEOUTS = {
// Used by the Ollama health check (checkHealth).
VALIDATION: 5000,
// Used when listing models (fetchModels).
TAGS: 10000,
// Used for both streaming and non-streaming chat requests.
CHAT: 120000,
} as const;

View File

@@ -39,6 +39,9 @@ const DEFAULT_COLORS: ThemeColors = {
diffContext: "#808080",
diffHeader: "#ffffff",
diffHunk: "#00ffff",
diffLineBgAdded: "#1a3d1a",
diffLineBgRemoved: "#3d1a1a",
diffLineText: "#ffffff",
roleUser: "#00ffff",
roleAssistant: "#00ff00",
@@ -92,6 +95,9 @@ const DRACULA_COLORS: ThemeColors = {
diffContext: "#6272a4",
diffHeader: "#f8f8f2",
diffHunk: "#8be9fd",
diffLineBgAdded: "#1a3d2a",
diffLineBgRemoved: "#3d1a2a",
diffLineText: "#f8f8f2",
roleUser: "#8be9fd",
roleAssistant: "#50fa7b",
@@ -145,6 +151,9 @@ const NORD_COLORS: ThemeColors = {
diffContext: "#4c566a",
diffHeader: "#eceff4",
diffHunk: "#81a1c1",
diffLineBgAdded: "#2e3d35",
diffLineBgRemoved: "#3d2e35",
diffLineText: "#eceff4",
roleUser: "#88c0d0",
roleAssistant: "#a3be8c",
@@ -198,6 +207,9 @@ const TOKYO_NIGHT_COLORS: ThemeColors = {
diffContext: "#565f89",
diffHeader: "#c0caf5",
diffHunk: "#7dcfff",
diffLineBgAdded: "#1a2d1a",
diffLineBgRemoved: "#2d1a2a",
diffLineText: "#c0caf5",
roleUser: "#7dcfff",
roleAssistant: "#9ece6a",
@@ -251,6 +263,9 @@ const GRUVBOX_COLORS: ThemeColors = {
diffContext: "#665c54",
diffHeader: "#ebdbb2",
diffHunk: "#8ec07c",
diffLineBgAdded: "#3d3a1a",
diffLineBgRemoved: "#3d1a1a",
diffLineText: "#ebdbb2",
roleUser: "#83a598",
roleAssistant: "#b8bb26",
@@ -304,6 +319,9 @@ const MONOKAI_COLORS: ThemeColors = {
diffContext: "#75715e",
diffHeader: "#f8f8f2",
diffHunk: "#66d9ef",
diffLineBgAdded: "#2d3d1a",
diffLineBgRemoved: "#3d1a2a",
diffLineText: "#f8f8f2",
roleUser: "#66d9ef",
roleAssistant: "#a6e22e",
@@ -357,6 +375,9 @@ const CATPPUCCIN_COLORS: ThemeColors = {
diffContext: "#6c7086",
diffHeader: "#cdd6f4",
diffHunk: "#89dceb",
diffLineBgAdded: "#1a3d2a",
diffLineBgRemoved: "#3d1a2a",
diffLineText: "#cdd6f4",
roleUser: "#89dceb",
roleAssistant: "#a6e3a1",
@@ -410,6 +431,9 @@ const ONE_DARK_COLORS: ThemeColors = {
diffContext: "#5c6370",
diffHeader: "#abb2bf",
diffHunk: "#56b6c2",
diffLineBgAdded: "#2a3d2a",
diffLineBgRemoved: "#3d2a2a",
diffLineText: "#abb2bf",
roleUser: "#56b6c2",
roleAssistant: "#98c379",
@@ -463,6 +487,9 @@ const SOLARIZED_DARK_COLORS: ThemeColors = {
diffContext: "#586e75",
diffHeader: "#93a1a1",
diffHunk: "#2aa198",
diffLineBgAdded: "#0a2a1a",
diffLineBgRemoved: "#2a0a1a",
diffLineText: "#93a1a1",
roleUser: "#2aa198",
roleAssistant: "#859900",
@@ -516,6 +543,9 @@ const GITHUB_DARK_COLORS: ThemeColors = {
diffContext: "#8b949e",
diffHeader: "#c9d1d9",
diffHunk: "#58a6ff",
diffLineBgAdded: "#0d2818",
diffLineBgRemoved: "#2d0d0d",
diffLineText: "#c9d1d9",
roleUser: "#58a6ff",
roleAssistant: "#3fb950",
@@ -569,6 +599,9 @@ const ROSE_PINE_COLORS: ThemeColors = {
diffContext: "#6e6a86",
diffHeader: "#e0def4",
diffHunk: "#9ccfd8",
diffLineBgAdded: "#1a2a3d",
diffLineBgRemoved: "#3d1a2a",
diffLineText: "#e0def4",
roleUser: "#9ccfd8",
roleAssistant: "#31748f",
@@ -622,6 +655,9 @@ const KANAGAWA_COLORS: ThemeColors = {
diffContext: "#727169",
diffHeader: "#dcd7ba",
diffHunk: "#7fb4ca",
diffLineBgAdded: "#2a3d2a",
diffLineBgRemoved: "#3d2a2a",
diffLineText: "#dcd7ba",
roleUser: "#7fb4ca",
roleAssistant: "#98bb6c",
@@ -675,6 +711,9 @@ const AYU_DARK_COLORS: ThemeColors = {
diffContext: "#636e78",
diffHeader: "#bfbdb6",
diffHunk: "#59c2ff",
diffLineBgAdded: "#1a3d1a",
diffLineBgRemoved: "#3d1a1a",
diffLineText: "#bfbdb6",
roleUser: "#59c2ff",
roleAssistant: "#7fd962",
@@ -728,6 +767,9 @@ const CARGDEV_CYBERPUNK_COLORS: ThemeColors = {
diffContext: "#666666",
diffHeader: "#f8f8f2",
diffHunk: "#8be9fd",
diffLineBgAdded: "#0d2a1a",
diffLineBgRemoved: "#2a0d1a",
diffLineText: "#f8f8f2",
roleUser: "#8be9fd",
roleAssistant: "#50fa7b",

View File

@@ -11,3 +11,8 @@ export const SCHEMA_SKIP_VALUES: Record<string, unknown> = {
export type SchemaSkipKey = (typeof SCHEMA_SKIP_KEYS)[number];
export const TOOL_NAMES = ["read", "glob", "grep"];
/**
* Tools that can modify files
*/
export const FILE_MODIFYING_TOOLS = ["write", "edit"] as const;

View File

@@ -4,10 +4,11 @@
// Keyboard hints displayed in status bar
export const STATUS_HINTS = {
INTERRUPT: "ctrl+c to interrupt",
INTERRUPT_CONFIRM: "ctrl+c again to confirm",
INTERRUPT: "esc to interrupt",
INTERRUPT_CONFIRM: "ctrl+c again to exit",
TOGGLE_TODOS: "ctrl+t to hide todos",
TOGGLE_TODOS_SHOW: "ctrl+t to show todos",
TOGGLE_PLAN: "ctrl+p to toggle plan",
} as const;
// Time formatting
@@ -39,3 +40,10 @@ export const TERMINAL_SEQUENCES = {
HIDE_CURSOR: "\x1b[?25l",
SHOW_CURSOR: "\x1b[?25h",
} as const;
// Progress bar display
export const PROGRESS_BAR = {
WIDTH: 40,
FILLED_CHAR: "█",
EMPTY_CHAR: "░",
} as const;

View File

@@ -0,0 +1,32 @@
/**
* Copilot Models API Interfaces
*/
/**
* Billing block attached to a model entry (ModelsApiModel.billing).
* NOTE(review): field meanings below are inferred from their names — confirm
* against the Copilot models API.
*/
export interface ModelBilling {
// Presumably marks models that count against a premium quota.
is_premium: boolean;
// Presumably the cost multiplier applied per request.
multiplier: number;
// Presumably the plans/audiences allowed to use the model, when restricted.
restricted_to?: string[];
}
/**
* Capability block attached to a model entry (ModelsApiModel.capabilities).
* Every field is optional; consumers must tolerate missing sections.
*/
export interface ModelCapabilities {
// Model category string as reported by the API (values not shown here).
type?: string;
limits?: {
// Presumably the largest completion size the model accepts — confirm.
max_output_tokens?: number;
};
// Feature flags reported for the model.
supports?: {
tool_calls?: boolean;
streaming?: boolean;
};
}
/**
* One entry of the models list (element type of ModelsApiResponse.data).
* Only `id` is required; all descriptive fields may be absent.
*/
export interface ModelsApiModel {
id: string;
name?: string;
// NOTE(review): presumably whether the model should be offered in a picker UI — confirm.
model_picker_enabled?: boolean;
billing?: ModelBilling;
capabilities?: ModelCapabilities;
}
/**
* Top-level JSON shape of the Copilot models endpoint reply, as returned by
* the Copilot fetchModels API call.
*/
export interface ModelsApiResponse {
data: ModelsApiModel[];
}

View File

@@ -0,0 +1,10 @@
/**
* Stream callbacks with state tracking
*/
import type { StreamCallbacks } from "@/types/streaming";
/**
* Pairs a StreamCallbacks object with a query function over state that is
* tracked alongside it.
*/
export interface StreamCallbacksWithState {
callbacks: StreamCallbacks;
// NOTE(review): presumably true once any content chunk has been delivered
// through `callbacks` — confirm against the factory that builds this object.
hasReceivedContent: () => boolean;
}

View File

@@ -0,0 +1,10 @@
/**
* Streaming Chat Options
*/
import type { AgentOptions } from "@interfaces/AgentOptions";
import type { ModelSwitchInfo } from "@/types/streaming";
/**
* AgentOptions extended with an optional model-switch notification hook.
*/
export interface StreamingChatOptions extends AgentOptions {
// NOTE(review): presumably invoked when the active model changes mid-chat,
// with details in ModelSwitchInfo — confirm where this is called.
onModelSwitch?: (info: ModelSwitchInfo) => void;
}

View File

@@ -21,10 +21,19 @@ You are an AUTONOMOUS agent. When given a task:
## When to Use Tools Proactively
Before answering questions or making changes, ALWAYS:
- **Detect project type first**: Use glob to find config files (tsconfig.json, package.json, pom.xml, Cargo.toml, go.mod)
- **Use glob** to find relevant files when you need to understand project structure
- **Use grep** to search for patterns, function definitions, or implementations
- **Use read** to understand existing code before making changes
- **Use bash** for git operations, running tests, builds, and npm/bun commands
- **Use bash** for git operations, running tests, builds, type-checking, and compiling
## CRITICAL: Execute Commands When Requested
When the user explicitly asks you to run a command (e.g., "run tree", "run ls", "execute bash"), you MUST:
1. **Actually run the command** using the bash tool - do NOT just explain what it would do
2. Show the real output from the command
3. Never substitute a command request with a text explanation
4. If a command fails, show the actual error
## Examples of Agentic Behavior
@@ -55,6 +64,15 @@ assistant: [Uses grep to find auth middleware]
The auth middleware in src/middleware/auth.ts:15 validates JWT tokens and attaches the user object to the request.
</example>
<example>
user: create tests for the validation module
assistant: [Uses read to understand src/utils/validation.ts]
[Uses glob to check existing test patterns]
[Uses write to create tests/validation.test.ts]
[Uses bash to run bun test tests/validation.test.ts]
Created tests/validation.test.ts with 12 tests covering all validation functions. All tests pass.
</example>
# Tone and Style
- Be concise. Keep responses under 4 lines unless the task requires more detail
@@ -82,6 +100,17 @@ assistant: [Uses bash to run ls src/]
foo.ts, bar.ts, index.ts
</example>
<example>
user: run tree to show me the project structure
assistant: [Uses bash to run tree -L 2]
.
├── src
│ ├── components
│ └── utils
├── package.json
└── tsconfig.json
</example>
# Tool Usage Policy
You have access to these tools - use them proactively:
@@ -117,7 +146,81 @@ When performing software engineering tasks:
2. **Read existing code**: Understand patterns and conventions before changes
3. **Make incremental changes**: One logical change at a time
4. **Follow conventions**: Match existing code style and patterns
5. **Verify changes**: Run tests/lint when possible
5. **ALWAYS verify your work**: Run tests, builds, or linters to confirm changes work
## CRITICAL: Always Verify Your Work
### Step 1: Understand Project Context
Before making changes, detect the project type by checking for config files:
- \`tsconfig.json\` → TypeScript project → validate with \`tsc --noEmit\` or \`npx tsc --noEmit\`
- \`package.json\` → Node.js project → check scripts for test/build commands
- \`pom.xml\` → Java Maven → validate with \`mvn compile\`
- \`build.gradle\` → Java Gradle → validate with \`./gradlew build\`
- \`Cargo.toml\` → Rust → validate with \`cargo check\`
- \`go.mod\` → Go → validate with \`go build ./...\`
- \`pyproject.toml\` or \`setup.py\` → Python → validate with \`python -m py_compile\`
If you haven't examined the project structure yet, do it first with glob/read.
### Step 2: Validate After Every Change
After creating or modifying code, you MUST run the appropriate validation:
| Project Type | Validation Command |
|--------------|-------------------|
| TypeScript | \`tsc --noEmit\` or \`bun build --dry-run\` |
| JavaScript | \`node --check <file>\` or run tests |
| Java | \`mvn compile\` or \`./gradlew compileJava\` |
| Rust | \`cargo check\` |
| Go | \`go build ./...\` |
| Python | \`python -m py_compile <file>\` |
### Step 3: Run Tests
- **Created tests?** → Run them immediately
- **Modified code?** → Run existing tests to ensure nothing broke
- **Added new feature?** → Test it manually or run relevant test suites
NEVER say "let me know if you want me to run the tests" - just run them yourself.
NEVER leave work unverified. Complete the full loop: create → type-check → test → confirm.
### Validation Order (TypeScript Projects)
For TypeScript projects, ALWAYS run in this order:
1. \`tsc --noEmit\` - Catch type errors first
2. \`bun test <file>\` or \`npm test\` - Run tests
3. If either fails, fix and re-run both
<example>
user: create a utility function for string formatting
assistant: [Uses glob to find tsconfig.json - confirms TypeScript project]
[Uses read to understand existing utils]
[Uses write to create src/utils/format.ts]
[Uses bash: tsc --noEmit] → No errors
[Uses write to create tests/format.test.ts]
[Uses bash: bun test tests/format.test.ts] → 8 tests pass
Created format.ts with formatCurrency, formatDate, formatNumber. Types check. All 8 tests pass.
</example>
<example>
user: add a new field to the User type
assistant: [Uses glob to find tsconfig.json - TypeScript project]
[Uses read to examine src/types/user.ts]
[Uses edit to add the new field]
[Uses bash: tsc --noEmit] → Error: Property 'email' missing in 3 files
[Uses edit to fix src/services/user.ts]
[Uses edit to fix src/api/users.ts]
[Uses bash: tsc --noEmit] → No errors
[Uses bash: bun test] → All tests pass
Added 'email' field to User type. Fixed 3 files that needed the new field. Types check. Tests pass.
</example>
<example>
user: fix the bug in UserService.java
assistant: [Uses glob to find pom.xml - confirms Maven project]
[Uses read to examine UserService.java]
[Uses edit to fix the bug]
[Uses bash: mvn compile] → BUILD SUCCESS
[Uses bash: mvn test -Dtest=UserServiceTest] → Tests pass
Fixed null pointer in UserService.java:45. Compiles successfully. Tests pass.
</example>
## Task Tracking

View File

@@ -245,6 +245,7 @@ const executeStream = (
if (delta?.tool_calls) {
for (const tc of delta.tool_calls) {
addDebugLog("api", `Tool call chunk: ${JSON.stringify(tc)}`);
onChunk({ type: "tool_call", toolCall: tc });
}
}

View File

@@ -1,203 +0,0 @@
/**
* Unit tests for Streaming Agent
*/
import { describe, it, expect } from "bun:test";
import {
createInitialStreamingState,
createStreamAccumulator,
} from "@/types/streaming";
import type {
StreamingState,
StreamAccumulator,
PartialToolCall,
} from "@/types/streaming";
describe("Streaming Agent Types", () => {
describe("createInitialStreamingState", () => {
it("should create state with idle status", () => {
const state = createInitialStreamingState();
expect(state.status).toBe("idle");
expect(state.content).toBe("");
expect(state.pendingToolCalls).toHaveLength(0);
expect(state.completedToolCalls).toHaveLength(0);
expect(state.error).toBeNull();
expect(state.modelSwitched).toBeNull();
});
});
describe("createStreamAccumulator", () => {
it("should create empty accumulator", () => {
const accumulator = createStreamAccumulator();
expect(accumulator.content).toBe("");
expect(accumulator.toolCalls.size).toBe(0);
expect(accumulator.modelSwitch).toBeNull();
});
it("should accumulate content", () => {
const accumulator = createStreamAccumulator();
accumulator.content += "Hello ";
accumulator.content += "World";
expect(accumulator.content).toBe("Hello World");
});
it("should store partial tool calls", () => {
const accumulator = createStreamAccumulator();
const partial: PartialToolCall = {
index: 0,
id: "call_123",
name: "read",
argumentsBuffer: '{"path": "/test',
isComplete: false,
};
accumulator.toolCalls.set(0, partial);
expect(accumulator.toolCalls.size).toBe(1);
expect(accumulator.toolCalls.get(0)?.name).toBe("read");
});
it("should accumulate tool call arguments", () => {
const accumulator = createStreamAccumulator();
const partial: PartialToolCall = {
index: 0,
id: "call_123",
name: "read",
argumentsBuffer: "",
isComplete: false,
};
accumulator.toolCalls.set(0, partial);
// Simulate streaming arguments
partial.argumentsBuffer += '{"path": ';
partial.argumentsBuffer += '"/test.ts"}';
expect(partial.argumentsBuffer).toBe('{"path": "/test.ts"}');
// Verify JSON is valid
const parsed = JSON.parse(partial.argumentsBuffer);
expect(parsed.path).toBe("/test.ts");
});
});
describe("StreamingState transitions", () => {
it("should represent idle to streaming transition", () => {
const state: StreamingState = {
...createInitialStreamingState(),
status: "streaming",
content: "Processing your request",
};
expect(state.status).toBe("streaming");
expect(state.content).toBe("Processing your request");
});
it("should represent tool call accumulation", () => {
const partial: PartialToolCall = {
index: 0,
id: "call_456",
name: "bash",
argumentsBuffer: '{"command": "ls -la"}',
isComplete: false,
};
const state: StreamingState = {
...createInitialStreamingState(),
status: "accumulating_tool",
pendingToolCalls: [partial],
};
expect(state.status).toBe("accumulating_tool");
expect(state.pendingToolCalls).toHaveLength(1);
expect(state.pendingToolCalls[0].name).toBe("bash");
});
it("should represent completion state", () => {
const state: StreamingState = {
...createInitialStreamingState(),
status: "complete",
content: "Task completed successfully.",
completedToolCalls: [
{ id: "call_789", name: "write", arguments: { path: "/out.txt" } },
],
};
expect(state.status).toBe("complete");
expect(state.completedToolCalls).toHaveLength(1);
});
it("should represent error state", () => {
const state: StreamingState = {
...createInitialStreamingState(),
status: "error",
error: "Connection timeout",
};
expect(state.status).toBe("error");
expect(state.error).toBe("Connection timeout");
});
it("should represent model switch", () => {
const state: StreamingState = {
...createInitialStreamingState(),
status: "streaming",
modelSwitched: {
from: "gpt-4",
to: "gpt-4-unlimited",
reason: "Quota exceeded",
},
};
expect(state.modelSwitched).not.toBeNull();
expect(state.modelSwitched?.from).toBe("gpt-4");
expect(state.modelSwitched?.to).toBe("gpt-4-unlimited");
});
});
describe("Tool call finalization", () => {
it("should convert partial to complete tool call", () => {
const partial: PartialToolCall = {
index: 0,
id: "call_abc",
name: "edit",
argumentsBuffer:
'{"file_path": "/src/app.ts", "old_string": "foo", "new_string": "bar"}',
isComplete: true,
};
const args = JSON.parse(partial.argumentsBuffer);
expect(args.file_path).toBe("/src/app.ts");
expect(args.old_string).toBe("foo");
expect(args.new_string).toBe("bar");
});
it("should handle malformed JSON gracefully", () => {
const partial: PartialToolCall = {
index: 0,
id: "call_def",
name: "read",
argumentsBuffer: '{"path": "/incomplete',
isComplete: true,
};
let args: Record<string, unknown> = {};
try {
args = JSON.parse(partial.argumentsBuffer);
} catch {
args = {};
}
expect(args).toEqual({});
});
});
});

View File

@@ -23,7 +23,7 @@ import type {
import { chatStream } from "@providers/chat";
import { getTool, getToolsForApi, refreshMCPTools } from "@tools/index";
import { initializePermissions } from "@services/permissions";
import { MAX_ITERATIONS } from "@constants/agent";
import { MAX_ITERATIONS, MAX_CONSECUTIVE_ERRORS } from "@constants/agent";
import { createStreamAccumulator } from "@/types/streaming";
// =============================================================================
@@ -80,33 +80,47 @@ const processStreamChunk = (
tool_call: () => {
if (!chunk.toolCall) return;
const tc = chunk.toolCall;
const index = tc.id ? getToolCallIndex(tc.id, accumulator) : 0;
const tc = chunk.toolCall as {
index?: number;
id?: string;
function?: { name?: string; arguments?: string };
};
// OpenAI streaming format includes index in each chunk
// Use index from chunk if available, otherwise find by id or default to 0
const chunkIndex = tc.index ?? (tc.id ? getToolCallIndex(tc.id, accumulator) : 0);
// Get or create partial tool call
let partial = accumulator.toolCalls.get(index);
if (!partial && tc.id) {
let partial = accumulator.toolCalls.get(chunkIndex);
if (!partial) {
// Create new partial - use id if provided, generate one otherwise
partial = {
index,
id: tc.id,
index: chunkIndex,
id: tc.id ?? `tool_${chunkIndex}_${Date.now()}`,
name: tc.function?.name ?? "",
argumentsBuffer: "",
isComplete: false,
};
accumulator.toolCalls.set(index, partial);
accumulator.toolCalls.set(chunkIndex, partial);
if (tc.id) {
callbacks.onToolCallStart?.(partial);
}
}
// Update id if provided (first chunk has the real id)
if (tc.id && partial.id.startsWith("tool_")) {
partial.id = tc.id;
callbacks.onToolCallStart?.(partial);
}
if (partial) {
// Update name if provided
if (tc.function?.name) {
partial.name = tc.function.name;
}
// Update name if provided
if (tc.function?.name) {
partial.name = tc.function.name;
}
// Accumulate arguments
if (tc.function?.arguments) {
partial.argumentsBuffer += tc.function.arguments;
}
// Accumulate arguments
if (tc.function?.arguments) {
partial.argumentsBuffer += tc.function.arguments;
}
},
@@ -165,10 +179,20 @@ const getToolCallIndex = (
*/
const finalizeToolCall = (partial: PartialToolCall): ToolCall => {
let args: Record<string, unknown> = {};
try {
args = JSON.parse(partial.argumentsBuffer || "{}");
} catch {
args = {};
const rawBuffer = partial.argumentsBuffer || "";
if (!rawBuffer) {
args = { __debug_error: "Empty arguments buffer" };
} else {
try {
args = JSON.parse(rawBuffer);
} catch (e) {
args = {
__debug_error: "JSON parse failed",
__debug_buffer: rawBuffer.substring(0, 200),
__debug_parseError: e instanceof Error ? e.message : String(e),
};
}
}
return {
@@ -210,12 +234,13 @@ const executeTool = async (
const validatedArgs = tool.parameters.parse(toolCall.arguments);
return await tool.execute(validatedArgs, ctx);
} catch (error: unknown) {
const receivedArgs = JSON.stringify(toolCall.arguments);
const errorMessage = error instanceof Error ? error.message : String(error);
return {
success: false,
title: "Tool error",
title: "Tool validation error",
output: "",
error: errorMessage,
error: `${toolCall.name}: ${errorMessage}\nReceived: ${receivedArgs}`,
};
}
};
@@ -296,6 +321,7 @@ export const runAgentLoopStream = async (
const allToolCalls: { call: ToolCall; result: ToolResult }[] = [];
let iterations = 0;
let finalResponse = "";
let consecutiveErrors = 0;
// Initialize
await initializePermissions();
@@ -331,6 +357,9 @@ export const runAgentLoopStream = async (
state.options.onText?.(response.content);
}
// Track if all tool calls in this iteration failed
let allFailed = true;
// Execute each tool call
for (const toolCall of response.toolCalls) {
state.options.onToolCall?.(toolCall);
@@ -340,6 +369,12 @@ export const runAgentLoopStream = async (
state.options.onToolResult?.(toolCall.id, result);
// Track success/failure
if (result.success) {
allFailed = false;
consecutiveErrors = 0;
}
// Add tool result message
const toolResultMessage: ToolResultMessage = {
role: "tool",
@@ -350,6 +385,21 @@ export const runAgentLoopStream = async (
};
agentMessages.push(toolResultMessage);
}
// Check for repeated failures
if (allFailed) {
consecutiveErrors++;
if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
const errorMsg = `Stopping: ${consecutiveErrors} consecutive tool errors. Check model compatibility with tool calling.`;
state.options.onError?.(errorMsg);
return {
success: false,
finalResponse: errorMsg,
iterations,
toolCalls: allToolCalls,
};
}
}
} else {
// No tool calls - this is the final response
finalResponse = response.content || "";

View File

@@ -24,7 +24,10 @@ export type {
export { initializeChatService } from "@services/chat-tui/initialize";
// Re-export message handling
export { handleMessage } from "@services/chat-tui/message-handler";
export {
handleMessage,
abortCurrentOperation,
} from "@services/chat-tui/message-handler";
// Re-export command handling
export { executeCommand } from "@services/chat-tui/commands";

View File

@@ -43,7 +43,7 @@ import {
checkOllamaAvailability,
checkCopilotAvailability,
} from "@services/cascading-provider";
import { chat } from "@providers/chat";
import { chat, getDefaultModel } from "@providers/chat";
import { AUDIT_SYSTEM_PROMPT, createAuditPrompt, parseAuditResponse } from "@prompts/audit-prompt";
import { PROVIDER_IDS } from "@constants/provider-quality";
import { appStore } from "@tui/index";
@@ -55,6 +55,12 @@ import type {
ToolCallInfo,
} from "@/types/chat-service";
import { addDebugLog } from "@tui-solid/components/debug-log-panel";
import { FILE_MODIFYING_TOOLS } from "@constants/tools";
import type { StreamCallbacksWithState } from "@interfaces/StreamCallbacksWithState";
import {
detectCommand,
executeDetectedCommand,
} from "@services/command-detection";
// Track last response for feedback learning
let lastResponseContext: {
@@ -63,7 +69,25 @@ let lastResponseContext: {
response: string;
} | null = null;
const FILE_MODIFYING_TOOLS = ["write", "edit"];
// Track current running agent for abort capability
let currentAgent: { stop: () => void } | null = null;
/**
 * Stop the agent tracked in `currentAgent`, if there is one, and return the
 * TUI to its idle state.
 *
 * @returns `true` when a tracked agent was stopped, `false` when no agent was
 *   being tracked.
 */
export const abortCurrentOperation = (): boolean => {
  const runningAgent = currentAgent;

  // Nothing in flight: report that no abort took place.
  if (!runningAgent) {
    return false;
  }

  runningAgent.stop();
  currentAgent = null;

  // Tear down the streaming/thinking indicators before switching mode.
  appStore.cancelStreaming();
  appStore.stopThinking();
  appStore.setMode("idle");
  addDebugLog("state", "Operation aborted by user");

  return true;
};
const createToolCallHandler =
(
@@ -72,7 +96,7 @@ const createToolCallHandler =
) =>
(call: { id: string; name: string; arguments?: Record<string, unknown> }) => {
const args = call.arguments;
if (FILE_MODIFYING_TOOLS.includes(call.name) && args?.path) {
if ((FILE_MODIFYING_TOOLS as readonly string[]).includes(call.name) && args?.path) {
toolCallRef.current = { name: call.name, path: String(args.path) };
} else {
toolCallRef.current = { name: call.name };
@@ -117,10 +141,10 @@ const createToolResultHandler =
/**
* Create streaming callbacks for TUI integration
*/
const createStreamCallbacks = (): StreamCallbacks => {
const createStreamCallbacks = (): StreamCallbacksWithState => {
let chunkCount = 0;
return {
const callbacks: StreamCallbacks = {
onContentChunk: (content: string) => {
chunkCount++;
addDebugLog("stream", `Chunk #${chunkCount}: "${content.substring(0, 30)}${content.length > 30 ? "..." : ""}"`);
@@ -155,8 +179,10 @@ const createStreamCallbacks = (): StreamCallbacks => {
},
onComplete: () => {
addDebugLog("stream", `Stream complete (${chunkCount} chunks)`);
appStore.completeStreaming();
// Note: Don't call completeStreaming() here!
// The agent loop may have multiple iterations (tool calls + final response)
// Streaming will be completed manually after the entire agent finishes
addDebugLog("stream", `Stream iteration done (${chunkCount} chunks total)`);
},
onError: (error: string) => {
@@ -168,6 +194,11 @@ const createStreamCallbacks = (): StreamCallbacks => {
});
},
};
return {
callbacks,
hasReceivedContent: () => chunkCount > 0,
};
};
/**
@@ -245,6 +276,50 @@ export const handleMessage = async (
// Check for feedback on previous response
await checkUserFeedback(message, callbacks);
// Detect explicit command requests and execute directly
const detected = detectCommand(message);
if (detected.detected && detected.command) {
addDebugLog("info", `Detected command: ${detected.command}`);
// Show the user's request
appStore.addLog({
type: "user",
content: message,
});
// Show what we're running
appStore.addLog({
type: "tool",
content: detected.command,
metadata: {
toolName: "bash",
toolStatus: "running",
toolDescription: `Running: ${detected.command}`,
},
});
appStore.setMode("tool_execution");
const result = await executeDetectedCommand(detected.command, process.cwd());
appStore.setMode("idle");
// Show result
if (result.success && result.output) {
appStore.addLog({
type: "assistant",
content: result.output,
});
} else if (!result.success) {
appStore.addLog({
type: "error",
content: result.error || "Command failed",
});
}
// Save to session (for persistence only, not UI)
await saveSession();
return;
}
// Get interaction mode and cascade setting from app store
const { interactionMode, cascadeEnabled } = appStore.getState();
const isReadOnlyMode = interactionMode === "ask" || interactionMode === "code-review";
@@ -397,23 +472,34 @@ export const handleMessage = async (
}
}
// Determine the correct model for the provider
// If provider changed, use the provider's default model instead of state.model
const effectiveModel =
effectiveProvider === state.provider
? state.model
: getDefaultModel(effectiveProvider);
// Start streaming UI
addDebugLog("state", `Starting request: provider=${effectiveProvider}, model=${state.model}`);
addDebugLog("state", `Starting request: provider=${effectiveProvider}, model=${effectiveModel}`);
addDebugLog("state", `Mode: ${appStore.getState().interactionMode}, Cascade: ${cascadeEnabled}`);
appStore.setMode("thinking");
appStore.startThinking();
appStore.startStreaming();
addDebugLog("state", "Streaming started");
const streamCallbacks = createStreamCallbacks();
const streamState = createStreamCallbacks();
const agent = createStreamingAgent(
process.cwd(),
{
provider: effectiveProvider,
model: state.model,
model: effectiveModel,
verbose: state.verbose,
autoApprove: state.autoApprove,
chatMode: isReadOnlyMode,
onText: (text: string) => {
addDebugLog("info", `onText callback: "${text.substring(0, 50)}..."`);
appStore.appendStreamContent(text);
},
onToolCall: createToolCallHandler(callbacks, toolCallRef),
onToolResult: createToolResultHandler(callbacks, toolCallRef),
onError: (error) => {
@@ -423,9 +509,12 @@ export const handleMessage = async (
callbacks.onLog("system", warning);
},
},
streamCallbacks,
streamState.callbacks,
);
// Store agent reference for abort capability
currentAgent = agent;
try {
addDebugLog("api", `Agent.run() started with ${state.messages.length} messages`);
const result = await agent.run(state.messages);
@@ -471,14 +560,18 @@ export const handleMessage = async (
// Check if streaming content was received - if not, add the response as a log
// This handles cases where streaming didn't work or content was all in final response
const streamingState = appStore.getState().streamingLog;
if (!streamingState.content && finalResponse) {
if (!streamState.hasReceivedContent() && finalResponse) {
addDebugLog("info", "No streaming content received, adding fallback log");
// Streaming didn't receive content, manually add the response
appStore.cancelStreaming(); // Remove empty streaming log
appStore.addLog({
type: "assistant",
content: finalResponse,
});
} else {
// Streaming received content - finalize the streaming log
addDebugLog("info", "Completing streaming with received content");
appStore.completeStreaming();
}
addMessage("user", message);
@@ -501,5 +594,8 @@ export const handleMessage = async (
appStore.cancelStreaming();
appStore.stopThinking();
callbacks.onLog("error", String(error));
} finally {
// Clear agent reference when done
currentAgent = null;
}
};

View File

@@ -7,6 +7,7 @@
import type { Message } from "@/types/providers";
import type { AgentOptions } from "@interfaces/AgentOptions";
import type { AgentResult } from "@interfaces/AgentResult";
import type { StreamingChatOptions } from "@interfaces/StreamingChatOptions";
import type {
StreamCallbacks,
PartialToolCall,
@@ -16,13 +17,8 @@ import type { ToolCall, ToolResult } from "@/types/tools";
import { createStreamingAgent } from "@services/agent-stream";
import { appStore } from "@tui/index";
// =============================================================================
// Types
// =============================================================================
export interface StreamingChatOptions extends AgentOptions {
onModelSwitch?: (info: ModelSwitchInfo) => void;
}
// Re-export for convenience
export type { StreamingChatOptions } from "@interfaces/StreamingChatOptions";
// =============================================================================
// TUI Streaming Callbacks

View File

@@ -5,16 +5,13 @@
import { usageStore } from "@stores/usage-store";
import { getUserInfo } from "@providers/copilot/credentials";
import { getCopilotUsage } from "@providers/copilot/usage";
import { PROGRESS_BAR } from "@constants/ui";
import type {
ChatServiceState,
ChatServiceCallbacks,
} from "@/types/chat-service";
import type { CopilotQuotaDetail } from "@/types/copilot-usage";
const BAR_WIDTH = 40;
const FILLED_CHAR = "█";
const EMPTY_CHAR = "░";
const formatNumber = (num: number): string => {
return num.toLocaleString();
};
@@ -35,9 +32,12 @@ const formatDuration = (ms: number): string => {
const renderBar = (percent: number): string => {
const clampedPercent = Math.max(0, Math.min(100, percent));
const filledWidth = Math.round((clampedPercent / 100) * BAR_WIDTH);
const emptyWidth = BAR_WIDTH - filledWidth;
return FILLED_CHAR.repeat(filledWidth) + EMPTY_CHAR.repeat(emptyWidth);
const filledWidth = Math.round((clampedPercent / 100) * PROGRESS_BAR.WIDTH);
const emptyWidth = PROGRESS_BAR.WIDTH - filledWidth;
return (
PROGRESS_BAR.FILLED_CHAR.repeat(filledWidth) +
PROGRESS_BAR.EMPTY_CHAR.repeat(emptyWidth)
);
};
const formatQuotaBar = (
@@ -55,7 +55,7 @@ const formatQuotaBar = (
if (quota.unlimited) {
lines.push(name);
lines.push(FILLED_CHAR.repeat(BAR_WIDTH) + " Unlimited");
lines.push(PROGRESS_BAR.FILLED_CHAR.repeat(PROGRESS_BAR.WIDTH) + " Unlimited");
return lines;
}

View File

@@ -0,0 +1,158 @@
/**
* Command Detection Service
*
* Detects when user explicitly requests to run a command
* and executes it directly without relying on LLM decision-making.
*/
import { executeBash } from "@tools/bash/execute";
import type { ToolContext } from "@/types/tools";
import { v4 as uuidv4 } from "uuid";
/**
 * Patterns that indicate an explicit command request.
 *
 * Capture group 1 of every pattern is the command text. The list is scanned in
 * order and the first match wins, so the more specific "run ..." forms must
 * come before anything more general; otherwise a generic `run <anything>`
 * pattern would swallow "run ls for me" as the command `ls for me`.
 */
const COMMAND_PATTERNS: readonly RegExp[] = [
  // "run a/the <command> command" — strips the article and the trailing word
  /^run\s+(?:a\s+|the\s+)?(.+?)\s+command$/i,
  // "[can you] [please] run <command> [for me]" — both prefixes and the
  // suffix are optional, so this also covers the bare "run <command>" form
  /^(?:can\s+you\s+)?(?:please\s+)?run\s+(.+?)(?:\s+for\s+me)?$/i,
  // "execute <command>" / "exec <command>"
  /^execute\s+(.+)$/i,
  /^exec\s+(.+)$/i,
  // "use <command> to ..." — only the first word after "use" is captured
  /^use\s+(\S+)\s+to\s+/i,
  // "show me [the] [output of] <command>"
  /^show\s+me\s+(?:the\s+)?(?:output\s+of\s+)?(.+)$/i,
];
/**
 * Shell commands that are run directly when a message begins with one of them.
 *
 * NOTE(review): several entries ("find", "which", "date", "top", ...) are also
 * ordinary English words that can start a sentence — confirm that treating
 * such messages as shell commands is intended.
 */
const DIRECT_COMMANDS = new Set<string>([
  // Listing and navigation
  "ls", "tree", "pwd",
  // File contents
  "cat", "head", "tail",
  // Searching and counting
  "find", "grep", "wc",
  // Disk usage
  "du", "df",
  // Processes
  "ps", "top",
  // Lookup and identity
  "which", "whoami",
  // Miscellaneous
  "date", "echo",
  // Environment and system information
  "env", "printenv", "uname",
]);
/**
* Outcome of inspecting a user message for an explicit command request.
*/
export interface DetectedCommand {
/** True when the message was recognised as a request to run a command. */
detected: boolean;
/** The command text to run; only set by the detector when `detected` is true. */
command?: string;
/** The message exactly as it was passed to the detector (not trimmed). */
originalMessage: string;
}
/**
 * Detect if the user message is an explicit command request.
 *
 * Two strategies are tried, in order:
 *  1. Each entry of `COMMAND_PATTERNS` is matched against the trimmed message;
 *     capture group 1 is normalised with `normalizeCommand` and accepted when
 *     the raw capture is shorter than 500 characters and the normalised
 *     command is not blank.
 *  2. Otherwise, if the first word (compared case-insensitively) is one of
 *     `DIRECT_COMMANDS`, the whole trimmed message is the command, with that
 *     first word lower-cased so it names the actual binary.
 *
 * @param message - Raw user input.
 * @returns A result whose `command` is set only when `detected` is true;
 *   `originalMessage` always echoes `message` unchanged.
 */
export const detectCommand = (message: string): DetectedCommand => {
  const trimmed = message.trim();

  // Strategy 1: phrase patterns ("run ...", "execute ...", ...).
  for (const pattern of COMMAND_PATTERNS) {
    const captured = trimmed.match(pattern)?.[1];
    if (captured === undefined) {
      continue;
    }

    const raw = captured.trim();
    if (raw.length === 0 || raw.length >= 500) {
      continue;
    }

    // Validate AFTER normalising: input such as `run ""` normalises to an
    // empty string and must not be reported as a detected command.
    const command = normalizeCommand(raw);
    if (command.trim().length === 0) {
      continue;
    }

    return {
      detected: true,
      command,
      originalMessage: message,
    };
  }

  // Strategy 2: the message itself starts with a known shell command.
  const firstWord = trimmed.split(/\s+/)[0] ?? "";
  const binary = firstWord.toLowerCase();
  if (DIRECT_COMMANDS.has(binary)) {
    return {
      detected: true,
      // The lookup is case-insensitive, so emit the lower-case binary name;
      // "LS -la" would otherwise fail on case-sensitive systems.
      command: binary + trimmed.slice(firstWord.length),
      originalMessage: message,
    };
  }

  return {
    detected: false,
    originalMessage: message,
  };
};
/**
 * Clean up a captured command phrase so it can be handed to the shell.
 *
 * Applied in this order, each step at most once:
 *  1. drop one pair of matching wrapping quotes (`"..."` or `'...'`);
 *  2. drop a trailing " command" and trim what is left ("tree command" -> "tree");
 *  3. drop a leading "the " ("the tree" -> "tree");
 *  4. drop a leading "a " ("a ls" -> "ls").
 */
const normalizeCommand = (command: string): string => {
  const isWrappedIn = (quote: string): boolean =>
    command.startsWith(quote) && command.endsWith(quote);

  let normalized =
    isWrappedIn('"') || isWrappedIn("'") ? command.slice(1, -1) : command;

  const commandSuffix = " command";
  if (normalized.endsWith(commandSuffix)) {
    normalized = normalized.slice(0, -commandSuffix.length).trim();
  }

  // Articles are checked one after the other, so "the a ls" ends up as "ls".
  for (const article of ["the ", "a "]) {
    if (normalized.startsWith(article)) {
      normalized = normalized.slice(article.length);
    }
  }

  return normalized;
};
/**
 * Run a detected command through the bash tool and return its outcome.
 *
 * A fresh tool context is built for every call, with newly generated session
 * and message ids and `autoApprove` enabled.
 *
 * NOTE(review): `autoApprove: true` presumably bypasses the usual permission
 * prompt for this command — confirm against `executeBash`.
 *
 * @param command - Command line to execute.
 * @param workingDir - Working directory placed in the tool context.
 * @param abortController - Optional controller placed in the tool context; a
 *   new one is created when omitted.
 * @returns The `success`, `output` and `error` fields of the bash tool result.
 */
export const executeDetectedCommand = async (
  command: string,
  workingDir: string,
  abortController?: AbortController,
): Promise<{
  success: boolean;
  output: string;
  error?: string;
}> => {
  const sessionId = uuidv4();
  const messageId = uuidv4();
  const abort = abortController ?? new AbortController();

  const ctx: ToolContext = {
    sessionId,
    messageId,
    workingDir,
    abort,
    autoApprove: true, // Direct command requests are auto-approved
    onMetadata: () => {},
  };

  // Expose only the three fields callers rely on, not the full tool result.
  const { success, output, error } = await executeBash({ command }, ctx);
  return { success, output, error };
};

View File

@@ -1,231 +0,0 @@
/**
* Unit tests for Vector Store
*/
import { describe, it, expect } from "bun:test";
import {
cosineSimilarity,
euclideanDistance,
upsertEmbedding,
removeEmbedding,
hasEmbedding,
getEmbedding,
findSimilar,
findAboveThreshold,
getIndexStats,
} from "@services/learning/vector-store";
import { createEmptyIndex } from "@/types/embeddings";
// Unit tests for the vector store: similarity metrics, index bookkeeping and
// similarity search. All fixtures are fixed vectors so every run is identical.
describe("Vector Store", () => {
  describe("cosineSimilarity", () => {
    it("should return 1 for identical vectors", () => {
      const a = [1, 0, 0];
      const b = [1, 0, 0];
      expect(cosineSimilarity(a, b)).toBeCloseTo(1);
    });

    it("should return 0 for orthogonal vectors", () => {
      const a = [1, 0, 0];
      const b = [0, 1, 0];
      expect(cosineSimilarity(a, b)).toBeCloseTo(0);
    });

    it("should return -1 for opposite vectors", () => {
      const a = [1, 0, 0];
      const b = [-1, 0, 0];
      expect(cosineSimilarity(a, b)).toBeCloseTo(-1);
    });

    it("should handle normalized vectors", () => {
      const a = [0.6, 0.8, 0];
      const b = [0.8, 0.6, 0];
      const similarity = cosineSimilarity(a, b);
      expect(similarity).toBeGreaterThan(0);
      expect(similarity).toBeLessThan(1);
    });

    it("should return 0 for mismatched lengths", () => {
      const a = [1, 0, 0];
      const b = [1, 0];
      expect(cosineSimilarity(a, b)).toBe(0);
    });

    it("should handle zero vectors", () => {
      const a = [0, 0, 0];
      const b = [1, 0, 0];
      expect(cosineSimilarity(a, b)).toBe(0);
    });
  });

  describe("euclideanDistance", () => {
    it("should return 0 for identical vectors", () => {
      const a = [1, 2, 3];
      const b = [1, 2, 3];
      expect(euclideanDistance(a, b)).toBe(0);
    });

    it("should compute correct distance", () => {
      // 3-4-5 right triangle.
      const a = [0, 0, 0];
      const b = [3, 4, 0];
      expect(euclideanDistance(a, b)).toBe(5);
    });

    it("should return Infinity for mismatched lengths", () => {
      const a = [1, 0, 0];
      const b = [1, 0];
      expect(euclideanDistance(a, b)).toBe(Infinity);
    });
  });

  describe("Index Operations", () => {
    it("should create empty index", () => {
      const index = createEmptyIndex("test-model");
      expect(index.version).toBe(1);
      expect(index.model).toBe("test-model");
      expect(Object.keys(index.embeddings)).toHaveLength(0);
    });

    it("should upsert embedding", () => {
      let index = createEmptyIndex("test-model");
      const embedding = [0.1, 0.2, 0.3];
      index = upsertEmbedding(index, "learn_1", embedding);
      expect(hasEmbedding(index, "learn_1")).toBe(true);
      expect(getEmbedding(index, "learn_1")?.embedding).toEqual(embedding);
    });

    it("should update existing embedding", () => {
      let index = createEmptyIndex("test-model");
      const embedding1 = [0.1, 0.2, 0.3];
      const embedding2 = [0.4, 0.5, 0.6];
      index = upsertEmbedding(index, "learn_1", embedding1);
      index = upsertEmbedding(index, "learn_1", embedding2);
      expect(getEmbedding(index, "learn_1")?.embedding).toEqual(embedding2);
    });

    it("should remove embedding", () => {
      let index = createEmptyIndex("test-model");
      index = upsertEmbedding(index, "learn_1", [0.1, 0.2, 0.3]);
      index = upsertEmbedding(index, "learn_2", [0.4, 0.5, 0.6]);
      index = removeEmbedding(index, "learn_1");
      expect(hasEmbedding(index, "learn_1")).toBe(false);
      expect(hasEmbedding(index, "learn_2")).toBe(true);
    });

    it("should return null for missing embedding", () => {
      const index = createEmptyIndex("test-model");
      expect(getEmbedding(index, "nonexistent")).toBeNull();
    });

    it("should track index stats", () => {
      let index = createEmptyIndex("test-model");
      index = upsertEmbedding(index, "learn_1", [0.1, 0.2, 0.3]);
      index = upsertEmbedding(index, "learn_2", [0.4, 0.5, 0.6]);
      const stats = getIndexStats(index);
      expect(stats.count).toBe(2);
      expect(stats.model).toBe("test-model");
    });
  });

  describe("Similarity Search", () => {
    it("should find similar embeddings", () => {
      let index = createEmptyIndex("test-model");
      // Add embeddings with known similarities
      index = upsertEmbedding(index, "a", [1, 0, 0]);
      index = upsertEmbedding(index, "b", [0.9, 0.1, 0]);
      index = upsertEmbedding(index, "c", [0, 1, 0]);
      const query = [1, 0, 0];
      const results = findSimilar(index, query, 2, 0);
      expect(results).toHaveLength(2);
      expect(results[0].id).toBe("a");
      expect(results[0].score).toBeCloseTo(1);
      expect(results[1].id).toBe("b");
    });

    it("should respect minSimilarity threshold", () => {
      let index = createEmptyIndex("test-model");
      index = upsertEmbedding(index, "a", [1, 0, 0]);
      index = upsertEmbedding(index, "b", [0, 1, 0]);
      const query = [1, 0, 0];
      const results = findSimilar(index, query, 10, 0.5);
      expect(results).toHaveLength(1);
      expect(results[0].id).toBe("a");
    });

    it("should limit results to topK", () => {
      let index = createEmptyIndex("test-model");
      // Deterministic fixtures: every component is positive, so each embedding
      // has a strictly positive cosine similarity with the all-positive query
      // and all ten are candidates. Random fixtures made the run
      // non-reproducible, and a "<= 3" check passed even with zero results.
      for (let i = 0; i < 10; i++) {
        index = upsertEmbedding(index, `learn_${i}`, [1, i + 1, 0.5]);
      }
      const query = [0.5, 0.5, 0.5];
      const results = findSimilar(index, query, 3, 0);
      expect(results).toHaveLength(3);
    });

    it("should find all above threshold", () => {
      let index = createEmptyIndex("test-model");
      index = upsertEmbedding(index, "a", [1, 0, 0]);
      index = upsertEmbedding(index, "b", [0.95, 0.05, 0]);
      index = upsertEmbedding(index, "c", [0.9, 0.1, 0]);
      index = upsertEmbedding(index, "d", [0, 1, 0]);
      const query = [1, 0, 0];
      const results = findAboveThreshold(index, query, 0.85);
      expect(results.length).toBe(3);
      expect(results.map((r) => r.id)).toContain("a");
      expect(results.map((r) => r.id)).toContain("b");
      expect(results.map((r) => r.id)).toContain("c");
    });

    it("should return empty array for no matches", () => {
      let index = createEmptyIndex("test-model");
      index = upsertEmbedding(index, "a", [1, 0, 0]);
      const query = [-1, 0, 0];
      const results = findSimilar(index, query, 10, 0.5);
      expect(results).toHaveLength(0);
    });

    it("should handle empty index", () => {
      const index = createEmptyIndex("test-model");
      const query = [1, 0, 0];
      const results = findSimilar(index, query, 10, 0);
      expect(results).toHaveLength(0);
    });
  });
});

View File

@@ -1,152 +0,0 @@
/**
* Unit tests for Bash Pattern Matcher
*/
import { describe, it, expect } from "bun:test";
import {
matchesBashPattern,
isBashAllowedByIndex,
findMatchingBashPatterns,
generateBashPattern,
extractCommandPrefix,
} from "@services/permissions/matchers/bash";
import { buildPatternIndex } from "@services/permissions/pattern-index";
import type { PermissionPattern } from "@/types/permissions";
// Test suite for the bash permission matchers: matching a command line against
// a parsed pattern, checking it against a pattern index, and generating
// "Bash(<prefix>:*)" patterns from a command line.
describe("Bash Pattern Matcher", () => {
// matchesBashPattern(command, pattern): one command line vs. one parsed pattern.
describe("matchesBashPattern", () => {
it("should match exact command with wildcard args", () => {
const pattern: PermissionPattern = {
tool: "Bash",
command: "git",
args: "*",
};
expect(matchesBashPattern("git", pattern)).toBe(true);
expect(matchesBashPattern("git status", pattern)).toBe(true);
expect(matchesBashPattern("git commit -m 'msg'", pattern)).toBe(true);
});
it("should not match different command", () => {
const pattern: PermissionPattern = {
tool: "Bash",
command: "git",
args: "*",
};
expect(matchesBashPattern("npm install", pattern)).toBe(false);
// "gitx" only shares a prefix with "git"; it must not match.
expect(matchesBashPattern("gitx status", pattern)).toBe(false);
});
it("should match command with specific args prefix", () => {
const pattern: PermissionPattern = {
tool: "Bash",
command: "git",
args: "status*",
};
expect(matchesBashPattern("git status", pattern)).toBe(true);
expect(matchesBashPattern("git status --short", pattern)).toBe(true);
expect(matchesBashPattern("git commit", pattern)).toBe(false);
});
it("should match exact args", () => {
// No wildcard in args: extra arguments must cause a mismatch.
const pattern: PermissionPattern = {
tool: "Bash",
command: "npm",
args: "install",
};
expect(matchesBashPattern("npm install", pattern)).toBe(true);
expect(matchesBashPattern("npm install lodash", pattern)).toBe(false);
});
it("should reject non-Bash patterns", () => {
const pattern: PermissionPattern = {
tool: "Read",
path: "*",
};
expect(matchesBashPattern("ls", pattern)).toBe(false);
});
});
// isBashAllowedByIndex(command, index): true when any indexed Bash pattern matches.
describe("isBashAllowedByIndex", () => {
it("should check against index patterns", () => {
const index = buildPatternIndex(["Bash(git:*)", "Bash(npm install:*)"]);
expect(isBashAllowedByIndex("git status", index)).toBe(true);
expect(isBashAllowedByIndex("git commit", index)).toBe(true);
expect(isBashAllowedByIndex("npm install lodash", index)).toBe(true);
expect(isBashAllowedByIndex("npm run build", index)).toBe(false);
expect(isBashAllowedByIndex("rm -rf /", index)).toBe(false);
});
it("should return false for empty index", () => {
const index = buildPatternIndex([]);
expect(isBashAllowedByIndex("git status", index)).toBe(false);
});
});
// findMatchingBashPatterns(command, index): every indexed pattern that matches.
describe("findMatchingBashPatterns", () => {
it("should find all matching patterns", () => {
const index = buildPatternIndex([
"Bash(git:*)",
"Bash(git status:*)",
"Bash(npm:*)",
]);
const matches = findMatchingBashPatterns("git status", index);
expect(matches.length).toBe(2);
expect(matches.map((m) => m.raw)).toContain("Bash(git:*)");
expect(matches.map((m) => m.raw)).toContain("Bash(git status:*)");
});
it("should return empty for no matches", () => {
const index = buildPatternIndex(["Bash(git:*)"]);
const matches = findMatchingBashPatterns("npm install", index);
expect(matches).toHaveLength(0);
});
});
// generateBashPattern(command): pattern string built from the command prefix.
describe("generateBashPattern", () => {
it("should generate pattern for multi-word commands", () => {
expect(generateBashPattern("git status")).toBe("Bash(git status:*)");
expect(generateBashPattern("npm install lodash")).toBe(
"Bash(npm install:*)",
);
expect(generateBashPattern("docker run nginx")).toBe(
"Bash(docker run:*)",
);
});
it("should generate pattern for single commands", () => {
expect(generateBashPattern("ls")).toBe("Bash(ls:*)");
expect(generateBashPattern("pwd")).toBe("Bash(pwd:*)");
});
it("should handle commands with many args", () => {
expect(generateBashPattern("git commit -m 'message'")).toBe(
"Bash(git commit:*)",
);
});
});
// extractCommandPrefix(command): two words for the tools exercised here
// (git, npm, bun), otherwise just the first word.
describe("extractCommandPrefix", () => {
it("should extract multi-word prefix", () => {
expect(extractCommandPrefix("git status")).toBe("git status");
expect(extractCommandPrefix("npm install lodash")).toBe("npm install");
expect(extractCommandPrefix("bun test --watch")).toBe("bun test");
});
it("should extract single word for non-recognized commands", () => {
expect(extractCommandPrefix("ls -la")).toBe("ls");
expect(extractCommandPrefix("cat file.txt")).toBe("cat");
});
});
});

View File

@@ -1,158 +0,0 @@
/**
* Unit tests for Path Pattern Matcher
*/
import { describe, it, expect } from "bun:test";
import {
matchesPathPattern,
matchesFilePattern,
isFileOpAllowedByIndex,
findMatchingFilePatterns,
generateFilePattern,
normalizePath,
isPathInDirectory,
} from "@services/permissions/matchers/path";
import { buildPatternIndex } from "@services/permissions/pattern-index";
import type { PermissionPattern } from "@/types/permissions";
// Test suite for the path permission matchers: glob-like path matching,
// per-tool checks against a pattern index, pattern generation and path helpers.
describe("Path Pattern Matcher", () => {
// matchesPathPattern(path, pattern): path vs. a raw pattern string.
describe("matchesPathPattern", () => {
it("should match wildcard pattern", () => {
expect(matchesPathPattern("/any/path/file.ts", "*")).toBe(true);
expect(matchesPathPattern("relative/file.js", "*")).toBe(true);
});
it("should match directory prefix pattern", () => {
// "src/*" is expected to cover nested files too, not only direct children.
expect(matchesPathPattern("src/file.ts", "src/*")).toBe(true);
expect(matchesPathPattern("src/nested/file.ts", "src/*")).toBe(true);
expect(matchesPathPattern("tests/file.ts", "src/*")).toBe(false);
});
it("should match extension pattern", () => {
expect(matchesPathPattern("file.ts", "*.ts")).toBe(true);
expect(matchesPathPattern("src/nested/file.ts", "*.ts")).toBe(true);
expect(matchesPathPattern("file.js", "*.ts")).toBe(false);
});
it("should match exact path", () => {
expect(matchesPathPattern("src/file.ts", "src/file.ts")).toBe(true);
expect(matchesPathPattern("src/other.ts", "src/file.ts")).toBe(false);
});
it("should match substring", () => {
// A pattern without wildcards also matches when it occurs inside the path.
expect(
matchesPathPattern("/path/to/config/settings.json", "config"),
).toBe(true);
});
});
// matchesFilePattern(path, pattern): path vs. a parsed PermissionPattern.
describe("matchesFilePattern", () => {
it("should match with parsed pattern", () => {
const pattern: PermissionPattern = {
tool: "Read",
path: "*.ts",
};
expect(matchesFilePattern("file.ts", pattern)).toBe(true);
expect(matchesFilePattern("file.js", pattern)).toBe(false);
});
it("should return false for pattern without path", () => {
const pattern: PermissionPattern = {
tool: "Bash",
command: "git",
};
expect(matchesFilePattern("file.ts", pattern)).toBe(false);
});
});
// isFileOpAllowedByIndex(tool, path, index): per-tool lookup in the index.
describe("isFileOpAllowedByIndex", () => {
it("should check Read operations", () => {
const index = buildPatternIndex(["Read(*.ts)", "Read(src/*)"]);
expect(isFileOpAllowedByIndex("Read", "file.ts", index)).toBe(true);
expect(isFileOpAllowedByIndex("Read", "src/nested.js", index)).toBe(true);
expect(isFileOpAllowedByIndex("Read", "tests/file.js", index)).toBe(
false,
);
});
it("should check Write operations separately", () => {
// A blanket Read(*) grant must not leak into Write permissions.
const index = buildPatternIndex(["Read(*)", "Write(src/*)"]);
expect(isFileOpAllowedByIndex("Read", "any/file.ts", index)).toBe(true);
expect(isFileOpAllowedByIndex("Write", "any/file.ts", index)).toBe(false);
expect(isFileOpAllowedByIndex("Write", "src/file.ts", index)).toBe(true);
});
it("should return false for empty index", () => {
const index = buildPatternIndex([]);
expect(isFileOpAllowedByIndex("Read", "file.ts", index)).toBe(false);
});
});
// findMatchingFilePatterns(tool, path, index): every matching pattern for the tool.
describe("findMatchingFilePatterns", () => {
it("should find all matching patterns", () => {
const index = buildPatternIndex(["Read(*)", "Read(*.ts)", "Read(src/*)"]);
const matches = findMatchingFilePatterns("Read", "src/file.ts", index);
expect(matches.length).toBe(3);
});
it("should return empty for no matches", () => {
const index = buildPatternIndex(["Read(src/*)"]);
const matches = findMatchingFilePatterns("Read", "tests/file.ts", index);
expect(matches).toHaveLength(0);
});
});
// generateFilePattern(tool, path): the cases below show an extension-based
// pattern, a directory-based pattern, and the bare file name as a last resort.
describe("generateFilePattern", () => {
it("should generate extension-based pattern for common extensions", () => {
expect(generateFilePattern("Read", "file.ts")).toBe("Read(*.ts)");
expect(generateFilePattern("Write", "file.json")).toBe("Write(*.json)");
expect(generateFilePattern("Edit", "file.tsx")).toBe("Edit(*.tsx)");
});
it("should generate directory-based pattern when appropriate", () => {
expect(generateFilePattern("Read", "src/file.xyz")).toBe("Read(src/*)");
});
it("should fall back to basename", () => {
expect(generateFilePattern("Read", "Makefile")).toBe("Read(Makefile)");
});
});
// normalizePath(path): collapses duplicate separators and a leading "./".
describe("normalizePath", () => {
it("should normalize path separators", () => {
expect(normalizePath("src/file.ts")).toBe("src/file.ts");
expect(normalizePath("src//file.ts")).toBe("src/file.ts");
expect(normalizePath("./src/file.ts")).toBe("src/file.ts");
});
});
// isPathInDirectory(path, dir): containment check on whole path segments.
describe("isPathInDirectory", () => {
it("should check if path is in directory", () => {
expect(isPathInDirectory("/project/src/file.ts", "/project/src")).toBe(
true,
);
expect(
isPathInDirectory("/project/src/nested/file.ts", "/project/src"),
).toBe(true);
expect(isPathInDirectory("/project/tests/file.ts", "/project/src")).toBe(
false,
);
});
it("should not match partial directory names", () => {
// "/project/src-backup" merely starts with "/project/src"; it is a sibling.
expect(
isPathInDirectory("/project/src-backup/file.ts", "/project/src"),
).toBe(false);
});
});
});

View File

@@ -1,186 +0,0 @@
/**
* Unit tests for Permission Pattern Index
*/
import { describe, it, expect } from "bun:test";
import {
createPatternIndex,
buildPatternIndex,
addToIndex,
removeFromIndex,
getPatternsForTool,
hasPattern,
getRawPatterns,
mergeIndexes,
getIndexStats,
} from "@services/permissions/pattern-index";
describe("Permission Pattern Index", () => {
describe("createPatternIndex", () => {
it("should create empty index", () => {
const index = createPatternIndex();
expect(index.all).toHaveLength(0);
expect(index.byTool.size).toBe(0);
});
});
describe("buildPatternIndex", () => {
it("should build index from patterns", () => {
const patterns = [
"Bash(git:*)",
"Bash(npm install:*)",
"Read(*)",
"Write(src/*)",
];
const index = buildPatternIndex(patterns);
expect(index.all).toHaveLength(4);
expect(index.byTool.get("Bash")).toHaveLength(2);
expect(index.byTool.get("Read")).toHaveLength(1);
expect(index.byTool.get("Write")).toHaveLength(1);
});
it("should skip invalid patterns", () => {
const patterns = ["Bash(git:*)", "invalid pattern", "Read(*)"];
const index = buildPatternIndex(patterns);
expect(index.all).toHaveLength(2);
});
it("should handle empty array", () => {
const index = buildPatternIndex([]);
expect(index.all).toHaveLength(0);
});
});
describe("addToIndex", () => {
it("should add pattern to index", () => {
let index = createPatternIndex();
index = addToIndex(index, "Bash(git:*)");
expect(index.all).toHaveLength(1);
expect(hasPattern(index, "Bash(git:*)")).toBe(true);
});
it("should not duplicate patterns", () => {
let index = buildPatternIndex(["Bash(git:*)"]);
index = addToIndex(index, "Bash(git:*)");
expect(index.all).toHaveLength(1);
});
it("should add to correct tool bucket", () => {
let index = createPatternIndex();
index = addToIndex(index, "Read(src/*)");
expect(getPatternsForTool(index, "Read")).toHaveLength(1);
expect(getPatternsForTool(index, "Bash")).toHaveLength(0);
});
});
// removeFromIndex: removing a pattern drops only that pattern.
describe("removeFromIndex", () => {
  it("should remove pattern from index", () => {
    const original = buildPatternIndex(["Bash(git:*)", "Read(*)"]);
    const trimmed = removeFromIndex(original, "Bash(git:*)");

    expect(trimmed.all).toHaveLength(1);
    expect(hasPattern(trimmed, "Bash(git:*)")).toBe(false);
    expect(hasPattern(trimmed, "Read(*)")).toBe(true);
  });

  it("should handle non-existent pattern", () => {
    const original = buildPatternIndex(["Bash(git:*)"]);
    // Removing something that was never indexed leaves the single entry in place.
    const untouched = removeFromIndex(original, "Read(*)");

    expect(untouched.all).toHaveLength(1);
  });
});
// getPatternsForTool: lookup of the patterns registered for one tool name.
describe("getPatternsForTool", () => {
  it("should return patterns for specific tool", () => {
    const seeded = buildPatternIndex(["Bash(git:*)", "Bash(npm:*)", "Read(*)"]);

    const forBash = getPatternsForTool(seeded, "Bash");
    const forRead = getPatternsForTool(seeded, "Read");
    // No Write pattern was indexed, so this lookup is expected to be empty.
    const forWrite = getPatternsForTool(seeded, "Write");

    expect(forBash).toHaveLength(2);
    expect(forRead).toHaveLength(1);
    expect(forWrite).toHaveLength(0);
  });
});
// getRawPatterns: the raw strings come back equal to the input list.
describe("getRawPatterns", () => {
  it("should return all raw pattern strings", () => {
    const source = ["Bash(git:*)", "Read(*)"];
    const seeded = buildPatternIndex(source);

    expect(getRawPatterns(seeded)).toEqual(source);
  });
});
// mergeIndexes: combining several indexes into one.
describe("mergeIndexes", () => {
  it("should merge multiple indexes", () => {
    const bashOnly = buildPatternIndex(["Bash(git:*)"]);
    const readOnly = buildPatternIndex(["Read(*)"]);
    const writeOnly = buildPatternIndex(["Write(src/*)"]);

    const combined = mergeIndexes(bashOnly, readOnly, writeOnly);

    expect(combined.all).toHaveLength(3);
    expect(getPatternsForTool(combined, "Bash")).toHaveLength(1);
    expect(getPatternsForTool(combined, "Read")).toHaveLength(1);
    expect(getPatternsForTool(combined, "Write")).toHaveLength(1);
  });

  it("should preserve duplicates from different indexes", () => {
    const first = buildPatternIndex(["Bash(git:*)"]);
    const second = buildPatternIndex(["Bash(git:*)"]);

    const combined = mergeIndexes(first, second);

    // Duplicates preserved (session might override global)
    expect(combined.all).toHaveLength(2);
  });

  it("should handle empty indexes", () => {
    const blank = createPatternIndex();
    const readOnly = buildPatternIndex(["Read(*)"]);

    const combined = mergeIndexes(blank, readOnly);

    expect(combined.all).toHaveLength(1);
  });
});
// getIndexStats: total count plus a per-tool breakdown.
describe("getIndexStats", () => {
  it("should return correct statistics", () => {
    const seeded = buildPatternIndex([
      "Bash(git:*)",
      "Bash(npm:*)",
      "Read(*)",
      "Write(src/*)",
      "Edit(*.ts)",
    ]);

    const { total, byTool } = getIndexStats(seeded);

    expect(total).toBe(5);
    expect(byTool["Bash"]).toBe(2);
    expect(byTool["Read"]).toBe(1);
    expect(byTool["Write"]).toBe(1);
    expect(byTool["Edit"]).toBe(1);
  });
});
});

View File

@@ -1,427 +0,0 @@
/**
* Unit tests for Memory Selection Layer
*/
import { describe, it, expect } from "bun:test";
import {
selectRelevantMemories,
computeRelevance,
computeMandatoryItems,
createMemoryItem,
createQueryContext,
createMemoryStore,
addMemory,
findMemoriesByType,
findMemoriesByPath,
pruneOldMemories,
} from "../memory-selection";
import type {
MemoryItem,
QueryContext,
SelectionInput,
} from "@src/types/reasoning";
describe("Memory Selection Layer", () => {
/**
 * Test fixture factory for a MemoryItem.
 *
 * Fills every field with a simple default (random `mem_`-prefixed id,
 * lower-cased whitespace-split tokens, current timestamp, a token count of
 * one quarter of the content length rounded up) and then lets `options`
 * override any of them.
 *
 * @param content - Text stored on the memory and used to derive tokens.
 * @param type - Memory type; defaults to "CONVERSATION".
 * @param options - Field overrides applied on top of the defaults.
 * @returns A new MemoryItem object.
 */
const createTestMemory = (
  content: string,
  type: MemoryItem["type"] = "CONVERSATION",
  options: Partial<MemoryItem> = {},
): MemoryItem => {
  const randomSuffix = Math.random().toString(36).slice(2);
  const defaults: MemoryItem = {
    id: "mem_" + randomSuffix,
    content,
    tokens: content.toLowerCase().split(/\s+/),
    entities: [],
    timestamp: Date.now(),
    type,
    causalLinks: [],
    tokenCount: Math.ceil(content.length / 4),
  };
  // Overrides come last so callers can pin ids, timestamps, token counts, etc.
  return { ...defaults, ...options };
};
// computeRelevance: each case isolates one component of the relevance score.
describe("computeRelevance", () => {
  it("should score higher for keyword overlap", () => {
    const subject = createTestMemory(
      "The function handles database queries efficiently",
    );
    const overlappingQuery = createQueryContext(
      "database query optimization",
      {},
    );
    const unrelatedQuery = createQueryContext("user interface design", {});

    const { total: overlappingTotal } = computeRelevance(
      subject,
      overlappingQuery,
    );
    const { total: unrelatedTotal } = computeRelevance(subject, unrelatedQuery);

    expect(overlappingTotal).toBeGreaterThan(unrelatedTotal);
  });

  it("should score higher for recent memories", () => {
    const fresh = createTestMemory("Recent content", "CONVERSATION", {
      timestamp: Date.now(),
    });
    const stale = createTestMemory("Old content", "CONVERSATION", {
      timestamp: Date.now() - 3600000, // 1 hour ago
    });
    const query = createQueryContext("content search", {});

    const freshScore = computeRelevance(fresh, query);
    const staleScore = computeRelevance(stale, query);

    expect(freshScore.breakdown.recency).toBeGreaterThan(
      staleScore.breakdown.recency,
    );
  });

  it("should give type bonus to ERROR type", () => {
    // Same content on both memories, so only the type differs.
    const asError = createTestMemory("Error: connection failed", "ERROR");
    const asConversation = createTestMemory(
      "Error: connection failed",
      "CONVERSATION",
    );
    const query = createQueryContext("error handling", {});

    const errorScore = computeRelevance(asError, query);
    const conversationScore = computeRelevance(asConversation, query);

    expect(errorScore.breakdown.typeBonus).toBeGreaterThan(
      conversationScore.breakdown.typeBonus,
    );
  });

  it("should score causal links", () => {
    const linked = createTestMemory("Linked memory", "CONVERSATION", {
      causalLinks: ["active_item_1"],
    });
    const unlinked = createTestMemory("Unlinked memory", "CONVERSATION", {
      causalLinks: [],
    });
    // The query's active item matches the linked memory's causal link.
    const query = createQueryContext("test", {
      activeItems: ["active_item_1"],
    });

    const linkedScore = computeRelevance(linked, query);
    const unlinkedScore = computeRelevance(unlinked, query);

    expect(linkedScore.breakdown.causalLink).toBe(1);
    expect(unlinkedScore.breakdown.causalLink).toBe(0);
  });

  it("should score path overlap", () => {
    const fileMemory = createTestMemory("File content", "FILE_CONTENT", {
      filePaths: ["/src/services/agent.ts"],
    });
    const samePathQuery = createQueryContext("agent implementation", {
      activePaths: ["/src/services/agent.ts"],
    });
    const otherPathQuery = createQueryContext("agent implementation", {
      activePaths: ["/src/utils/helpers.ts"],
    });

    const samePathScore = computeRelevance(fileMemory, samePathQuery);
    const otherPathScore = computeRelevance(fileMemory, otherPathQuery);

    expect(samePathScore.breakdown.pathOverlap).toBeGreaterThan(
      otherPathScore.breakdown.pathOverlap,
    );
  });
});
// selectRelevantMemories: budget handling, mandatory items and score reporting.
describe("selectRelevantMemories", () => {
  it("should select memories within token budget", () => {
    // Three memories of 100 tokens each against a budget of 250.
    const candidates = [
      "First memory content here",
      "Second memory content here",
      "Third memory content here",
    ].map((text) =>
      createTestMemory(text, "CONVERSATION", { tokenCount: 100 }),
    );
    const request: SelectionInput = {
      memories: candidates,
      query: createQueryContext("memory content", {}),
      tokenBudget: 250,
      mandatoryItems: [],
    };

    const outcome = selectRelevantMemories(request);

    expect(outcome.tokenUsage).toBeLessThanOrEqual(250);
  });

  it("should always include mandatory items", () => {
    const candidates = [
      createTestMemory("Important memory", "CONVERSATION", {
        id: "mandatory_1",
      }),
      createTestMemory("Irrelevant memory about cooking", "CONVERSATION"),
    ];
    const request: SelectionInput = {
      memories: candidates,
      query: createQueryContext("completely unrelated topic", {}),
      tokenBudget: 1000,
      mandatoryItems: ["mandatory_1"],
    };

    const { selected } = selectRelevantMemories(request);

    expect(selected.some((item) => item.id === "mandatory_1")).toBe(true);
  });

  it("should exclude low relevance items", () => {
    const candidates = [
      createTestMemory(
        "Highly relevant database query optimization",
        "CONVERSATION",
      ),
      createTestMemory(
        "xyz abc def completely unrelated topic",
        "CONVERSATION",
      ),
    ];
    const request: SelectionInput = {
      memories: candidates,
      query: createQueryContext("database query optimization", {}),
      tokenBudget: 1000,
      mandatoryItems: [],
    };

    const { selected } = selectRelevantMemories(request);

    // At least one memory should be selected (the relevant one)
    expect(selected.length).toBeGreaterThanOrEqual(1);
    // The first (relevant) memory should be selected
    // NOTE(review): nothing here asserts that the unrelated memory is left out.
    expect(selected.some((item) => item.content.includes("database"))).toBe(
      true,
    );
  });

  it("should return scores for all selected items", () => {
    const candidates = [
      createTestMemory("First memory", "CONVERSATION", { id: "mem_1" }),
      createTestMemory("Second memory", "CONVERSATION", { id: "mem_2" }),
    ];
    const request: SelectionInput = {
      memories: candidates,
      query: createQueryContext("memory", {}),
      tokenBudget: 1000,
      mandatoryItems: [],
    };

    const outcome = selectRelevantMemories(request);

    // Every selected memory must have an entry in the scores map.
    for (const { id } of outcome.selected) {
      expect(outcome.scores.has(id)).toBe(true);
    }
  });
});
// computeMandatoryItems: ids that must always be selected.
describe("computeMandatoryItems", () => {
  it("should include recent memories", () => {
    const referenceTime = Date.now();
    const candidates = [
      createTestMemory("Recent", "CONVERSATION", {
        id: "recent",
        timestamp: referenceTime,
      }),
      createTestMemory("Old", "CONVERSATION", {
        id: "old",
        timestamp: referenceTime - 600000,
      }),
    ];

    const mandatoryIds = computeMandatoryItems(candidates, referenceTime);

    expect(mandatoryIds).toContain("recent");
  });

  it("should include recent error memories", () => {
    const referenceTime = Date.now();
    const candidates = [
      createTestMemory("Error occurred", "ERROR", {
        id: "error_1",
        timestamp: referenceTime - 300000, // 5 minutes ago
      }),
    ];

    const mandatoryIds = computeMandatoryItems(candidates, referenceTime);

    expect(mandatoryIds).toContain("error_1");
  });

  it("should include decision memories", () => {
    const referenceTime = Date.now();
    // Four decisions in order; ids are pinned so the assertions can name them.
    const candidates = (
      [
        ["Decided to use TypeScript", "decision_1"],
        ["Decided to use React", "decision_2"],
        ["Decided to use Bun", "decision_3"],
        ["Decided to use Zustand", "decision_4"],
      ] as const
    ).map(([text, id]) => createTestMemory(text, "DECISION", { id }));

    const mandatoryIds = computeMandatoryItems(candidates, referenceTime);

    // Should include last 3 decisions
    for (const expectedId of ["decision_2", "decision_3", "decision_4"]) {
      expect(mandatoryIds).toContain(expectedId);
    }
  });
});
// Memory store helpers: creation, insertion, lookup and pruning.
describe("Memory Store Operations", () => {
  describe("createMemoryStore", () => {
    it("should create empty store with max items", () => {
      const { items, maxItems } = createMemoryStore(500);

      expect(items).toHaveLength(0);
      expect(maxItems).toBe(500);
    });
  });

  describe("addMemory", () => {
    it("should add memory to store", () => {
      const emptyStore = createMemoryStore(100);
      const entry = createMemoryItem("Test content", "CONVERSATION");

      const populated = addMemory(emptyStore, entry);

      expect(populated.items).toHaveLength(1);
      expect(populated.items[0].content).toBe("Test content");
    });

    it("should prune oldest items when exceeding max", () => {
      // Push five memories through a store capped at three items.
      const overfilled = [0, 1, 2, 3, 4].reduce(
        (acc, n) =>
          addMemory(acc, createMemoryItem(`Memory ${n}`, "CONVERSATION")),
        createMemoryStore(3),
      );

      expect(overfilled.items.length).toBeLessThanOrEqual(3);
    });
  });

  describe("findMemoriesByType", () => {
    it("should filter by type", () => {
      const emptyStore = createMemoryStore(100);
      const withConversation = addMemory(
        emptyStore,
        createMemoryItem("Conversation", "CONVERSATION"),
      );
      const withError = addMemory(
        withConversation,
        createMemoryItem("Error", "ERROR"),
      );
      const withToolResult = addMemory(
        withError,
        createMemoryItem("Tool result", "TOOL_RESULT"),
      );

      const matches = findMemoriesByType(withToolResult, "ERROR");

      expect(matches).toHaveLength(1);
      expect(matches[0].content).toBe("Error");
    });
  });

  describe("findMemoriesByPath", () => {
    it("should find memories by file path", () => {
      const emptyStore = createMemoryStore(100);
      const withAgentFile = addMemory(emptyStore, {
        ...createMemoryItem("File content", "FILE_CONTENT"),
        filePaths: ["/src/services/agent.ts"],
      });
      const withBothFiles = addMemory(withAgentFile, {
        ...createMemoryItem("Other file", "FILE_CONTENT"),
        filePaths: ["/src/utils/helpers.ts"],
      });

      // The search term is only the file name, not the full stored path.
      const matches = findMemoriesByPath(withBothFiles, "agent.ts");

      expect(matches).toHaveLength(1);
      expect(matches[0].content).toBe("File content");
    });
  });

  describe("pruneOldMemories", () => {
    it("should remove memories older than threshold", () => {
      const referenceTime = Date.now();
      const emptyStore = createMemoryStore(100);
      const withRecent = addMemory(emptyStore, {
        ...createMemoryItem("Recent", "CONVERSATION"),
        timestamp: referenceTime,
      });
      const withRecentAndOld = addMemory(withRecent, {
        ...createMemoryItem("Old", "CONVERSATION"),
        timestamp: referenceTime - 7200000, // 2 hours ago
      });

      const remaining = pruneOldMemories(withRecentAndOld, 3600000); // 1 hour threshold

      expect(remaining.items).toHaveLength(1);
      expect(remaining.items[0].content).toBe("Recent");
    });
  });
});
// createMemoryItem: shape of the produced memory object.
describe("createMemoryItem", () => {
  it("should create memory with correct structure", () => {
    const created = createMemoryItem("Test content", "CONVERSATION", {
      filePaths: ["/test.ts"],
      causalLinks: ["prev_memory"],
    });

    // Content, type and the supplied options are carried onto the item.
    expect(created.content).toBe("Test content");
    expect(created.type).toBe("CONVERSATION");
    expect(created.filePaths).toContain("/test.ts");
    expect(created.causalLinks).toContain("prev_memory");
    // Derived fields: a positive token count and a "mem_"-prefixed id.
    expect(created.tokenCount).toBeGreaterThan(0);
    expect(created.id).toMatch(/^mem_/);
  });

  it("should tokenize content", () => {
    const { tokens } = createMemoryItem("Hello world test", "CONVERSATION");

    expect(tokens.length).toBeGreaterThan(0);
  });
});
// createQueryContext: tokens are derived and the supplied options are kept.
describe("createQueryContext", () => {
  it("should create query context with tokens", () => {
    const queryContext = createQueryContext("database query optimization", {
      activePaths: ["/src/db.ts"],
      activeItems: ["item_1"],
    });

    expect(queryContext.tokens.length).toBeGreaterThan(0);
    expect(queryContext.activePaths).toContain("/src/db.ts");
    expect(queryContext.activeItems).toContain("item_1");
    expect(queryContext.timestamp).toBeDefined();
  });
});
});

View File

@@ -1,276 +0,0 @@
/**
* Unit tests for Quality Evaluation Layer
*/
import { describe, it, expect } from "bun:test";
import {
evaluateQuality,
computeVerdict,
hasHallucinationMarkers,
hasContradiction,
} from "../quality-evaluation";
import type {
QualityEvalInput,
TaskConstraints,
AttemptRecord,
} from "@src/types/reasoning";
describe("Quality Evaluation Layer", () => {
/**
 * Test fixture factory for a QualityEvalInput.
 *
 * Produces a plain-text response with no tool calls and permissive task
 * constraints, then applies `overrides` on top (shallow merge, so passing
 * `taskConstraints` replaces the whole constraints object).
 *
 * @param overrides - Top-level fields to replace in the default input.
 * @returns A freshly built QualityEvalInput.
 */
const createDefaultInput = (
  overrides: Partial<QualityEvalInput> = {},
): QualityEvalInput => {
  const baseline: QualityEvalInput = {
    responseText: "Here is the solution to your problem.",
    responseToolCalls: [],
    expectedType: "text",
    queryTokens: ["solution", "problem"],
    queryEntities: [],
    previousAttempts: [],
    taskConstraints: {
      requiredOutputs: [],
      expectedToolCalls: [],
      maxResponseTokens: 4000,
      requiresCode: false,
    },
  };
  return { ...baseline, ...overrides };
};
// evaluateQuality: end-to-end scoring, verdicts and deficiency tags.
describe("evaluateQuality", () => {
  // Baseline task constraints for the cases below; each case overrides only
  // the fields it cares about.
  const constraintsWith = (
    extra: Partial<TaskConstraints> = {},
  ): TaskConstraints => ({
    requiredOutputs: [],
    expectedToolCalls: [],
    maxResponseTokens: 4000,
    requiresCode: false,
    ...extra,
  });

  it("should accept a high-quality text response", () => {
    const evaluation = evaluateQuality(
      createDefaultInput({
        responseText:
          "Here is the solution to your problem. I've analyzed the issue and found the root cause.",
        queryTokens: ["solution", "problem", "analyze", "issue"],
      }),
    );

    expect(evaluation.score).toBeGreaterThan(0.5);
    expect(evaluation.verdict).toBe("ACCEPT");
    expect(evaluation.deficiencies).toHaveLength(0);
  });

  it("should reject an empty response", () => {
    const evaluation = evaluateQuality(
      createDefaultInput({
        responseText: "",
        responseToolCalls: [],
      }),
    );

    expect(evaluation.verdict).not.toBe("ACCEPT");
    expect(evaluation.deficiencies).toContain("EMPTY_RESPONSE");
  });

  it("should detect missing tool calls when expected", () => {
    // A tool call is expected but the response only contains text.
    const evaluation = evaluateQuality(
      createDefaultInput({
        responseText: "I will read the file now.",
        responseToolCalls: [],
        expectedType: "tool_call",
        taskConstraints: constraintsWith({ expectedToolCalls: ["read"] }),
      }),
    );

    expect(evaluation.deficiencies).toContain("MISSING_TOOL_CALL");
  });

  it("should accept response with tool calls when expected", () => {
    const evaluation = evaluateQuality(
      createDefaultInput({
        responseText: "Let me read that file.",
        responseToolCalls: [
          { id: "1", name: "read", arguments: { path: "/test.ts" } },
        ],
        expectedType: "tool_call",
        taskConstraints: constraintsWith({ expectedToolCalls: ["read"] }),
      }),
    );

    expect(evaluation.score).toBeGreaterThan(0.5);
  });

  it("should detect query mismatch", () => {
    const evaluation = evaluateQuality(
      createDefaultInput({
        responseText: "The weather today is sunny and warm.",
        queryTokens: ["database", "migration", "schema", "postgresql"],
      }),
    );

    // With no token overlap, relevance should be lower than perfect match
    expect(evaluation.metrics.relevance).toBeLessThan(1);
  });

  it("should detect incomplete code when required", () => {
    // Code is required but the response contains no code block.
    const evaluation = evaluateQuality(
      createDefaultInput({
        responseText: "Here is some text without any code.",
        taskConstraints: constraintsWith({
          requiresCode: true,
          codeLanguage: "typescript",
        }),
      }),
    );

    expect(evaluation.deficiencies).toContain("INCOMPLETE_CODE");
  });

  it("should accept valid code block when required", () => {
    const evaluation = evaluateQuality(
      createDefaultInput({
        responseText:
          "Here is the function:\n\n```typescript\nfunction add(a: number, b: number): number {\n return a + b;\n}\n```",
        taskConstraints: constraintsWith({
          requiresCode: true,
          codeLanguage: "typescript",
        }),
      }),
    );

    expect(evaluation.deficiencies).not.toContain("INCOMPLETE_CODE");
    expect(evaluation.deficiencies).not.toContain("WRONG_LANGUAGE");
  });
});
// computeVerdict: score-to-verdict mapping, checked at and around each boundary.
describe("computeVerdict", () => {
  it("should return ACCEPT for score >= 0.70", () => {
    for (const score of [0.7, 0.85, 1.0]) {
      expect(computeVerdict(score)).toBe("ACCEPT");
    }
  });

  it("should return RETRY for score between 0.40 and 0.70", () => {
    for (const score of [0.69, 0.55, 0.4]) {
      expect(computeVerdict(score)).toBe("RETRY");
    }
  });

  it("should return ESCALATE for score between 0.20 and 0.40", () => {
    for (const score of [0.39, 0.3, 0.2]) {
      expect(computeVerdict(score)).toBe("ESCALATE");
    }
  });

  it("should return ABORT for score < 0.20", () => {
    for (const score of [0.19, 0.1, 0]) {
      expect(computeVerdict(score)).toBe("ABORT");
    }
  });
});
// hasHallucinationMarkers: phrases that should (and should not) be flagged.
describe("hasHallucinationMarkers", () => {
  it("should detect 'I don't have access' pattern", () => {
    const text = "I don't have access to the file but I'll assume...";

    expect(hasHallucinationMarkers(text)).toBe(true);
  });

  it("should detect 'assuming exists' pattern", () => {
    const text = "Assuming the function exists, here's how to use it";

    expect(hasHallucinationMarkers(text)).toBe(true);
  });

  it("should detect placeholder pattern", () => {
    const text = "Replace [placeholder] with your value";

    expect(hasHallucinationMarkers(text)).toBe(true);
  });

  it("should not flag normal responses", () => {
    const text = "Here is the implementation you requested.";

    expect(hasHallucinationMarkers(text)).toBe(false);
  });
});
// hasContradiction: self-correcting phrases that should (and should not) be flagged.
describe("hasContradiction", () => {
  it("should detect 'but actually' pattern", () => {
    const text = "The function returns true, but actually it returns false";

    expect(hasContradiction(text)).toBe(true);
  });

  it("should detect 'wait, no' pattern", () => {
    const text = "It's in the utils folder. Wait, no, it's in helpers.";

    expect(hasContradiction(text)).toBe(true);
  });

  it("should detect 'on second thought' pattern", () => {
    const text = "Let me use forEach. On second thought, I'll use map.";

    expect(hasContradiction(text)).toBe(true);
  });

  it("should not flag normal responses", () => {
    const text = "The function takes two parameters and returns their sum.";

    expect(hasContradiction(text)).toBe(false);
  });
});
// Structural metric of evaluateQuality: code fences and brace balance.
describe("structural validation", () => {
  it("should detect malformed code blocks", () => {
    const { metrics } = evaluateQuality(
      createDefaultInput({
        responseText:
          "Here is the code:\n```typescript\nfunction test() {\n return 1;\n", // Missing closing ```
      }),
    );

    expect(metrics.structural).toBeLessThan(1);
  });

  it("should accept well-formed code blocks", () => {
    const { metrics } = evaluateQuality(
      createDefaultInput({
        responseText:
          "Here is the code:\n```typescript\nfunction test() {\n return 1;\n}\n```",
      }),
    );

    expect(metrics.structural).toBeGreaterThan(0.5);
  });

  it("should detect unbalanced braces", () => {
    // Two opening braces, only one closing brace.
    const { metrics } = evaluateQuality(
      createDefaultInput({
        responseText: "The object is { name: 'test', value: { nested: true }",
      }),
    );

    expect(metrics.structural).toBeLessThan(1);
  });
});
});

View File

@@ -1,312 +0,0 @@
/**
* Unit tests for Retry Policy Layer
*/
import { describe, it, expect } from "bun:test";
import {
createInitialRetryState,
createRetryBudget,
computeRetryTransition,
splitTaskDescription,
isRetryable,
getCurrentTier,
getRemainingAttempts,
} from "../retry-policy";
import type {
RetryPolicyInput,
RetryTrigger,
DeficiencyTag,
} from "@src/types/reasoning";
describe("Retry Policy Layer", () => {
// createInitialRetryState: starting state and default budget values.
describe("createInitialRetryState", () => {
  it("should create state with INITIAL kind", () => {
    const { currentState, totalAttempts, history } = createInitialRetryState();

    expect(currentState.kind).toBe("INITIAL");
    expect(totalAttempts).toBe(0);
    expect(history).toHaveLength(0);
  });

  it("should create budget with default limits", () => {
    const { budget } = createInitialRetryState();

    expect(budget.maxTotalAttempts).toBe(12);
    expect(budget.maxPerTier).toBe(2);
    expect(budget.maxTimeMs).toBe(60000);
  });
});
// createRetryBudget: explicit values win, unspecified ones keep their default.
describe("createRetryBudget", () => {
  it("should allow overriding defaults", () => {
    const customBudget = createRetryBudget({
      maxTotalAttempts: 20,
      maxPerTier: 3,
    });

    expect(customBudget.maxTotalAttempts).toBe(20);
    expect(customBudget.maxPerTier).toBe(3);
    // maxTimeMs was not overridden.
    expect(customBudget.maxTimeMs).toBe(60000);
  });
});
// computeRetryTransition: state-machine transitions driven by retry triggers.
describe("computeRetryTransition", () => {
  it("should transition from INITIAL to RETRY_SAME on first retry", () => {
    const state = createInitialRetryState();
    const input: RetryPolicyInput = {
      currentState: state,
      trigger: {
        event: "QUALITY_VERDICT",
        verdict: "RETRY",
        deficiencies: ["QUERY_MISMATCH"],
      },
      availableTools: ["read", "write"],
      contextBudget: 8000,
    };

    const result = computeRetryTransition(input);

    expect(result.nextState.currentState.kind).toBe("RETRY_SAME");
    expect(result.nextState.totalAttempts).toBe(1);
    expect(result.action.kind).toBe("RETRY");
  });

  it("should eventually advance to next tier after repeated failures", () => {
    let state = createInitialRetryState();
    // Annotated as RetryTrigger (as the "state machine progression" suite in
    // this file does) instead of using `as const` / `as string[]` assertions,
    // so the literal is checked against the real trigger type.
    const trigger: RetryTrigger = {
      event: "QUALITY_VERDICT",
      verdict: "RETRY",
      deficiencies: [],
    };

    // Run multiple iterations and verify tiers eventually change
    let sawTierChange = false;
    let lastKind = state.currentState.kind;
    for (let i = 0; i < 8; i++) {
      const result = computeRetryTransition({
        currentState: state,
        trigger,
        availableTools: ["read"],
        contextBudget: 8000,
      });
      state = result.nextState;
      if (
        state.currentState.kind !== lastKind &&
        state.currentState.kind !== "INITIAL"
      ) {
        sawTierChange = true;
        lastKind = state.currentState.kind;
      }
    }

    // Should have seen at least one tier change
    expect(sawTierChange).toBe(true);
  });

  it("should exhaust after exceeding max total attempts", () => {
    const state = createInitialRetryState();
    // Shrink the budget and mark it as already used up.
    state.budget.maxTotalAttempts = 2;
    state.totalAttempts = 2;

    const result = computeRetryTransition({
      currentState: state,
      trigger: {
        event: "QUALITY_VERDICT",
        verdict: "RETRY",
        deficiencies: [],
      },
      availableTools: ["read"],
      contextBudget: 8000,
    });

    expect(result.nextState.currentState.kind).toBe("EXHAUSTED");
    expect(result.action.kind).toBe("ABORT");
  });

  it("should return REDUCE_CONTEXT transform when simplifying", () => {
    // `state` is only mutated, never reassigned, so it is declared const.
    const state = createInitialRetryState();
    state.currentState = { kind: "RETRY_SAME", attempts: 2, tierAttempts: 2 };

    const result = computeRetryTransition({
      currentState: state,
      trigger: {
        event: "QUALITY_VERDICT",
        verdict: "RETRY",
        deficiencies: [],
      },
      availableTools: ["read"],
      contextBudget: 8000,
    });

    // NOTE(review): the assertion only runs when the action is a RETRY with a
    // REDUCE_CONTEXT transform; for any other outcome this test passes without
    // asserting anything. Confirm the intended transition and assert it
    // unconditionally.
    if (
      result.action.kind === "RETRY" &&
      result.action.transform.kind === "REDUCE_CONTEXT"
    ) {
      expect(result.action.transform.delta).toBeDefined();
    }
  });

  it("should escalate to user on permission denied errors", () => {
    const state = createInitialRetryState();
    state.currentState = {
      kind: "RETRY_ALTERNATIVE",
      attempts: 10,
      tierAttempts: 2,
    };

    const result = computeRetryTransition({
      currentState: state,
      trigger: {
        event: "TOOL_EXECUTION_FAILED",
        error: {
          toolName: "bash",
          errorType: "PERMISSION_DENIED",
          message: "Permission denied",
        },
      },
      availableTools: ["read"],
      contextBudget: 8000,
    });

    expect(result.action.kind).toBe("ESCALATE_TO_USER");
  });
});
// splitTaskDescription: breaking a task description into sub-steps.
describe("splitTaskDescription", () => {
  it("should split 'first...then' pattern", () => {
    const steps = splitTaskDescription(
      "First, read the file. Then, update the content.",
    );

    expect(steps.length).toBeGreaterThanOrEqual(2);
  });

  it("should split numbered list pattern", () => {
    const steps = splitTaskDescription(
      "1. Read file 2. Parse content 3. Write output",
    );

    // Only a non-empty result is required here.
    expect(steps.length).toBeGreaterThanOrEqual(1);
  });

  it("should return single item for atomic tasks", () => {
    const steps = splitTaskDescription("Read the configuration file");

    // The description comes back unchanged as the only step.
    expect(steps).toHaveLength(1);
    expect(steps[0]).toBe("Read the configuration file");
  });

  it("should split bulleted list pattern", () => {
    const steps = splitTaskDescription(
      "- Create file\n- Add content\n- Save changes",
    );

    // Only a non-empty result is required here.
    expect(steps.length).toBeGreaterThanOrEqual(1);
  });
});
// isRetryable: true for in-progress states, false for terminal ones.
describe("isRetryable", () => {
  type RetryState = ReturnType<typeof createInitialRetryState>;

  // Builds a fresh retry state and swaps in the given current state.
  const stateIn = (current: RetryState["currentState"]): RetryState => {
    const fresh = createInitialRetryState();
    fresh.currentState = current;
    return fresh;
  };

  it("should return true for INITIAL state", () => {
    expect(isRetryable(createInitialRetryState())).toBe(true);
  });

  it("should return true for RETRY_SAME state", () => {
    const retrying = stateIn({
      kind: "RETRY_SAME",
      attempts: 1,
      tierAttempts: 1,
    });

    expect(isRetryable(retrying)).toBe(true);
  });

  it("should return false for EXHAUSTED state", () => {
    const exhausted = stateIn({
      kind: "EXHAUSTED",
      attempts: 12,
      tierAttempts: 0,
      exhaustionReason: "MAX_TIERS_EXCEEDED",
    });

    expect(isRetryable(exhausted)).toBe(false);
  });

  it("should return false for COMPLETE state", () => {
    const finished = stateIn({
      kind: "COMPLETE",
      attempts: 5,
      tierAttempts: 0,
    });

    expect(isRetryable(finished)).toBe(false);
  });
});
// getCurrentTier: reports the kind of the current state.
describe("getCurrentTier", () => {
  it("should return current tier kind", () => {
    const retryState = createInitialRetryState();

    // Fresh state first...
    expect(getCurrentTier(retryState)).toBe("INITIAL");

    // ...then the same object after its current state is replaced.
    retryState.currentState = {
      kind: "RETRY_DECOMPOSED",
      attempts: 5,
      tierAttempts: 1,
    };
    expect(getCurrentTier(retryState)).toBe("RETRY_DECOMPOSED");
  });
});
// getRemainingAttempts: attempts left out of the default budget (12, per the
// createInitialRetryState defaults asserted elsewhere in this file).
describe("getRemainingAttempts", () => {
  it("should calculate remaining attempts correctly", () => {
    const retryState = createInitialRetryState();

    retryState.totalAttempts = 4;
    expect(getRemainingAttempts(retryState)).toBe(8);

    // Budget fully consumed.
    retryState.totalAttempts = 12;
    expect(getRemainingAttempts(retryState)).toBe(0);
  });
});
// End-to-end walk: feed the same RETRY verdict until the machine gives up.
describe("state machine progression", () => {
  it("should progress through tiers and eventually exhaust", () => {
    const retryVerdict: RetryTrigger = {
      event: "QUALITY_VERDICT",
      verdict: "RETRY",
      deficiencies: [],
    };
    // Track which tiers we've seen
    const visitedKinds = new Set<string>();
    const stepLimit = 15;
    let current = createInitialRetryState();

    // Stop early once EXHAUSTED is reached; otherwise give up after stepLimit steps.
    for (
      let step = 0;
      step < stepLimit && current.currentState.kind !== "EXHAUSTED";
      step++
    ) {
      const { nextState } = computeRetryTransition({
        currentState: current,
        trigger: retryVerdict,
        availableTools: ["read", "write"],
        contextBudget: 8000,
      });
      visitedKinds.add(nextState.currentState.kind);
      current = nextState;
    }

    // Should have reached EXHAUSTED
    expect(current.currentState.kind).toBe("EXHAUSTED");
    // Should have seen multiple tiers along the way
    expect(visitedKinds.size).toBeGreaterThan(1);
  });
});
});

View File

@@ -1,504 +0,0 @@
/**
* Unit tests for Termination Detection Layer
*/
import { describe, it, expect } from "bun:test";
import {
createInitialTerminationState,
processTerminationTrigger,
computeTerminationConfidence,
extractValidationFailures,
isComplete,
isFailed,
isTerminal,
requiresValidation,
getConfidencePercentage,
} from "../termination-detection";
import type {
TerminationState,
TerminationTrigger,
CompletionSignal,
ValidationResult,
} from "@src/types/reasoning";
describe("Termination Detection Layer", () => {
// createInitialTerminationState: a fresh state is RUNNING with no evidence.
describe("createInitialTerminationState", () => {
  it("should create state with RUNNING status", () => {
    const { status, completionSignals, validationResults, confidenceScore } =
      createInitialTerminationState();

    expect(status).toBe("RUNNING");
    expect(completionSignals).toHaveLength(0);
    expect(validationResults).toHaveLength(0);
    expect(confidenceScore).toBe(0);
  });
});
// processTerminationTrigger: how each trigger kind affects signals, validation
// results and status.
describe("processTerminationTrigger", () => {
  // True when the processed result carries at least one signal from `source`.
  const emitted = (
    outcome: ReturnType<typeof processTerminationTrigger>,
    source: string,
  ): boolean =>
    outcome.evidence.signals.some((signal) => signal.source === source);

  describe("MODEL_OUTPUT trigger", () => {
    // Builds a MODEL_OUTPUT trigger for the given text.
    const modelOutput = (
      content: string,
      hasToolCalls: boolean,
    ): TerminationTrigger => ({
      event: "MODEL_OUTPUT",
      content,
      hasToolCalls,
    });

    it("should detect completion signals from model text", () => {
      const initial = createInitialTerminationState();
      const trigger = modelOutput(
        "I've completed the task successfully.",
        false,
      );

      const outcome = processTerminationTrigger(initial, trigger);

      expect(outcome.evidence.signals.length).toBeGreaterThan(0);
      expect(emitted(outcome, "MODEL_STATEMENT")).toBe(true);
    });

    it("should detect no pending actions when no tool calls", () => {
      const initial = createInitialTerminationState();
      const trigger = modelOutput("Here is the answer.", false);

      const outcome = processTerminationTrigger(initial, trigger);

      expect(emitted(outcome, "NO_PENDING_ACTIONS")).toBe(true);
    });

    it("should not add NO_PENDING_ACTIONS when tool calls present", () => {
      const initial = createInitialTerminationState();
      const trigger = modelOutput("Let me read that file.", true);

      const outcome = processTerminationTrigger(initial, trigger);

      expect(emitted(outcome, "NO_PENDING_ACTIONS")).toBe(false);
    });
  });

  describe("TOOL_COMPLETED trigger", () => {
    // Builds a TOOL_COMPLETED trigger for the "write" tool.
    const writeCompleted = (success: boolean): TerminationTrigger => ({
      event: "TOOL_COMPLETED",
      toolName: "write",
      success,
    });

    it("should add TOOL_SUCCESS signal on successful tool execution", () => {
      const initial = createInitialTerminationState();
      const trigger = writeCompleted(true);

      const outcome = processTerminationTrigger(initial, trigger);

      expect(emitted(outcome, "TOOL_SUCCESS")).toBe(true);
    });

    it("should not add signal on failed tool execution", () => {
      const initial = createInitialTerminationState();
      const trigger = writeCompleted(false);

      const outcome = processTerminationTrigger(initial, trigger);

      expect(emitted(outcome, "TOOL_SUCCESS")).toBe(false);
    });
  });

  describe("USER_INPUT trigger", () => {
    it("should immediately confirm completion on user acceptance", () => {
      const initial = createInitialTerminationState();
      const acceptance: TerminationTrigger = {
        event: "USER_INPUT",
        isAcceptance: true,
      };

      const outcome = processTerminationTrigger(initial, acceptance);

      expect(outcome.status).toBe("CONFIRMED_COMPLETE");
      expect(emitted(outcome, "USER_ACCEPT")).toBe(true);
    });
  });

  describe("VALIDATION_RESULT trigger", () => {
    it("should update validation results", () => {
      const awaiting = createInitialTerminationState();
      awaiting.status = "AWAITING_VALIDATION";
      const passingCheck: TerminationTrigger = {
        event: "VALIDATION_RESULT",
        result: {
          checkId: "file_exists_check",
          passed: true,
          details: "All files exist",
          duration: 100,
        },
      };

      const { evidence } = processTerminationTrigger(awaiting, passingCheck);

      expect(evidence.validationResults).toHaveLength(1);
      expect(evidence.validationResults[0].passed).toBe(true);
    });

    it("should update existing validation result", () => {
      const awaiting = createInitialTerminationState();
      awaiting.status = "AWAITING_VALIDATION";
      // Seed a failed result for the same check id the trigger reports on.
      awaiting.validationResults = [
        {
          checkId: "file_exists_check",
          passed: false,
          details: "File missing",
          duration: 50,
        },
      ];
      const passingCheck: TerminationTrigger = {
        event: "VALIDATION_RESULT",
        result: {
          checkId: "file_exists_check",
          passed: true,
          details: "File now exists",
          duration: 100,
        },
      };

      const { evidence } = processTerminationTrigger(awaiting, passingCheck);

      // Still one entry, now reporting the passing result.
      expect(evidence.validationResults).toHaveLength(1);
      expect(evidence.validationResults[0].passed).toBe(true);
    });
  });

  describe("status transitions", () => {
    it("should accumulate signals and increase confidence over time", () => {
      const seeded = createInitialTerminationState();
      seeded.completionSignals = [
        { source: "MODEL_STATEMENT", timestamp: Date.now(), confidence: 0.3 },
        { source: "TOOL_SUCCESS", timestamp: Date.now(), confidence: 0.5 },
        { source: "TOOL_SUCCESS", timestamp: Date.now(), confidence: 0.5 },
      ];
      const completionClaim: TerminationTrigger = {
        event: "MODEL_OUTPUT",
        content: "I've completed the task successfully.",
        hasToolCalls: false,
      };

      const outcome = processTerminationTrigger(seeded, completionClaim);

      // Confidence should increase with more signals
      expect(outcome.confidence).toBeGreaterThan(0);
      expect(outcome.evidence.signals.length).toBeGreaterThan(
        seeded.completionSignals.length,
      );
    });

    it("should transition from POTENTIALLY_COMPLETE to AWAITING_VALIDATION", () => {
      const almostDone = createInitialTerminationState();
      almostDone.status = "POTENTIALLY_COMPLETE";
      const toolSuccess: TerminationTrigger = {
        event: "TOOL_COMPLETED",
        toolName: "write",
        success: true,
      };

      const outcome = processTerminationTrigger(almostDone, toolSuccess);

      expect(outcome.status).toBe("AWAITING_VALIDATION");
    });
  });
});
describe("computeTerminationConfidence", () => {
it("should compute low confidence with no signals or results", () => {
const confidence = computeTerminationConfidence([], []);
expect(confidence).toBe(0);
});
it("should compute confidence from signals", () => {
const signals: CompletionSignal[] = [
{ source: "MODEL_STATEMENT", timestamp: Date.now(), confidence: 0.3 },
{ source: "TOOL_SUCCESS", timestamp: Date.now(), confidence: 0.5 },
];
const confidence = computeTerminationConfidence(signals, []);
expect(confidence).toBeGreaterThan(0);
expect(confidence).toBeLessThanOrEqual(0.4); // Signal max is 0.4
});
it("should compute confidence from validation results", () => {
const results: ValidationResult[] = [
{
checkId: "file_exists_check",
passed: true,
details: "OK",
duration: 100,
},
{
checkId: "syntax_valid_check",
passed: true,
details: "OK",
duration: 100,
},
];
const confidence = computeTerminationConfidence([], results);
expect(confidence).toBeGreaterThan(0);
});
it("should compute combined confidence", () => {
const signals: CompletionSignal[] = [
{ source: "TOOL_SUCCESS", timestamp: Date.now(), confidence: 0.5 },
];
const results: ValidationResult[] = [
{
checkId: "file_exists_check",
passed: true,
details: "OK",
duration: 100,
},
];
const combinedConfidence = computeTerminationConfidence(signals, results);
const signalOnlyConfidence = computeTerminationConfidence(signals, []);
const resultOnlyConfidence = computeTerminationConfidence([], results);
expect(combinedConfidence).toBeGreaterThan(signalOnlyConfidence);
expect(combinedConfidence).toBeGreaterThan(resultOnlyConfidence);
});
});
describe("extractValidationFailures", () => {
it("should extract failed validations", () => {
const results: ValidationResult[] = [
{ checkId: "check_1", passed: true, details: "OK", duration: 100 },
{
checkId: "check_2",
passed: false,
details: "File not found",
duration: 50,
},
{
checkId: "check_3",
passed: false,
details: "Syntax error",
duration: 75,
},
];
const failures = extractValidationFailures(results);
expect(failures).toHaveLength(2);
expect(failures.map((f) => f.checkId)).toContain("check_2");
expect(failures.map((f) => f.checkId)).toContain("check_3");
});
it("should mark permission errors as non-recoverable", () => {
const results: ValidationResult[] = [
{
checkId: "check_1",
passed: false,
details: "Permission denied",
duration: 100,
},
];
const failures = extractValidationFailures(results);
expect(failures[0].recoverable).toBe(false);
});
it("should mark other errors as recoverable", () => {
const results: ValidationResult[] = [
{
checkId: "check_1",
passed: false,
details: "Timeout occurred",
duration: 100,
},
];
const failures = extractValidationFailures(results);
expect(failures[0].recoverable).toBe(true);
});
});
describe("state query functions", () => {
describe("isComplete", () => {
it("should return true only for CONFIRMED_COMPLETE", () => {
const completeState: TerminationState = {
...createInitialTerminationState(),
status: "CONFIRMED_COMPLETE",
};
const runningState: TerminationState = {
...createInitialTerminationState(),
status: "RUNNING",
};
expect(isComplete(completeState)).toBe(true);
expect(isComplete(runningState)).toBe(false);
});
});
describe("isFailed", () => {
it("should return true only for FAILED", () => {
const failedState: TerminationState = {
...createInitialTerminationState(),
status: "FAILED",
};
const runningState: TerminationState = {
...createInitialTerminationState(),
status: "RUNNING",
};
expect(isFailed(failedState)).toBe(true);
expect(isFailed(runningState)).toBe(false);
});
});
describe("isTerminal", () => {
it("should return true for CONFIRMED_COMPLETE or FAILED", () => {
expect(
isTerminal({
...createInitialTerminationState(),
status: "CONFIRMED_COMPLETE",
}),
).toBe(true);
expect(
isTerminal({ ...createInitialTerminationState(), status: "FAILED" }),
).toBe(true);
expect(
isTerminal({ ...createInitialTerminationState(), status: "RUNNING" }),
).toBe(false);
expect(
isTerminal({
...createInitialTerminationState(),
status: "AWAITING_VALIDATION",
}),
).toBe(false);
});
});
describe("requiresValidation", () => {
it("should return true for POTENTIALLY_COMPLETE and AWAITING_VALIDATION", () => {
expect(
requiresValidation({
...createInitialTerminationState(),
status: "POTENTIALLY_COMPLETE",
}),
).toBe(true);
expect(
requiresValidation({
...createInitialTerminationState(),
status: "AWAITING_VALIDATION",
}),
).toBe(true);
expect(
requiresValidation({
...createInitialTerminationState(),
status: "RUNNING",
}),
).toBe(false);
expect(
requiresValidation({
...createInitialTerminationState(),
status: "CONFIRMED_COMPLETE",
}),
).toBe(false);
});
});
describe("getConfidencePercentage", () => {
it("should format confidence as percentage", () => {
const state: TerminationState = {
...createInitialTerminationState(),
confidenceScore: 0.756,
};
expect(getConfidencePercentage(state)).toBe("75.6%");
});
it("should handle zero confidence", () => {
const state = createInitialTerminationState();
expect(getConfidencePercentage(state)).toBe("0.0%");
});
it("should handle 100% confidence", () => {
const state: TerminationState = {
...createInitialTerminationState(),
confidenceScore: 1.0,
};
expect(getConfidencePercentage(state)).toBe("100.0%");
});
});
});
describe("decision computation", () => {
it("should return CONTINUE for low confidence", () => {
const state = createInitialTerminationState();
const trigger: TerminationTrigger = {
event: "MODEL_OUTPUT",
content: "Working on it...",
hasToolCalls: true,
};
const result = processTerminationTrigger(state, trigger);
expect(result.decision.kind).toBe("CONTINUE");
});
it("should return VALIDATE for potentially complete state", () => {
const state: TerminationState = {
...createInitialTerminationState(),
status: "POTENTIALLY_COMPLETE",
confidenceScore: 0.6,
};
const trigger: TerminationTrigger = {
event: "TOOL_COMPLETED",
toolName: "write",
success: true,
};
const result = processTerminationTrigger(state, trigger);
expect(result.decision.kind).toBe("VALIDATE");
});
it("should return COMPLETE for confirmed completion", () => {
const state = createInitialTerminationState();
const trigger: TerminationTrigger = {
event: "USER_INPUT",
isAcceptance: true,
};
const result = processTerminationTrigger(state, trigger);
expect(result.decision.kind).toBe("COMPLETE");
});
});
});

View File

@@ -1,435 +0,0 @@
/**
* Unit tests for Reasoning Utilities
*/
import { describe, it, expect } from "bun:test";
import {
estimateTokens,
tokenize,
jaccardSimilarity,
weightedSum,
extractEntities,
createEntityTable,
truncateMiddle,
foldCode,
extractCodeBlocks,
recencyDecay,
generateId,
isValidJson,
hasBalancedBraces,
countMatches,
sum,
unique,
groupBy,
} from "../utils";
describe("Reasoning Utilities", () => {
describe("estimateTokens", () => {
it("should estimate tokens based on character count", () => {
const text = "Hello world"; // 11 chars
const tokens = estimateTokens(text);
expect(tokens).toBeGreaterThan(0);
expect(tokens).toBeLessThan(text.length);
});
it("should handle empty string", () => {
expect(estimateTokens("")).toBe(0);
});
});
describe("tokenize", () => {
it("should split text into lowercase tokens", () => {
const tokens = tokenize("Hello World Test");
expect(tokens.every((t) => t === t.toLowerCase())).toBe(true);
});
it("should filter stop words", () => {
const tokens = tokenize("the quick brown fox jumps over the lazy dog");
expect(tokens).not.toContain("the");
// "over" may or may not be filtered depending on stop words list
expect(tokens).toContain("quick");
expect(tokens).toContain("brown");
});
it("should filter short tokens", () => {
const tokens = tokenize("I am a test");
expect(tokens).not.toContain("i");
expect(tokens).not.toContain("am");
expect(tokens).not.toContain("a");
});
it("should handle punctuation", () => {
const tokens = tokenize("Hello, world! How are you?");
expect(tokens.every((t) => !/[,!?]/.test(t))).toBe(true);
});
});
describe("jaccardSimilarity", () => {
it("should return 1 for identical sets", () => {
const similarity = jaccardSimilarity(["a", "b", "c"], ["a", "b", "c"]);
expect(similarity).toBe(1);
});
it("should return 0 for disjoint sets", () => {
const similarity = jaccardSimilarity(["a", "b", "c"], ["d", "e", "f"]);
expect(similarity).toBe(0);
});
it("should return correct value for partial overlap", () => {
const similarity = jaccardSimilarity(["a", "b", "c"], ["b", "c", "d"]);
// Intersection: {b, c} = 2, Union: {a, b, c, d} = 4
expect(similarity).toBe(0.5);
});
it("should handle empty sets", () => {
expect(jaccardSimilarity([], [])).toBe(0);
expect(jaccardSimilarity(["a"], [])).toBe(0);
expect(jaccardSimilarity([], ["a"])).toBe(0);
});
});
describe("weightedSum", () => {
it("should compute weighted sum correctly", () => {
const result = weightedSum([1, 2, 3], [0.5, 0.3, 0.2]);
expect(result).toBeCloseTo(1 * 0.5 + 2 * 0.3 + 3 * 0.2);
});
it("should throw for mismatched lengths", () => {
expect(() => weightedSum([1, 2], [0.5])).toThrow();
});
it("should handle empty arrays", () => {
expect(weightedSum([], [])).toBe(0);
});
});
describe("extractEntities", () => {
it("should extract file paths", () => {
const entities = extractEntities(
"Check the file src/index.ts for details",
"msg_1",
);
expect(
entities.some((e) => e.type === "FILE" && e.value.includes("index.ts")),
).toBe(true);
});
it("should extract function names", () => {
const entities = extractEntities(
"function handleClick() { return 1; }",
"msg_1",
);
expect(entities.some((e) => e.type === "FUNCTION")).toBe(true);
});
it("should extract URLs", () => {
const entities = extractEntities(
"Visit https://example.com for more info",
"msg_1",
);
expect(
entities.some(
(e) => e.type === "URL" && e.value.includes("example.com"),
),
).toBe(true);
});
it("should set source message ID", () => {
  // Same input the file-path test above relies on, so at least one entity
  // is expected; the assertions below can therefore never be skipped.
  const entities = extractEntities(
    "Check the file src/index.ts for details",
    "test_msg",
  );

  // Fail loudly instead of passing vacuously when nothing is extracted.
  expect(entities.length).toBeGreaterThan(0);
  for (const entity of entities) {
    expect(entity.sourceMessageId).toBe("test_msg");
  }
});
});
describe("createEntityTable", () => {
it("should organize entities by type", () => {
const entities = [
{
type: "FILE" as const,
value: "test.ts",
sourceMessageId: "msg_1",
frequency: 1,
},
{
type: "FILE" as const,
value: "other.ts",
sourceMessageId: "msg_1",
frequency: 1,
},
{
type: "URL" as const,
value: "https://test.com",
sourceMessageId: "msg_1",
frequency: 1,
},
];
const table = createEntityTable(entities);
expect(table.byType.FILE).toHaveLength(2);
expect(table.byType.URL).toHaveLength(1);
});
it("should organize entities by source", () => {
const entities = [
{
type: "FILE" as const,
value: "test.ts",
sourceMessageId: "msg_1",
frequency: 1,
},
{
type: "FILE" as const,
value: "other.ts",
sourceMessageId: "msg_2",
frequency: 1,
},
];
const table = createEntityTable(entities);
expect(table.bySource["msg_1"]).toHaveLength(1);
expect(table.bySource["msg_2"]).toHaveLength(1);
});
});
describe("truncateMiddle", () => {
it("should truncate long text", () => {
const text = "a".repeat(200);
const result = truncateMiddle(text, 50, 50);
expect(result.length).toBeLessThan(text.length);
expect(result).toContain("truncated");
});
it("should not truncate short text", () => {
const text = "short text";
const result = truncateMiddle(text, 50, 50);
expect(result).toBe(text);
});
it("should preserve head and tail", () => {
const text = "HEAD_CONTENT_MIDDLE_STUFF_TAIL_CONTENT";
const result = truncateMiddle(text, 12, 12);
expect(result.startsWith("HEAD_CONTENT")).toBe(true);
expect(result.endsWith("TAIL_CONTENT")).toBe(true);
});
});
describe("foldCode", () => {
it("should fold long code blocks", () => {
const code = Array.from({ length: 50 }, (_, i) => `line ${i + 1}`).join(
"\n",
);
const result = foldCode(code, { keepLines: 5, tailLines: 3 });
expect(result.split("\n").length).toBeLessThan(50);
expect(result).toContain("folded");
});
it("should not fold short code blocks", () => {
const code = "line 1\nline 2\nline 3";
const result = foldCode(code, { keepLines: 5, tailLines: 3 });
expect(result).toBe(code);
});
it("should preserve first and last lines", () => {
const code = Array.from({ length: 50 }, (_, i) => `line ${i + 1}`).join(
"\n",
);
const result = foldCode(code, { keepLines: 2, tailLines: 2 });
expect(result).toContain("line 1");
expect(result).toContain("line 2");
expect(result).toContain("line 49");
expect(result).toContain("line 50");
});
});
describe("extractCodeBlocks", () => {
it("should extract code blocks with language", () => {
const text =
"Here is code:\n```typescript\nconst x = 1;\n```\nMore text.";
const blocks = extractCodeBlocks(text);
expect(blocks).toHaveLength(1);
expect(blocks[0].language).toBe("typescript");
expect(blocks[0].content).toContain("const x = 1");
});
it("should extract multiple code blocks", () => {
const text = "```js\ncode1\n```\n\n```python\ncode2\n```";
const blocks = extractCodeBlocks(text);
expect(blocks).toHaveLength(2);
expect(blocks[0].language).toBe("js");
expect(blocks[1].language).toBe("python");
});
it("should handle code blocks without language", () => {
const text = "```\nsome code\n```";
const blocks = extractCodeBlocks(text);
expect(blocks).toHaveLength(1);
expect(blocks[0].language).toBe("unknown");
});
it("should track positions", () => {
const text = "Start\n```ts\ncode\n```\nEnd";
const blocks = extractCodeBlocks(text);
expect(blocks[0].startIndex).toBeGreaterThan(0);
expect(blocks[0].endIndex).toBeGreaterThan(blocks[0].startIndex);
});
});
describe("recencyDecay", () => {
it("should return 1 for current time", () => {
const now = Date.now();
const decay = recencyDecay(now, now, 30);
expect(decay).toBe(1);
});
it("should return 0.5 at half-life", () => {
const now = Date.now();
const halfLifeAgo = now - 30 * 60 * 1000; // 30 minutes ago
const decay = recencyDecay(halfLifeAgo, now, 30);
expect(decay).toBeCloseTo(0.5, 2);
});
it("should decrease with age", () => {
const now = Date.now();
const recent = recencyDecay(now - 60000, now, 30);
const old = recencyDecay(now - 3600000, now, 30);
expect(recent).toBeGreaterThan(old);
});
});
describe("generateId", () => {
it("should generate unique IDs", () => {
const ids = new Set<string>();
for (let i = 0; i < 100; i++) {
ids.add(generateId());
}
expect(ids.size).toBe(100);
});
it("should include prefix when provided", () => {
const id = generateId("test");
expect(id.startsWith("test_")).toBe(true);
});
});
describe("isValidJson", () => {
it("should return true for valid JSON", () => {
expect(isValidJson('{"key": "value"}')).toBe(true);
expect(isValidJson("[1, 2, 3]")).toBe(true);
expect(isValidJson('"string"')).toBe(true);
});
it("should return false for invalid JSON", () => {
expect(isValidJson("{key: value}")).toBe(false);
expect(isValidJson("not json")).toBe(false);
expect(isValidJson("{incomplete")).toBe(false);
});
});
describe("hasBalancedBraces", () => {
it("should return true for balanced braces", () => {
expect(hasBalancedBraces("{ foo: { bar: [] } }")).toBe(true);
expect(hasBalancedBraces("function() { return (a + b); }")).toBe(true);
});
it("should return false for unbalanced braces", () => {
expect(hasBalancedBraces("{ foo: { bar }")).toBe(false);
expect(hasBalancedBraces("function() { return (a + b); ")).toBe(false);
expect(hasBalancedBraces("{ ] }")).toBe(false);
});
it("should handle empty string", () => {
expect(hasBalancedBraces("")).toBe(true);
});
});
describe("countMatches", () => {
it("should count pattern matches", () => {
expect(countMatches("aaa", /a/g)).toBe(3);
expect(countMatches("hello world", /o/g)).toBe(2);
});
it("should handle no matches", () => {
expect(countMatches("hello", /z/g)).toBe(0);
});
it("should handle case-insensitive patterns", () => {
expect(countMatches("Hello HELLO hello", /hello/gi)).toBe(3);
});
});
describe("sum", () => {
it("should sum numbers", () => {
expect(sum([1, 2, 3])).toBe(6);
expect(sum([0.1, 0.2, 0.3])).toBeCloseTo(0.6);
});
it("should return 0 for empty array", () => {
expect(sum([])).toBe(0);
});
});
describe("unique", () => {
it("should remove duplicates", () => {
expect(unique([1, 2, 2, 3, 3, 3])).toEqual([1, 2, 3]);
expect(unique(["a", "b", "a"])).toEqual(["a", "b"]);
});
it("should handle empty array", () => {
expect(unique([])).toEqual([]);
});
});
describe("groupBy", () => {
it("should group by key function", () => {
const items = [
{ type: "a", value: 1 },
{ type: "b", value: 2 },
{ type: "a", value: 3 },
];
const grouped = groupBy(items, (item) => item.type);
expect(grouped.a).toHaveLength(2);
expect(grouped.b).toHaveLength(1);
});
it("should handle empty array", () => {
const grouped = groupBy([], (x: string) => x);
expect(Object.keys(grouped)).toHaveLength(0);
});
});
});

View File

@@ -111,10 +111,11 @@ const executeCommand = (
): Promise<ToolResult> => {
const {
command,
description,
workdir,
timeout = BASH_DEFAULTS.TIMEOUT,
} = args;
// Provide default description if not specified
const description = args.description ?? `Running: ${command.substring(0, 50)}`;
const cwd = workdir ?? ctx.workingDir;
updateRunningStatus(ctx, description);
@@ -165,7 +166,20 @@ export const executeBash = async (
args: BashParams,
ctx: ToolContext,
): Promise<ToolResult> => {
const { command, description } = args;
const { command } = args;
// Guard against undefined command (can happen with malformed tool calls)
if (!command) {
return {
success: false,
title: "Invalid command",
output: "",
error: "Command is required but was not provided",
};
}
// Provide default description if not specified
const description = args.description ?? `Running: ${command.substring(0, 50)}`;
const allowed = await checkPermission(
command,

View File

@@ -8,6 +8,7 @@ export const bashParams = z.object({
command: z.string().describe("The bash command to execute"),
description: z
.string()
.optional()
.describe("A brief description of what this command does"),
workdir: z
.string()

View File

@@ -9,6 +9,7 @@ import {
} from "solid-js";
import { batch } from "solid-js";
import { getFiles } from "@services/file-picker/files";
import { abortCurrentOperation } from "@services/chat-tui-service";
import versionData from "@/version.json";
import {
ExitProvider,
@@ -90,7 +91,7 @@ function ErrorFallback(props: { error: Error }) {
{props.error.message}
</text>
<text fg={theme.colors.textDim} marginTop={2}>
Press Ctrl+C to exit
Press Ctrl+C twice to exit
</text>
</box>
);
@@ -157,16 +158,29 @@ function AppContent(props: AppProps) {
}
useKeyboard((evt) => {
// ESC aborts current operation
if (evt.name === "escape") {
const aborted = abortCurrentOperation();
if (aborted) {
toast.info("Operation cancelled");
evt.preventDefault();
return;
}
}
// Ctrl+C exits the application
if (evt.ctrl && evt.name === "c") {
if (app.interruptPending()) {
exit.exit(0);
} else {
app.setInterruptPending(true);
toast.warning("Press Ctrl+C again to exit");
setTimeout(() => {
app.setInterruptPending(false);
}, 2000);
evt.preventDefault();
return;
}
app.setInterruptPending(true);
toast.warning("Press Ctrl+C again to exit");
setTimeout(() => {
app.setInterruptPending(false);
}, 2000);
evt.preventDefault();
return;
}

View File

@@ -1,4 +1,4 @@
import { createMemo, For, createSignal, onMount, onCleanup } from "solid-js";
import { For, createSignal, onMount, onCleanup } from "solid-js";
import { useKeyboard } from "@opentui/solid";
import { TextAttributes } from "@opentui/core";
import type { ScrollBoxRenderable } from "@opentui/core";
@@ -10,7 +10,7 @@ const SCROLL_LINES = 2;
interface DebugEntry {
id: string;
timestamp: number;
type: "api" | "stream" | "tool" | "state" | "error" | "info";
type: "api" | "stream" | "tool" | "state" | "error" | "info" | "render";
message: string;
}
@@ -80,6 +80,7 @@ export function DebugLogPanel() {
state: theme.colors.accent,
error: theme.colors.error,
info: theme.colors.textDim,
render: theme.colors.primary,
};
return colorMap[type];
};
@@ -92,6 +93,7 @@ export function DebugLogPanel() {
state: "STA",
error: "ERR",
info: "INF",
render: "RND",
};
return labelMap[type];
};

View File

@@ -56,9 +56,9 @@ function DiffLine(props: DiffLineProps) {
const theme = useTheme();
const lineColor = (): string => {
// Use white text for add/remove lines since they have colored backgrounds
// Use light text for add/remove lines since they have dark colored backgrounds
if (props.line.type === "add" || props.line.type === "remove") {
return theme.colors.text;
return theme.colors.diffLineText;
}
const colorMap: Record<string, string> = {
context: theme.colors.diffContext,
@@ -82,8 +82,8 @@ function DiffLine(props: DiffLineProps) {
};
const bgColor = (): string | undefined => {
if (props.line.type === "add") return theme.colors.bgAdded;
if (props.line.type === "remove") return theme.colors.bgRemoved;
if (props.line.type === "add") return theme.colors.diffLineBgAdded;
if (props.line.type === "remove") return theme.colors.diffLineBgRemoved;
return undefined;
};

View File

@@ -1,8 +1,10 @@
import { Show } from "solid-js";
import { Show, createSignal, createEffect, onMount } from "solid-js";
import { TextAttributes } from "@opentui/core";
import { useTheme } from "@tui-solid/context/theme";
import { useAppStore } from "@tui-solid/context/app";
import type { LogEntry } from "@/types/tui";
import { Spinner } from "@tui-solid/ui/spinner";
import { addDebugLog } from "@tui-solid/components/debug-log-panel";
interface StreamingMessageProps {
entry: LogEntry;
@@ -10,8 +12,50 @@ interface StreamingMessageProps {
export function StreamingMessage(props: StreamingMessageProps) {
const theme = useTheme();
const isStreaming = () => props.entry.metadata?.isStreaming ?? false;
const hasContent = () => Boolean(props.entry.content);
const app = useAppStore();
// Use local signals that are updated via createEffect
// This ensures proper reactivity with the store
const [displayContent, setDisplayContent] = createSignal(props.entry.content);
const [isActiveStreaming, setIsActiveStreaming] = createSignal(
props.entry.metadata?.isStreaming ?? false
);
onMount(() => {
addDebugLog("render", `StreamingMessage mounted for entry: ${props.entry.id}`);
});
// Effect to sync content from store's streamingLog
// Use individual property accessors for fine-grained reactivity
createEffect(() => {
// Use dedicated property accessors that directly access store properties
const logId = app.streamingLogId();
const isActive = app.streamingLogIsActive();
const storeContent = app.streamingLogContent();
// Check if this entry is the currently streaming log
const isCurrentLog = logId === props.entry.id;
addDebugLog("render", `Effect: logId=${logId}, entryId=${props.entry.id}, isActive=${isActive}, contentLen=${storeContent?.length ?? 0}`);
if (isCurrentLog && isActive) {
setDisplayContent(storeContent);
setIsActiveStreaming(true);
} else if (isCurrentLog && !isActive) {
// Streaming just completed for this log
setIsActiveStreaming(false);
// Keep the content we have
} else {
// Not the current streaming log, use entry content
setDisplayContent(props.entry.content);
setIsActiveStreaming(props.entry.metadata?.isStreaming ?? false);
}
});
const hasContent = () => {
const c = displayContent();
return Boolean(c && c.length > 0);
};
return (
<box flexDirection="column" marginBottom={1}>
@@ -19,7 +63,7 @@ export function StreamingMessage(props: StreamingMessageProps) {
<text fg={theme.colors.roleAssistant} attributes={TextAttributes.BOLD}>
CodeTyper
</text>
<Show when={isStreaming()}>
<Show when={isActiveStreaming()}>
<box marginLeft={1}>
<Spinner />
</box>
@@ -27,7 +71,7 @@ export function StreamingMessage(props: StreamingMessageProps) {
</box>
<Show when={hasContent()}>
<box marginLeft={2}>
<text wrapMode="word">{props.entry.content}</text>
<text wrapMode="word">{displayContent()}</text>
</box>
</Show>
</box>

View File

@@ -76,6 +76,9 @@ interface AppContextValue {
exitPending: Accessor<boolean>;
isCompacting: Accessor<boolean>;
streamingLog: Accessor<StreamingLogState>;
streamingLogId: Accessor<string | null>;
streamingLogContent: Accessor<string>;
streamingLogIsActive: Accessor<boolean>;
suggestions: Accessor<SuggestionState>;
cascadeEnabled: Accessor<boolean>;
@@ -263,6 +266,10 @@ export const { provider: AppStoreProvider, use: useAppStore } =
const exitPending = (): boolean => store.exitPending;
const isCompacting = (): boolean => store.isCompacting;
const streamingLog = (): StreamingLogState => store.streamingLog;
// Individual property accessors for fine-grained reactivity
const streamingLogId = (): string | null => store.streamingLog.logId;
const streamingLogContent = (): string => store.streamingLog.content;
const streamingLogIsActive = (): boolean => store.streamingLog.isStreaming;
const suggestions = (): SuggestionState => store.suggestions;
const cascadeEnabled = (): boolean => store.cascadeEnabled;
@@ -532,34 +539,30 @@ export const { provider: AppStoreProvider, use: useAppStore } =
s.logs.push(entry);
}),
);
setStore("streamingLog", {
logId,
content: "",
isStreaming: true,
});
// Use path-based updates to ensure proper proxy reactivity
setStore("streamingLog", "logId", logId);
setStore("streamingLog", "content", "");
setStore("streamingLog", "isStreaming", true);
});
return logId;
};
const appendStreamContent = (content: string): void => {
if (!store.streamingLog.logId || !store.streamingLog.isStreaming) {
const logId = store.streamingLog.logId;
const isCurrentlyStreaming = store.streamingLog.isStreaming;
if (!logId || !isCurrentlyStreaming) {
return;
}
const newContent = store.streamingLog.content + content;
const logIndex = store.logs.findIndex((l) => l.id === logId);
batch(() => {
setStore("streamingLog", {
...store.streamingLog,
content: newContent,
});
setStore(
produce((s) => {
const log = s.logs.find((l) => l.id === store.streamingLog.logId);
if (log) {
log.content = newContent;
}
}),
);
// Use path-based updates for proper reactivity tracking
setStore("streamingLog", "content", newContent);
if (logIndex !== -1) {
setStore("logs", logIndex, "content", newContent);
}
});
};
@@ -569,21 +572,19 @@ export const { provider: AppStoreProvider, use: useAppStore } =
}
const logId = store.streamingLog.logId;
const logIndex = store.logs.findIndex((l) => l.id === logId);
batch(() => {
setStore("streamingLog", createInitialStreamingState());
setStore(
produce((s) => {
const log = s.logs.find((l) => l.id === logId);
if (log) {
log.type = "assistant";
log.metadata = {
...log.metadata,
isStreaming: false,
streamComplete: true,
};
}
}),
);
if (logIndex !== -1) {
const currentMetadata = store.logs[logIndex].metadata ?? {};
setStore("logs", logIndex, "type", "assistant");
setStore("logs", logIndex, "metadata", {
...currentMetadata,
isStreaming: false,
streamComplete: true,
});
}
});
};
@@ -692,6 +693,9 @@ export const { provider: AppStoreProvider, use: useAppStore } =
exitPending,
isCompacting,
streamingLog,
streamingLogId,
streamingLogContent,
streamingLogIsActive,
suggestions,
cascadeEnabled,

View File

@@ -94,11 +94,9 @@ const renderAddLine = (
</Text>
</>
)}
<Text backgroundColor="greenBright" color="black">
+
<Text backgroundColor="#1a3d1a" color="white">
+{line.content}
</Text>
<Text color="green"> </Text>
<HighlightedCode content={line.content} language={ctx.language} />
</Box>
);
@@ -123,11 +121,9 @@ const renderRemoveLine = (
</Text>
</>
)}
<Text backgroundColor="redBright" color="black">
-
<Text backgroundColor="#3d1a1a" color="white">
-{line.content}
</Text>
<Text color="red"> </Text>
<HighlightedCode content={line.content} language={ctx.language} />
</Box>
);

View File

@@ -44,6 +44,10 @@ export interface ThemeColors {
diffContext: string;
diffHeader: string;
diffHunk: string;
// Diff line backgrounds (darker/muted for readability)
diffLineBgAdded: string;
diffLineBgRemoved: string;
diffLineText: string;
// Role colors
roleUser: string;

View File

@@ -62,7 +62,7 @@ export interface FunctionDefinition {
export interface BashParams {
command: string;
description: string;
description?: string;
workdir?: string;
timeout?: number;
}

105
src/ui/banner.test.ts Normal file
View File

@@ -0,0 +1,105 @@
import { getBannerLines } from "./banner/lines";
import { renderBanner, renderBannerWithSubtitle } from "./banner/render";
import { printBanner, printWelcome } from "./banner/print";
import { getInlineLogo } from "./banner/logo";
import { BANNER_STYLE_MAP, BANNER_LINES, GRADIENT_COLORS } from "@constants/banner";
import { Style } from "@ui/styles";
describe("Banner Utilities", () => {
  /**
   * Builds the string renderBanner is expected to produce for `lines`:
   * every line is prefixed with its gradient color (the index is clamped to
   * the last available color) and suffixed with the style reset code.
   */
  const colorize = (lines: readonly string[]): string =>
    lines
      .map((line, index) => {
        const colorIndex = Math.min(index, GRADIENT_COLORS.length - 1);
        return GRADIENT_COLORS[colorIndex] + line + Style.RESET;
      })
      .join("\n");

  describe("getBannerLines", () => {
    it("should return default banner lines when no style is provided", () => {
      const lines = getBannerLines();
      expect(lines).toEqual(BANNER_LINES);
    });

    it("should return banner lines for a specific style", () => {
      const style = "blocks";
      const lines = getBannerLines(style);
      expect(lines).toEqual(BANNER_STYLE_MAP[style]);
    });

    it("should return default banner lines for an unknown style", () => {
      // Deliberately bypass the type checker to exercise the runtime fallback.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const lines = getBannerLines("unknown-style" as any);
      expect(lines).toEqual(BANNER_LINES);
    });
  });

  describe("renderBanner", () => {
    it("should render banner with default style", () => {
      const banner = renderBanner();
      expect(banner).toBe(colorize(BANNER_LINES));
    });

    it("should render banner with a specific style", () => {
      const style = "blocks";
      const banner = renderBanner(style);
      expect(banner).toBe(colorize(BANNER_STYLE_MAP[style]));
    });
  });

  describe("renderBannerWithSubtitle", () => {
    it("should render banner with subtitle", () => {
      const subtitle = "Welcome to CodeTyper!";
      const style = "default";
      const bannerWithSubtitle = renderBannerWithSubtitle(subtitle, style);
      const banner = renderBanner(style);
      const expectedSubtitle = Style.DIM + " " + subtitle + Style.RESET;
      expect(bannerWithSubtitle).toBe(banner + "\n" + expectedSubtitle);
    });
  });

  describe("printBanner", () => {
    it("should print the banner to the console", () => {
      const consoleSpy = jest.spyOn(console, "log").mockImplementation(() => {});
      try {
        const style = "default";
        printBanner(style);
        expect(consoleSpy).toHaveBeenCalledWith("\n" + renderBanner(style));
      } finally {
        // Restore even when an expectation throws, so a failure here does
        // not leave console.log mocked for the tests that run afterwards.
        consoleSpy.mockRestore();
      }
    });
  });

  describe("printWelcome", () => {
    it("should print the welcome message to the console", () => {
      const consoleSpy = jest.spyOn(console, "log").mockImplementation(() => {});
      try {
        const version = "1.0.0";
        const provider = "OpenAI";
        const model = "GPT-4";
        printWelcome(version, provider, model);
        expect(consoleSpy).toHaveBeenCalledWith("\n" + renderBanner("blocks"));
        expect(consoleSpy).toHaveBeenCalledWith("");
        expect(consoleSpy).toHaveBeenCalledWith(Style.DIM + " AI Coding Assistant" + Style.RESET);
        expect(consoleSpy).toHaveBeenCalledWith("");
        expect(consoleSpy).toHaveBeenCalledWith(
          Style.DIM + ` v${version} | ${provider} | ${model}` + Style.RESET
        );
        expect(consoleSpy).toHaveBeenCalledWith("");
      } finally {
        // Restore even when an expectation throws (see printBanner test).
        consoleSpy.mockRestore();
      }
    });
  });

  describe("getInlineLogo", () => {
    it("should return the inline logo with correct style", () => {
      const logo = getInlineLogo();
      const expectedLogo = Style.CYAN + Style.BOLD + "codetyper" + Style.RESET;
      expect(logo).toBe(expectedLogo);
    });
  });
});

View File

@@ -0,0 +1,4 @@
/**
 * Capitalizes the first letter of each word in a string.
 *
 * A word starts at any letter that is not directly preceded by a letter,
 * digit or combining mark. Whitespace, underscores, hyphens and other
 * punctuation therefore separate words ("foo_bar" -> "Foo_Bar"), while an
 * apostrophe between letters is treated as part of the word
 * ("don't" -> "Don't", not "Don'T"). Letters are matched with Unicode
 * property escapes, so non-ASCII words are handled ("élan" -> "Élan").
 *
 * @param input - Text to transform.
 * @returns A copy of `input` with the first letter of every word upper-cased;
 *   all other characters are left untouched.
 */
export function capitalizeWords(input: string): string {
  // The negative lookbehind rejects letters that continue a word: those
  // preceded by a letter/digit/mark, optionally followed by one apostrophe
  // (straight or typographic).
  return input.replace(
    /(?<![\p{L}\p{N}\p{M}]['\u2019]?)\p{L}/gu,
    (char) => char.toUpperCase(),
  );
}