Terminal-based AI coding agent with interactive TUI for autonomous code generation.

Features:
  - Interactive TUI with React/Ink
  - Autonomous agent with tool calls (bash, read, write, edit, glob, grep)
  - Permission system with pattern-based rules
  - Session management with auto-compaction
  - Dual providers: GitHub Copilot and Ollama
  - MCP server integration
  - Todo panel and theme system
  - Streaming responses
  - GitHub-compatible project context
This commit is contained in:
2026-01-27 23:33:06 -05:00
commit 0062e5d9d9
521 changed files with 66418 additions and 0 deletions

View File

@@ -0,0 +1,231 @@
/**
* Unit tests for Vector Store
*/
import { describe, it, expect } from "bun:test";
import {
cosineSimilarity,
euclideanDistance,
upsertEmbedding,
removeEmbedding,
hasEmbedding,
getEmbedding,
findSimilar,
findAboveThreshold,
getIndexStats,
} from "@services/learning/vector-store";
import { createEmptyIndex } from "@/types/embeddings";
// Test suite for the pure vector-store helpers. All functions under test are
// pure/immutable, so each test builds its own index via createEmptyIndex +
// upsertEmbedding and never touches the filesystem.
describe("Vector Store", () => {
// Vector math: cosine similarity convention is 1 = identical direction,
// 0 = orthogonal (and the sentinel for degenerate inputs), -1 = opposite.
describe("cosineSimilarity", () => {
it("should return 1 for identical vectors", () => {
const a = [1, 0, 0];
const b = [1, 0, 0];
expect(cosineSimilarity(a, b)).toBeCloseTo(1);
});
it("should return 0 for orthogonal vectors", () => {
const a = [1, 0, 0];
const b = [0, 1, 0];
expect(cosineSimilarity(a, b)).toBeCloseTo(0);
});
it("should return -1 for opposite vectors", () => {
const a = [1, 0, 0];
const b = [-1, 0, 0];
expect(cosineSimilarity(a, b)).toBeCloseTo(-1);
});
it("should handle normalized vectors", () => {
const a = [0.6, 0.8, 0];
const b = [0.8, 0.6, 0];
const similarity = cosineSimilarity(a, b);
expect(similarity).toBeGreaterThan(0);
expect(similarity).toBeLessThan(1);
});
// Mismatched lengths and zero vectors both map to the 0 sentinel rather
// than throwing — these tests pin that contract.
it("should return 0 for mismatched lengths", () => {
const a = [1, 0, 0];
const b = [1, 0];
expect(cosineSimilarity(a, b)).toBe(0);
});
it("should handle zero vectors", () => {
const a = [0, 0, 0];
const b = [1, 0, 0];
expect(cosineSimilarity(a, b)).toBe(0);
});
});
// Euclidean distance uses Infinity (not 0) as its mismatched-length sentinel.
describe("euclideanDistance", () => {
it("should return 0 for identical vectors", () => {
const a = [1, 2, 3];
const b = [1, 2, 3];
expect(euclideanDistance(a, b)).toBe(0);
});
it("should compute correct distance", () => {
const a = [0, 0, 0];
const b = [3, 4, 0];
expect(euclideanDistance(a, b)).toBe(5);
});
it("should return Infinity for mismatched lengths", () => {
const a = [1, 0, 0];
const b = [1, 0];
expect(euclideanDistance(a, b)).toBe(Infinity);
});
});
// Index CRUD: upsert/remove return NEW index objects (immutability), so
// tests reassign `index` after every operation.
describe("Index Operations", () => {
it("should create empty index", () => {
const index = createEmptyIndex("test-model");
expect(index.version).toBe(1);
expect(index.model).toBe("test-model");
expect(Object.keys(index.embeddings)).toHaveLength(0);
});
it("should upsert embedding", () => {
let index = createEmptyIndex("test-model");
const embedding = [0.1, 0.2, 0.3];
index = upsertEmbedding(index, "learn_1", embedding);
expect(hasEmbedding(index, "learn_1")).toBe(true);
expect(getEmbedding(index, "learn_1")?.embedding).toEqual(embedding);
});
it("should update existing embedding", () => {
let index = createEmptyIndex("test-model");
const embedding1 = [0.1, 0.2, 0.3];
const embedding2 = [0.4, 0.5, 0.6];
index = upsertEmbedding(index, "learn_1", embedding1);
index = upsertEmbedding(index, "learn_1", embedding2);
expect(getEmbedding(index, "learn_1")?.embedding).toEqual(embedding2);
});
it("should remove embedding", () => {
let index = createEmptyIndex("test-model");
index = upsertEmbedding(index, "learn_1", [0.1, 0.2, 0.3]);
index = upsertEmbedding(index, "learn_2", [0.4, 0.5, 0.6]);
index = removeEmbedding(index, "learn_1");
expect(hasEmbedding(index, "learn_1")).toBe(false);
expect(hasEmbedding(index, "learn_2")).toBe(true);
});
it("should return null for missing embedding", () => {
const index = createEmptyIndex("test-model");
expect(getEmbedding(index, "nonexistent")).toBeNull();
});
it("should track index stats", () => {
let index = createEmptyIndex("test-model");
index = upsertEmbedding(index, "learn_1", [0.1, 0.2, 0.3]);
index = upsertEmbedding(index, "learn_2", [0.4, 0.5, 0.6]);
const stats = getIndexStats(index);
expect(stats.count).toBe(2);
expect(stats.model).toBe("test-model");
});
});
// Search: findSimilar(index, query, topK, minSimilarity) ranks by cosine
// score descending; findAboveThreshold returns every hit >= threshold.
describe("Similarity Search", () => {
it("should find similar embeddings", () => {
let index = createEmptyIndex("test-model");
// Add embeddings with known similarities
index = upsertEmbedding(index, "a", [1, 0, 0]);
index = upsertEmbedding(index, "b", [0.9, 0.1, 0]);
index = upsertEmbedding(index, "c", [0, 1, 0]);
const query = [1, 0, 0];
const results = findSimilar(index, query, 2, 0);
expect(results).toHaveLength(2);
expect(results[0].id).toBe("a");
expect(results[0].score).toBeCloseTo(1);
expect(results[1].id).toBe("b");
});
it("should respect minSimilarity threshold", () => {
let index = createEmptyIndex("test-model");
index = upsertEmbedding(index, "a", [1, 0, 0]);
index = upsertEmbedding(index, "b", [0, 1, 0]);
const query = [1, 0, 0];
const results = findSimilar(index, query, 10, 0.5);
expect(results).toHaveLength(1);
expect(results[0].id).toBe("a");
});
it("should limit results to topK", () => {
let index = createEmptyIndex("test-model");
for (let i = 0; i < 10; i++) {
const embedding = [Math.random(), Math.random(), Math.random()];
index = upsertEmbedding(index, `learn_${i}`, embedding);
}
const query = [0.5, 0.5, 0.5];
const results = findSimilar(index, query, 3, 0);
expect(results.length).toBeLessThanOrEqual(3);
});
it("should find all above threshold", () => {
let index = createEmptyIndex("test-model");
index = upsertEmbedding(index, "a", [1, 0, 0]);
index = upsertEmbedding(index, "b", [0.95, 0.05, 0]);
index = upsertEmbedding(index, "c", [0.9, 0.1, 0]);
index = upsertEmbedding(index, "d", [0, 1, 0]);
const query = [1, 0, 0];
const results = findAboveThreshold(index, query, 0.85);
expect(results.length).toBe(3);
expect(results.map((r) => r.id)).toContain("a");
expect(results.map((r) => r.id)).toContain("b");
expect(results.map((r) => r.id)).toContain("c");
});
it("should return empty array for no matches", () => {
let index = createEmptyIndex("test-model");
index = upsertEmbedding(index, "a", [1, 0, 0]);
const query = [-1, 0, 0];
const results = findSimilar(index, query, 10, 0.5);
expect(results).toHaveLength(0);
});
it("should handle empty index", () => {
const index = createEmptyIndex("test-model");
const query = [1, 0, 0];
const results = findSimilar(index, query, 10, 0);
expect(results).toHaveLength(0);
});
});
});

View File

@@ -0,0 +1,86 @@
/**
* Message analysis for learning detection
*/
import {
LEARNING_PATTERNS,
LEARNING_KEYWORDS,
LEARNING_DEFAULTS,
LEARNING_CONTEXTS,
} from "@constants/learning";
import { categorizePattern } from "@services/learning/categorize";
import { extractLearningContent } from "@services/learning/extract";
import type { LearningCandidate, MessageSource } from "@/types/learning";
// Context label attached to a candidate, chosen by who produced the message:
// user messages record a preference, assistant messages record a convention.
const getContextForSource = (source: MessageSource): string => {
  if (source === "user") {
    return LEARNING_CONTEXTS.USER_PREFERENCE;
  }
  return LEARNING_CONTEXTS.CONVENTION_IDENTIFIED;
};
// Scan the message against every learning pattern; each matching pattern
// yields one candidate whose content is the sentence containing the match.
const findPatternMatches = (
  message: string,
  source: MessageSource,
): LearningCandidate[] =>
  LEARNING_PATTERNS.flatMap((pattern) => {
    const match = message.match(pattern);
    if (!match) {
      return [];
    }
    return [
      {
        content: extractLearningContent(message, match),
        context: getContextForSource(source),
        confidence: LEARNING_DEFAULTS.BASE_PATTERN_CONFIDENCE,
        category: categorizePattern(pattern),
      },
    ];
  });
// Number of learning keywords present in `text` (caller lowercases first).
const countKeywords = (text: string): number =>
  LEARNING_KEYWORDS.reduce(
    (total, keyword) => (text.includes(keyword) ? total + 1 : total),
    0,
  );
// Split the message into sentences and keep those that contain enough
// learning keywords; confidence grows with the number of keyword hits.
const extractKeywordSentences = (message: string): LearningCandidate[] => {
  const sentences = message.split(/[.!?]+/).filter((part) => part.trim());
  const results: LearningCandidate[] = [];
  for (const sentence of sentences) {
    const hits = countKeywords(sentence.toLowerCase());
    if (hits < LEARNING_DEFAULTS.MIN_KEYWORDS_FOR_LEARNING) {
      continue;
    }
    results.push({
      content: sentence.trim(),
      context: LEARNING_CONTEXTS.MULTIPLE_INDICATORS,
      confidence:
        LEARNING_DEFAULTS.BASE_KEYWORD_CONFIDENCE +
        hits * LEARNING_DEFAULTS.KEYWORD_CONFIDENCE_INCREMENT,
      category: "general",
    });
  }
  return results;
};
export const analyzeMessage = (
message: string,
source: MessageSource,
): LearningCandidate[] => {
const candidates: LearningCandidate[] = [];
const lowerMessage = message.toLowerCase();
const patternMatches = findPatternMatches(message, source);
candidates.push(...patternMatches);
const keywordCount = countKeywords(lowerMessage);
if (keywordCount >= LEARNING_DEFAULTS.MIN_KEYWORDS_FOR_LEARNING) {
const keywordSentences = extractKeywordSentences(message);
candidates.push(...keywordSentences);
}
return candidates;
};

View File

@@ -0,0 +1,72 @@
/**
* Assistant response analysis for learning detection
*/
import {
ACKNOWLEDGMENT_PATTERNS,
ACKNOWLEDGMENT_PHRASES,
LEARNING_DEFAULTS,
LEARNING_CONTEXTS,
} from "@constants/learning";
import { analyzeMessage } from "@services/learning/analyze";
import { extractLearningFromAcknowledgment } from "@services/learning/extract";
import type { LearningCandidate } from "@/types/learning";
// Detect an assistant acknowledgment of a user-stated convention.
//
// Fix: every candidate produced here is identical regardless of which pattern
// matched (content, context, confidence and category do not depend on the
// pattern), so the original push-per-pattern loop emitted N exact duplicates
// when N patterns matched. Emit at most one candidate instead.
const findAcknowledgmentMatches = (
  userMessage: string,
  assistantResponse: string,
): LearningCandidate[] => {
  const acknowledged = ACKNOWLEDGMENT_PATTERNS.some(
    (pattern) => assistantResponse.match(pattern) !== null,
  );
  if (!acknowledged) {
    return [];
  }
  return [
    {
      content: extractLearningFromAcknowledgment(userMessage),
      context: LEARNING_CONTEXTS.CONVENTION_CONFIRMED,
      confidence: LEARNING_DEFAULTS.ACKNOWLEDGMENT_CONFIDENCE,
      category: "convention",
    },
  ];
};
// True when the (lowercased) response contains any acknowledgment phrase.
const hasAcknowledgmentPhrase = (response: string): boolean => {
  for (const phrase of ACKNOWLEDGMENT_PHRASES) {
    if (response.includes(phrase)) {
      return true;
    }
  }
  return false;
};
// Return a copy of the candidate with its confidence raised (capped at the
// configured maximum) and its context switched to "preference acknowledged".
const boostConfidence = (candidate: LearningCandidate): LearningCandidate => {
  const raised = candidate.confidence + LEARNING_DEFAULTS.CONFIDENCE_BOOST;
  const capped =
    raised > LEARNING_DEFAULTS.MAX_CONFIDENCE
      ? LEARNING_DEFAULTS.MAX_CONFIDENCE
      : raised;
  return {
    ...candidate,
    confidence: capped,
    context: LEARNING_CONTEXTS.PREFERENCE_ACKNOWLEDGED,
  };
};
// Re-analyze the user message and boost each resulting candidate, since the
// assistant has acknowledged it.
const getAcknowledgedLearnings = (userMessage: string): LearningCandidate[] =>
  analyzeMessage(userMessage, "user").map(boostConfidence);
export const analyzeAssistantResponse = (
userMessage: string,
assistantResponse: string,
): LearningCandidate[] => {
const candidates: LearningCandidate[] = [];
const lowerResponse = assistantResponse.toLowerCase();
const acknowledgmentMatches = findAcknowledgmentMatches(
userMessage,
assistantResponse,
);
candidates.push(...acknowledgmentMatches);
if (hasAcknowledgmentPhrase(lowerResponse)) {
const acknowledgedLearnings = getAcknowledgedLearnings(userMessage);
candidates.push(...acknowledgedLearnings);
}
return candidates;
};

View File

@@ -0,0 +1,20 @@
/**
* Learning pattern categorization
*/
import { CATEGORY_PATTERNS } from "@constants/learning";
import type { LearningCategory } from "@/types/learning";
// First category whose keyword appears in the stringified pattern, else null.
const findMatchingCategory = (patternStr: string): LearningCategory | null => {
  const entry = Object.entries(CATEGORY_PATTERNS).find(([keyword]) =>
    patternStr.includes(keyword),
  );
  return entry ? entry[1] : null;
};
/**
 * Map a learning regex to its category by inspecting the pattern's own text;
 * falls back to "general" when no category keyword appears in it.
 */
export const categorizePattern = (pattern: RegExp): LearningCategory => {
  const lowered = String(pattern).toLowerCase();
  return findMatchingCategory(lowered) ?? "general";
};

View File

@@ -0,0 +1,25 @@
/**
* Learning candidate deduplication
*/
import type { LearningCandidate } from "@/types/learning";
// Canonical form used as the dedup key: case- and edge-whitespace-insensitive.
const normalizeContent = (content: string): string => {
  return content.toLowerCase().trim();
};

/**
 * Drop candidates whose normalized content has already been seen, keeping the
 * FIRST occurrence of each distinct content string (order preserved).
 */
export const deduplicateCandidates = (
  candidates: LearningCandidate[],
): LearningCandidate[] => {
  const seenKeys = new Set<string>();
  const unique: LearningCandidate[] = [];
  for (const candidate of candidates) {
    const key = normalizeContent(candidate.content);
    if (!seenKeys.has(key)) {
      seenKeys.add(key);
      unique.push(candidate);
    }
  }
  return unique;
};

View File

@@ -0,0 +1,29 @@
/**
* Learning detection orchestration
*/
import { analyzeMessage } from "@services/learning/analyze";
import { analyzeAssistantResponse } from "@services/learning/assistant";
import { deduplicateCandidates } from "@services/learning/deduplicate";
import type { LearningCandidate } from "@/types/learning";
const sortByConfidence = (a: LearningCandidate, b: LearningCandidate): number =>
b.confidence - a.confidence;
export const detectLearnings = (
userMessage: string,
assistantResponse: string,
): LearningCandidate[] => {
const candidates: LearningCandidate[] = [];
const userLearnings = analyzeMessage(userMessage, "user");
candidates.push(...userLearnings);
const assistantLearnings = analyzeAssistantResponse(
userMessage,
assistantResponse,
);
candidates.push(...assistantLearnings);
return deduplicateCandidates(candidates).sort(sortByConfidence);
};

View File

@@ -0,0 +1,240 @@
/**
* Embedding Service
*
* Generates text embeddings using Ollama for semantic search
*/
import got from "got";
import {
EMBEDDING_DEFAULTS,
EMBEDDING_ENDPOINTS,
EMBEDDING_TIMEOUTS,
} from "@constants/embeddings";
import { getOllamaBaseUrl } from "@providers/ollama/state";
import type {
EmbeddingVector,
EmbeddingResult,
EmbeddingError,
EmbeddingServiceState,
OllamaEmbedRequest,
OllamaEmbedResponse,
} from "@/types/embeddings";
// =============================================================================
// Service State
// =============================================================================
// Module-level singleton state for the embedding service. `initialized`
// flips to true after the first initializeEmbeddingService() call whether or
// not a model was found; `available` tracks whether embeds can be served.
let serviceState: EmbeddingServiceState = {
initialized: false,
model: null,
available: false,
error: null,
};
// =============================================================================
// Ollama API
// =============================================================================
// POST a batch of texts to Ollama's embed endpoint and return one vector per
// input text. Throws on HTTP/connection errors (callers handle fallback).
const callOllamaEmbed = async (
  texts: string[],
  model: string,
): Promise<EmbeddingVector[]> => {
  const endpoint = `${getOllamaBaseUrl()}${EMBEDDING_ENDPOINTS.EMBED}`;
  const payload: OllamaEmbedRequest = { model, input: texts };
  const body = await got
    .post(endpoint, {
      json: payload,
      timeout: { request: EMBEDDING_TIMEOUTS.EMBED },
    })
    .json<OllamaEmbedResponse>();
  return body.embeddings;
};
// =============================================================================
// Model Detection
// =============================================================================
// Probe a model with a minimal embed request; any failure (missing model,
// connection error, timeout) is reported as "unavailable".
const checkModelAvailable = (model: string): Promise<boolean> =>
  callOllamaEmbed(["test"], model).then(
    () => true,
    () => false,
  );
// Probe candidate embedding models sequentially, in priority order, returning
// the first one that responds; null when none is installed/reachable.
const findAvailableModel = async (): Promise<string | null> => {
  const candidates = [
    EMBEDDING_DEFAULTS.MODEL,
    EMBEDDING_DEFAULTS.FALLBACK_MODEL,
    "mxbai-embed-large",
    "snowflake-arctic-embed",
  ];
  for (const candidate of candidates) {
    if (await checkModelAvailable(candidate)) {
      return candidate;
    }
  }
  return null;
};
// =============================================================================
// Service Initialization
// =============================================================================
// Idempotent service initialization: after the first call the cached state is
// returned even if that attempt failed; use resetEmbeddingService() to retry.
export const initializeEmbeddingService =
async (): Promise<EmbeddingServiceState> => {
if (serviceState.initialized) {
return serviceState;
}
try {
// Probe known embedding models in priority order.
const model = await findAvailableModel();
if (model) {
serviceState = {
initialized: true,
model,
available: true,
error: null,
};
} else {
// Probes failed for every candidate model.
// NOTE(review): checkModelAvailable swallows connection errors, so this
// branch also covers "Ollama not running", and the catch below is likely
// unreachable — ECONNREFUSED surfaces as MODEL_NOT_FOUND. Confirm intended.
serviceState = {
initialized: true,
model: null,
available: false,
error: {
code: "MODEL_NOT_FOUND",
message: `No embedding model found. Install one with: ollama pull ${EMBEDDING_DEFAULTS.MODEL}`,
},
};
}
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
// Distinguish "Ollama not running" from other failures for a better hint.
const isConnectionError =
message.includes("ECONNREFUSED") || message.includes("connect");
serviceState = {
initialized: true,
model: null,
available: false,
error: {
code: isConnectionError ? "OLLAMA_NOT_RUNNING" : "EMBEDDING_FAILED",
message: isConnectionError
? "Ollama is not running. Start it with: ollama serve"
: `Embedding service error: ${message}`,
},
};
}
return serviceState;
};
// =============================================================================
// Core Embedding Functions
// =============================================================================
/**
 * Generate an embedding for a single text.
 *
 * Lazily initializes the service; returns null (never throws) when embeddings
 * are unavailable, the request fails, or Ollama returns no vectors.
 */
export const embed = async (text: string): Promise<EmbeddingResult | null> => {
  if (!serviceState.initialized) {
    await initializeEmbeddingService();
  }
  const model = serviceState.model;
  if (!serviceState.available || !model) {
    return null;
  }
  try {
    const [vector] = await callOllamaEmbed([text], model);
    if (!vector) {
      return null;
    }
    return {
      text,
      embedding: vector,
      model,
      dimensions: vector.length,
    };
  } catch {
    return null;
  }
};
/**
 * Generate embeddings for multiple texts in one Ollama request.
 *
 * Returns one result (or null) per input text, aligned by index. Never throws:
 * an unavailable service or failed request yields an all-null array.
 */
export const embedBatch = async (
  texts: string[],
): Promise<(EmbeddingResult | null)[]> => {
  if (!serviceState.initialized) {
    await initializeEmbeddingService();
  }
  // Fix: capture the model once instead of using `serviceState.model!` inside
  // the map callback — removes the non-null assertion and keeps results
  // consistent even if serviceState is reset while the request is in flight.
  const model = serviceState.model;
  if (!serviceState.available || model === null) {
    return texts.map(() => null);
  }
  try {
    const embeddings = await callOllamaEmbed(texts, model);
    return texts.map((text, i) => {
      const embedding = embeddings[i];
      if (!embedding) {
        return null;
      }
      return {
        text,
        embedding,
        model,
        dimensions: embedding.length,
      };
    });
  } catch {
    return texts.map(() => null);
  }
};
// =============================================================================
// Service State Accessors
// =============================================================================
// True once a usable embedding model has been detected.
export const isEmbeddingAvailable = (): boolean => serviceState.available;
// Detected embedding model name, or null before init / when none was found.
export const getEmbeddingModel = (): string | null => serviceState.model;
// Last initialization error, or null when the service is healthy.
export const getEmbeddingError = (): EmbeddingError | null =>
serviceState.error;
// Shallow copy so callers cannot mutate the module-level state.
export const getServiceState = (): EmbeddingServiceState => ({
...serviceState,
});
/**
* Reset service state (for testing)
*
* Allows initializeEmbeddingService() to run its probe again.
*/
export const resetEmbeddingService = (): void => {
serviceState = {
initialized: false,
model: null,
available: false,
error: null,
};
};

View File

@@ -0,0 +1,38 @@
/**
* Learning content extraction utilities
*/
import { LEARNING_DEFAULTS } from "@constants/learning";
// Split on runs of sentence punctuation, discarding whitespace-only segments.
// Kept segments retain their surrounding whitespace; callers trim as needed.
const splitIntoSentences = (text: string): string[] => {
  const segments = text.split(/[.!?]+/);
  return segments.filter((segment) => segment.trim().length > 0);
};
// First sentence that contains `matchText` (case-insensitive), or null.
const findMatchingSentence = (
  sentences: string[],
  matchText: string,
): string | null => {
  const needle = matchText.toLowerCase();
  for (const sentence of sentences) {
    if (sentence.toLowerCase().includes(needle)) {
      return sentence;
    }
  }
  return null;
};
export const extractLearningContent = (
message: string,
match: RegExpMatchArray,
): string => {
const sentences = splitIntoSentences(message);
const matchingSentence = findMatchingSentence(sentences, match[0]);
return matchingSentence?.trim() ?? match[0];
};
/**
 * When the assistant acknowledges a user instruction, store the user's first
 * sentence; if the message has no sentences, fall back to a length-capped
 * prefix of the raw message.
 */
export const extractLearningFromAcknowledgment = (
  userMessage: string,
): string => {
  const [firstSentence] = splitIntoSentences(userMessage);
  return firstSentence !== undefined
    ? firstSentence.trim()
    : userMessage.slice(0, LEARNING_DEFAULTS.MAX_SLICE_LENGTH);
};

View File

@@ -0,0 +1,13 @@
/**
* Learning formatting utilities
*/
import { LEARNING_DEFAULTS } from "@constants/learning";
import type { LearningCandidate } from "@/types/learning";
/**
 * Render a candidate's content for inclusion in a prompt, truncating overly
 * long content with a trailing ellipsis.
 */
export const formatLearningForPrompt = (
  candidate: LearningCandidate,
): string => {
  const { content } = candidate;
  if (content.length <= LEARNING_DEFAULTS.MAX_CONTENT_LENGTH) {
    return content;
  }
  return `${content.slice(0, LEARNING_DEFAULTS.TRUNCATE_LENGTH)}...`;
};

View File

@@ -0,0 +1,67 @@
/**
* Learning Service Exports
*
* Central export point for all learning-related functionality
*/
// Core persistence
export {
saveLearning,
getLearnings,
learningExists,
} from "@services/learning/persistence";
// Embedding service
export {
initializeEmbeddingService,
embed,
embedBatch,
isEmbeddingAvailable,
getEmbeddingModel,
getEmbeddingError,
getServiceState,
resetEmbeddingService,
} from "@services/learning/embeddings";
// Vector store
export {
cosineSimilarity,
euclideanDistance,
loadIndex,
saveIndex,
upsertEmbedding,
removeEmbedding,
hasEmbedding,
getEmbedding,
findSimilar,
findAboveThreshold,
getIndexStats,
} from "@services/learning/vector-store";
// Semantic search
export {
indexLearning,
unindexLearning,
isLearningIndexed,
searchLearnings,
rebuildIndex,
clearIndexCache,
getIndexStatistics,
} from "@services/learning/semantic-search";
// Re-export types
export type {
StoredLearning,
LearningCandidate,
LearningCategory,
} from "@/types/learning";
export type {
EmbeddingVector,
EmbeddingResult,
EmbeddingIndex,
StoredEmbedding,
SimilarityResult,
SemanticSearchResult,
SemanticSearchOptions,
} from "@/types/embeddings";

View File

@@ -0,0 +1,48 @@
/**
* Learning persistence operations
*/
import { projectConfig } from "@services/project-config";
import type { StoredLearning } from "@/types/learning";
import { indexLearning } from "@services/learning/semantic-search";
/**
 * Persist a learning and schedule it for semantic indexing.
 *
 * Indexing is fire-and-forget: embedding failures are deliberately swallowed
 * so an unavailable embedding service never fails the save.
 */
export const saveLearning = async (
  content: string,
  context?: string,
  global = false,
): Promise<void> => {
  const learning = await projectConfig.addLearning(content, context, global);
  if (!learning) {
    return;
  }
  void indexLearning(learning, global).catch(() => {
    // Silently ignore embedding failures
  });
};
// Thin async wrapper over the project config's learning store.
export const getLearnings = async (): Promise<StoredLearning[]> => {
  return projectConfig.getLearnings();
};
const normalizeForComparison = (text: string): string =>
text.toLowerCase().trim();
const isSimilarContent = (existing: string, newContent: string): boolean => {
const normalizedExisting = normalizeForComparison(existing);
const normalizedNew = normalizeForComparison(newContent);
return (
normalizedExisting === normalizedNew ||
normalizedExisting.includes(normalizedNew) ||
normalizedNew.includes(normalizedExisting)
);
};
export const learningExists = async (content: string): Promise<boolean> => {
const learnings = await getLearnings();
return learnings.some((learning) =>
isSimilarContent(learning.content, content),
);
};

View File

@@ -0,0 +1,386 @@
/**
* Semantic Search Service
*
* High-level API for semantic learning retrieval
*/
import * as path from "path";
import { EMBEDDING_SEARCH } from "@constants/embeddings";
import {
getGlobalConfigDir,
getLocalConfigDir,
} from "@services/project-config";
import type { StoredLearning } from "@/types/learning";
import type {
EmbeddingIndex,
SemanticSearchResult,
SemanticSearchOptions,
SimilarityResult,
} from "@/types/embeddings";
import {
embed,
isEmbeddingAvailable,
initializeEmbeddingService,
getEmbeddingModel,
} from "@services/learning/embeddings";
import {
loadIndex,
saveIndex,
upsertEmbedding,
removeEmbedding,
findSimilar,
hasEmbedding,
} from "@services/learning/vector-store";
// =============================================================================
// Index Management
// =============================================================================
// In-memory caches of the two embedding indexes; null until first load
// (clearIndexCache() resets them so the next access reloads from disk).
let globalIndex: EmbeddingIndex | null = null;
let localIndex: EmbeddingIndex | null = null;
// Directory holding the global (per-user) learnings index.
const getGlobalIndexDir = (): string =>
path.join(getGlobalConfigDir(), "learnings");
// Directory holding the local (per-project) learnings index.
const getLocalIndexDir = (): string =>
path.join(getLocalConfigDir(), "learnings");
/**
 * Lazily load (and cache) the global embedding index.
 * Returns null when embeddings are unavailable or no model is selected.
 */
const getGlobalIndex = async (): Promise<EmbeddingIndex | null> => {
  if (!isEmbeddingAvailable()) {
    return null;
  }
  if (!globalIndex) {
    const model = getEmbeddingModel();
    if (!model) {
      return null;
    }
    globalIndex = await loadIndex(getGlobalIndexDir(), model);
  }
  return globalIndex;
};
/**
 * Lazily load (and cache) the local (per-project) embedding index.
 * Returns null when embeddings are unavailable or no model is selected.
 */
const getLocalIndex = async (): Promise<EmbeddingIndex | null> => {
  if (!isEmbeddingAvailable()) {
    return null;
  }
  if (!localIndex) {
    const model = getEmbeddingModel();
    if (!model) {
      return null;
    }
    localIndex = await loadIndex(getLocalIndexDir(), model);
  }
  return localIndex;
};
/**
* Save both indexes to disk
*
* Only caches that have been populated are written; note that BOTH files are
* rewritten even when only one index changed.
*/
const persistIndexes = async (): Promise<void> => {
if (globalIndex) {
await saveIndex(getGlobalIndexDir(), globalIndex);
}
if (localIndex) {
await saveIndex(getLocalIndexDir(), localIndex);
}
};
// =============================================================================
// Embedding Management
// =============================================================================
/**
* Add embedding for a learning
*
* Embeds `learning.content` and upserts it into the global or local index.
* Returns false when the embedding service is unavailable, the embed call
* fails, or the target index cannot be loaded. Persists both indexes to disk
* on every successful upsert.
*/
export const indexLearning = async (
learning: StoredLearning,
global: boolean,
): Promise<boolean> => {
await initializeEmbeddingService();
if (!isEmbeddingAvailable()) {
return false;
}
const result = await embed(learning.content);
if (!result) {
return false;
}
const index = global ? await getGlobalIndex() : await getLocalIndex();
if (!index) {
return false;
}
const updatedIndex = upsertEmbedding(index, learning.id, result.embedding);
// Update the in-memory cache before persisting so persistIndexes() sees it.
if (global) {
globalIndex = updatedIndex;
} else {
localIndex = updatedIndex;
}
await persistIndexes();
return true;
};
/**
* Remove embedding for a learning
*
* No-op when the target index is unavailable (embeddings disabled or model
* missing). Removing an id that was never indexed is harmless.
*/
export const unindexLearning = async (
learningId: string,
global: boolean,
): Promise<void> => {
const index = global ? await getGlobalIndex() : await getLocalIndex();
if (!index) {
return;
}
const updatedIndex = removeEmbedding(index, learningId);
// Update the in-memory cache before persisting so persistIndexes() sees it.
if (global) {
globalIndex = updatedIndex;
} else {
localIndex = updatedIndex;
}
await persistIndexes();
};
/**
 * True when either index (global or local) contains an embedding for the id.
 */
export const isLearningIndexed = async (
  learningId: string,
): Promise<boolean> => {
  const indexes = [await getGlobalIndex(), await getLocalIndex()];
  return indexes.some(
    (index) => index !== null && hasEmbedding(index, learningId),
  );
};
// =============================================================================
// Semantic Search
// =============================================================================
/**
* Search learnings by semantic similarity
*
* Embeds the query, searches both the global and local indexes, deduplicates
* hits by learning id, and maps them back onto the supplied `learnings`
* (only ids present there can be returned). Falls back to keyword matching
* when the embedding service or the query embedding is unavailable.
*/
export const searchLearnings = async (
query: string,
learnings: StoredLearning[],
options: SemanticSearchOptions = {},
): Promise<SemanticSearchResult<StoredLearning>[]> => {
const {
topK = EMBEDDING_SEARCH.TOP_K,
minSimilarity = EMBEDDING_SEARCH.MIN_SIMILARITY,
} = options;
await initializeEmbeddingService();
if (!isEmbeddingAvailable()) {
// Fallback to keyword matching
return fallbackKeywordSearch(query, learnings, topK);
}
// Embed the query
const queryResult = await embed(query);
if (!queryResult) {
return fallbackKeywordSearch(query, learnings, topK);
}
// Search both indexes
// topK * 2 over-fetches per index so cross-index duplicates can be dropped
// below while still leaving up to topK unique hits.
const gIndex = await getGlobalIndex();
const lIndex = await getLocalIndex();
const allResults: SimilarityResult[] = [];
if (gIndex) {
allResults.push(
...findSimilar(gIndex, queryResult.embedding, topK * 2, minSimilarity),
);
}
if (lIndex) {
allResults.push(
...findSimilar(lIndex, queryResult.embedding, topK * 2, minSimilarity),
);
}
// Deduplicate and sort
const seen = new Set<string>();
const uniqueResults: SimilarityResult[] = [];
for (const result of allResults.sort((a, b) => b.score - a.score)) {
if (!seen.has(result.id)) {
seen.add(result.id);
uniqueResults.push(result);
}
}
// Map results to learnings
// Ids present only in the index (e.g. stale embeddings) are skipped, so the
// result may hold fewer than topK entries.
// NOTE(review): rank is the position within uniqueResults, so skipped ids
// leave gaps in the rank sequence — confirm this is intended.
const learningMap = new Map(learnings.map((l) => [l.id, l]));
const searchResults: SemanticSearchResult<StoredLearning>[] = [];
for (let i = 0; i < Math.min(uniqueResults.length, topK); i++) {
const result = uniqueResults[i];
const learning = learningMap.get(result.id);
if (learning) {
searchResults.push({
item: learning,
score: result.score,
rank: i + 1,
});
}
}
return searchResults;
};
// =============================================================================
// Fallback Search
// =============================================================================
/**
 * Keyword-overlap search used when embeddings are unavailable.
 *
 * Score is the fraction of query tokens found in a learning's content; zero
 * scores are dropped. With no usable query tokens, the first topK learnings
 * are returned in their given order with linearly decaying scores.
 */
const fallbackKeywordSearch = (
  query: string,
  learnings: StoredLearning[],
  topK: number,
): SemanticSearchResult<StoredLearning>[] => {
  const queryTokens = tokenize(query);
  if (queryTokens.length === 0) {
    // Return most recent if no query tokens
    return learnings.slice(0, topK).map((item, position) => ({
      item,
      score: 1 - position * 0.05,
      rank: position + 1,
    }));
  }
  const ranked = learnings
    .map((learning) => {
      const contentTokens = tokenize(learning.content);
      const hits = queryTokens.filter((token) =>
        contentTokens.includes(token),
      ).length;
      return { learning, score: hits / Math.max(queryTokens.length, 1) };
    })
    .filter((entry) => entry.score > 0)
    .sort((a, b) => b.score - a.score)
    .slice(0, topK);
  return ranked.map((entry, position) => ({
    item: entry.learning,
    score: entry.score,
    rank: position + 1,
  }));
};
/**
 * Lowercase word tokenizer; tokens of length <= 2 are considered noise.
 */
const tokenize = (text: string): string[] =>
  text
    .toLowerCase()
    .split(/\W+/)
    .filter((token) => token.length > 2);
// =============================================================================
// Index Rebuilding
// =============================================================================
/**
* Rebuild all embeddings for existing learnings
*
* Embeds sequentially (one request at a time) and reports progress after each
* item. Note: indexLearning persists the indexes on every item, so a rebuild
* performs one disk write pass per learning.
*
* @param learnings - Learnings to (re)embed.
* @param global - Target the global index instead of the local one.
* @param onProgress - Optional callback invoked after each item.
* @returns Counts of successfully indexed and failed items.
*/
export const rebuildIndex = async (
learnings: StoredLearning[],
global: boolean,
onProgress?: (current: number, total: number) => void,
): Promise<{ indexed: number; failed: number }> => {
await initializeEmbeddingService();
if (!isEmbeddingAvailable()) {
// Embeddings unavailable: nothing can be indexed.
return { indexed: 0, failed: learnings.length };
}
let indexed = 0;
let failed = 0;
for (let i = 0; i < learnings.length; i++) {
const learning = learnings[i];
const success = await indexLearning(learning, global);
if (success) {
indexed++;
} else {
failed++;
}
onProgress?.(i + 1, learnings.length);
}
return { indexed, failed };
};
// =============================================================================
// Cache Management
// =============================================================================
/**
* Clear in-memory index cache
*
* Drops both cached indexes; the next access reloads them from disk.
*/
export const clearIndexCache = (): void => {
globalIndex = null;
localIndex = null;
};
/**
* Get index statistics
*/
export const getIndexStatistics = async (): Promise<{
global: { count: number; model: string } | null;
local: { count: number; model: string } | null;
embeddingsAvailable: boolean;
}> => {
await initializeEmbeddingService();
const gIndex = await getGlobalIndex();
const lIndex = await getLocalIndex();
return {
global: gIndex
? { count: Object.keys(gIndex.embeddings).length, model: gIndex.model }
: null,
local: lIndex
? { count: Object.keys(lIndex.embeddings).length, model: lIndex.model }
: null,
embeddingsAvailable: isEmbeddingAvailable(),
};
};

View File

@@ -0,0 +1,243 @@
/**
* Vector Store
*
* Stores and searches embeddings for semantic retrieval
*/
import * as fs from "fs/promises";
import * as path from "path";
import { EMBEDDING_STORAGE, EMBEDDING_SEARCH } from "@constants/embeddings";
import type {
EmbeddingVector,
EmbeddingIndex,
StoredEmbedding,
SimilarityResult,
} from "@/types/embeddings";
import { createEmptyIndex } from "@/types/embeddings";
// =============================================================================
// Vector Math
// =============================================================================
/**
 * Compute cosine similarity between two vectors
 * Returns a value in [-1, 1] (1 = identical direction, 0 = orthogonal,
 * -1 = opposite). Mismatched lengths and zero vectors yield the 0 sentinel.
 */
export const cosineSimilarity = (
  a: EmbeddingVector,
  b: EmbeddingVector,
): number => {
  if (a.length !== b.length) {
    return 0;
  }
  let dot = 0;
  let sumSqA = 0;
  let sumSqB = 0;
  a.forEach((value, i) => {
    dot += value * b[i];
    sumSqA += value * value;
    sumSqB += b[i] * b[i];
  });
  const denominator = Math.sqrt(sumSqA) * Math.sqrt(sumSqB);
  return denominator === 0 ? 0 : dot / denominator;
};
/**
 * Compute Euclidean distance between two vectors.
 * Mismatched lengths yield the Infinity sentinel.
 */
export const euclideanDistance = (
  a: EmbeddingVector,
  b: EmbeddingVector,
): number => {
  if (a.length !== b.length) {
    return Infinity;
  }
  const sumOfSquares = a.reduce((acc, value, i) => {
    const delta = value - b[i];
    return acc + delta * delta;
  }, 0);
  return Math.sqrt(sumOfSquares);
};
// =============================================================================
// Index File Operations
// =============================================================================
// Path of the index JSON file inside `baseDir`.
const getIndexPath = (baseDir: string): string =>
path.join(baseDir, EMBEDDING_STORAGE.INDEX_FILE);
/**
 * Load embedding index from disk
 *
 * Returns a fresh empty index when the file is missing, unreadable,
 * malformed, or written by an incompatible version/model.
 */
export const loadIndex = async (
  baseDir: string,
  model: string,
): Promise<EmbeddingIndex> => {
  const indexPath = getIndexPath(baseDir);
  try {
    const data = await fs.readFile(indexPath, "utf-8");
    const index = JSON.parse(data) as EmbeddingIndex;
    // Fix: JSON.parse is unvalidated — besides version/model compatibility,
    // guard the shape consumers rely on (Object.entries(index.embeddings)
    // in the search functions) so a file with the right version but a
    // missing/invalid embeddings map cannot crash later.
    if (
      typeof index !== "object" ||
      index === null ||
      typeof index.embeddings !== "object" ||
      index.embeddings === null ||
      index.version !== EMBEDDING_STORAGE.VERSION ||
      index.model !== model
    ) {
      return createEmptyIndex(model);
    }
    return index;
  } catch {
    // Index doesn't exist or is invalid JSON — start fresh.
    return createEmptyIndex(model);
  }
};
/**
* Save embedding index to disk
*/
export const saveIndex = async (
baseDir: string,
index: EmbeddingIndex,
): Promise<void> => {
const indexPath = getIndexPath(baseDir);
await fs.mkdir(baseDir, { recursive: true });
await fs.writeFile(indexPath, JSON.stringify(index, null, 2), "utf-8");
};
// =============================================================================
// Index Operations
// =============================================================================
/**
 * Add or update an embedding in the index.
 * Pure: returns a new index object; the input index is not mutated.
 */
export const upsertEmbedding = (
  index: EmbeddingIndex,
  id: string,
  embedding: EmbeddingVector,
): EmbeddingIndex => {
  const now = Date.now();
  const entry: StoredEmbedding = {
    id,
    embedding,
    model: index.model,
    createdAt: now,
  };
  return {
    ...index,
    embeddings: { ...index.embeddings, [id]: entry },
    lastUpdated: now,
  };
};
/**
 * Remove an embedding from the index.
 * Pure: returns a new index object; removing a missing id is a harmless no-op.
 */
export const removeEmbedding = (
  index: EmbeddingIndex,
  id: string,
): EmbeddingIndex => {
  const embeddings = { ...index.embeddings };
  delete embeddings[id];
  return {
    ...index,
    embeddings,
    lastUpdated: Date.now(),
  };
};
/**
 * Check if an embedding exists in the index.
 */
export const hasEmbedding = (index: EmbeddingIndex, id: string): boolean => {
  return id in index.embeddings;
};
/**
 * Get an embedding from the index, or null when the id is absent.
 */
export const getEmbedding = (
  index: EmbeddingIndex,
  id: string,
): StoredEmbedding | null => {
  const stored = index.embeddings[id];
  return stored === undefined ? null : stored;
};
// =============================================================================
// Similarity Search
// =============================================================================
/**
 * Find the most similar embeddings to a query vector.
 * Scores every stored embedding by cosine similarity, keeps those at or
 * above `minSimilarity`, and returns the top K sorted by score descending.
 */
export const findSimilar = (
  index: EmbeddingIndex,
  queryVector: EmbeddingVector,
  topK: number = EMBEDDING_SEARCH.TOP_K,
  minSimilarity: number = EMBEDDING_SEARCH.MIN_SIMILARITY,
): SimilarityResult[] =>
  Object.entries(index.embeddings)
    .map(([id, stored]) => ({
      id,
      score: cosineSimilarity(queryVector, stored.embedding),
    }))
    .filter((result) => result.score >= minSimilarity)
    .sort((a, b) => b.score - a.score)
    .slice(0, topK);
/**
 * Find all embeddings whose similarity to the query vector is at or above
 * `threshold`, sorted by score descending (no top-K cap).
 */
export const findAboveThreshold = (
  index: EmbeddingIndex,
  queryVector: EmbeddingVector,
  threshold: number,
): SimilarityResult[] =>
  Object.entries(index.embeddings)
    .map(([id, stored]) => ({
      id,
      score: cosineSimilarity(queryVector, stored.embedding),
    }))
    .filter((result) => result.score >= threshold)
    .sort((a, b) => b.score - a.score);
// =============================================================================
// Index Statistics
// =============================================================================
// Lightweight summary of an index: entry count, model name, last update time.
export const getIndexStats = (
  index: EmbeddingIndex,
): {
  count: number;
  model: string;
  lastUpdated: number;
} => {
  const { model, lastUpdated, embeddings } = index;
  return {
    count: Object.keys(embeddings).length,
    model,
    lastUpdated,
  };
};