Terminal-based AI coding agent with interactive TUI for autonomous code generation.
Features: - Interactive TUI with React/Ink - Autonomous agent with tool calls (bash, read, write, edit, glob, grep) - Permission system with pattern-based rules - Session management with auto-compaction - Dual providers: GitHub Copilot and Ollama - MCP server integration - Todo panel and theme system - Streaming responses - GitHub-compatible project context
This commit is contained in:
231
src/services/learning/__tests__/vector-store.test.ts
Normal file
231
src/services/learning/__tests__/vector-store.test.ts
Normal file
@@ -0,0 +1,231 @@
|
||||
/**
|
||||
* Unit tests for Vector Store
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from "bun:test";
|
||||
|
||||
import {
|
||||
cosineSimilarity,
|
||||
euclideanDistance,
|
||||
upsertEmbedding,
|
||||
removeEmbedding,
|
||||
hasEmbedding,
|
||||
getEmbedding,
|
||||
findSimilar,
|
||||
findAboveThreshold,
|
||||
getIndexStats,
|
||||
} from "@services/learning/vector-store";
|
||||
|
||||
import { createEmptyIndex } from "@/types/embeddings";
|
||||
|
||||
describe("Vector Store", () => {
|
||||
describe("cosineSimilarity", () => {
|
||||
it("should return 1 for identical vectors", () => {
|
||||
const a = [1, 0, 0];
|
||||
const b = [1, 0, 0];
|
||||
|
||||
expect(cosineSimilarity(a, b)).toBeCloseTo(1);
|
||||
});
|
||||
|
||||
it("should return 0 for orthogonal vectors", () => {
|
||||
const a = [1, 0, 0];
|
||||
const b = [0, 1, 0];
|
||||
|
||||
expect(cosineSimilarity(a, b)).toBeCloseTo(0);
|
||||
});
|
||||
|
||||
it("should return -1 for opposite vectors", () => {
|
||||
const a = [1, 0, 0];
|
||||
const b = [-1, 0, 0];
|
||||
|
||||
expect(cosineSimilarity(a, b)).toBeCloseTo(-1);
|
||||
});
|
||||
|
||||
it("should handle normalized vectors", () => {
|
||||
const a = [0.6, 0.8, 0];
|
||||
const b = [0.8, 0.6, 0];
|
||||
|
||||
const similarity = cosineSimilarity(a, b);
|
||||
expect(similarity).toBeGreaterThan(0);
|
||||
expect(similarity).toBeLessThan(1);
|
||||
});
|
||||
|
||||
it("should return 0 for mismatched lengths", () => {
|
||||
const a = [1, 0, 0];
|
||||
const b = [1, 0];
|
||||
|
||||
expect(cosineSimilarity(a, b)).toBe(0);
|
||||
});
|
||||
|
||||
it("should handle zero vectors", () => {
|
||||
const a = [0, 0, 0];
|
||||
const b = [1, 0, 0];
|
||||
|
||||
expect(cosineSimilarity(a, b)).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("euclideanDistance", () => {
|
||||
it("should return 0 for identical vectors", () => {
|
||||
const a = [1, 2, 3];
|
||||
const b = [1, 2, 3];
|
||||
|
||||
expect(euclideanDistance(a, b)).toBe(0);
|
||||
});
|
||||
|
||||
it("should compute correct distance", () => {
|
||||
const a = [0, 0, 0];
|
||||
const b = [3, 4, 0];
|
||||
|
||||
expect(euclideanDistance(a, b)).toBe(5);
|
||||
});
|
||||
|
||||
it("should return Infinity for mismatched lengths", () => {
|
||||
const a = [1, 0, 0];
|
||||
const b = [1, 0];
|
||||
|
||||
expect(euclideanDistance(a, b)).toBe(Infinity);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Index Operations", () => {
|
||||
it("should create empty index", () => {
|
||||
const index = createEmptyIndex("test-model");
|
||||
|
||||
expect(index.version).toBe(1);
|
||||
expect(index.model).toBe("test-model");
|
||||
expect(Object.keys(index.embeddings)).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("should upsert embedding", () => {
|
||||
let index = createEmptyIndex("test-model");
|
||||
const embedding = [0.1, 0.2, 0.3];
|
||||
|
||||
index = upsertEmbedding(index, "learn_1", embedding);
|
||||
|
||||
expect(hasEmbedding(index, "learn_1")).toBe(true);
|
||||
expect(getEmbedding(index, "learn_1")?.embedding).toEqual(embedding);
|
||||
});
|
||||
|
||||
it("should update existing embedding", () => {
|
||||
let index = createEmptyIndex("test-model");
|
||||
const embedding1 = [0.1, 0.2, 0.3];
|
||||
const embedding2 = [0.4, 0.5, 0.6];
|
||||
|
||||
index = upsertEmbedding(index, "learn_1", embedding1);
|
||||
index = upsertEmbedding(index, "learn_1", embedding2);
|
||||
|
||||
expect(getEmbedding(index, "learn_1")?.embedding).toEqual(embedding2);
|
||||
});
|
||||
|
||||
it("should remove embedding", () => {
|
||||
let index = createEmptyIndex("test-model");
|
||||
index = upsertEmbedding(index, "learn_1", [0.1, 0.2, 0.3]);
|
||||
index = upsertEmbedding(index, "learn_2", [0.4, 0.5, 0.6]);
|
||||
|
||||
index = removeEmbedding(index, "learn_1");
|
||||
|
||||
expect(hasEmbedding(index, "learn_1")).toBe(false);
|
||||
expect(hasEmbedding(index, "learn_2")).toBe(true);
|
||||
});
|
||||
|
||||
it("should return null for missing embedding", () => {
|
||||
const index = createEmptyIndex("test-model");
|
||||
|
||||
expect(getEmbedding(index, "nonexistent")).toBeNull();
|
||||
});
|
||||
|
||||
it("should track index stats", () => {
|
||||
let index = createEmptyIndex("test-model");
|
||||
index = upsertEmbedding(index, "learn_1", [0.1, 0.2, 0.3]);
|
||||
index = upsertEmbedding(index, "learn_2", [0.4, 0.5, 0.6]);
|
||||
|
||||
const stats = getIndexStats(index);
|
||||
|
||||
expect(stats.count).toBe(2);
|
||||
expect(stats.model).toBe("test-model");
|
||||
});
|
||||
});
|
||||
|
||||
describe("Similarity Search", () => {
|
||||
it("should find similar embeddings", () => {
|
||||
let index = createEmptyIndex("test-model");
|
||||
|
||||
// Add embeddings with known similarities
|
||||
index = upsertEmbedding(index, "a", [1, 0, 0]);
|
||||
index = upsertEmbedding(index, "b", [0.9, 0.1, 0]);
|
||||
index = upsertEmbedding(index, "c", [0, 1, 0]);
|
||||
|
||||
const query = [1, 0, 0];
|
||||
const results = findSimilar(index, query, 2, 0);
|
||||
|
||||
expect(results).toHaveLength(2);
|
||||
expect(results[0].id).toBe("a");
|
||||
expect(results[0].score).toBeCloseTo(1);
|
||||
expect(results[1].id).toBe("b");
|
||||
});
|
||||
|
||||
it("should respect minSimilarity threshold", () => {
|
||||
let index = createEmptyIndex("test-model");
|
||||
|
||||
index = upsertEmbedding(index, "a", [1, 0, 0]);
|
||||
index = upsertEmbedding(index, "b", [0, 1, 0]);
|
||||
|
||||
const query = [1, 0, 0];
|
||||
const results = findSimilar(index, query, 10, 0.5);
|
||||
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].id).toBe("a");
|
||||
});
|
||||
|
||||
it("should limit results to topK", () => {
|
||||
let index = createEmptyIndex("test-model");
|
||||
|
||||
for (let i = 0; i < 10; i++) {
|
||||
const embedding = [Math.random(), Math.random(), Math.random()];
|
||||
index = upsertEmbedding(index, `learn_${i}`, embedding);
|
||||
}
|
||||
|
||||
const query = [0.5, 0.5, 0.5];
|
||||
const results = findSimilar(index, query, 3, 0);
|
||||
|
||||
expect(results.length).toBeLessThanOrEqual(3);
|
||||
});
|
||||
|
||||
it("should find all above threshold", () => {
|
||||
let index = createEmptyIndex("test-model");
|
||||
|
||||
index = upsertEmbedding(index, "a", [1, 0, 0]);
|
||||
index = upsertEmbedding(index, "b", [0.95, 0.05, 0]);
|
||||
index = upsertEmbedding(index, "c", [0.9, 0.1, 0]);
|
||||
index = upsertEmbedding(index, "d", [0, 1, 0]);
|
||||
|
||||
const query = [1, 0, 0];
|
||||
const results = findAboveThreshold(index, query, 0.85);
|
||||
|
||||
expect(results.length).toBe(3);
|
||||
expect(results.map((r) => r.id)).toContain("a");
|
||||
expect(results.map((r) => r.id)).toContain("b");
|
||||
expect(results.map((r) => r.id)).toContain("c");
|
||||
});
|
||||
|
||||
it("should return empty array for no matches", () => {
|
||||
let index = createEmptyIndex("test-model");
|
||||
|
||||
index = upsertEmbedding(index, "a", [1, 0, 0]);
|
||||
|
||||
const query = [-1, 0, 0];
|
||||
const results = findSimilar(index, query, 10, 0.5);
|
||||
|
||||
expect(results).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("should handle empty index", () => {
|
||||
const index = createEmptyIndex("test-model");
|
||||
const query = [1, 0, 0];
|
||||
const results = findSimilar(index, query, 10, 0);
|
||||
|
||||
expect(results).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
});
|
||||
86
src/services/learning/analyze.ts
Normal file
86
src/services/learning/analyze.ts
Normal file
@@ -0,0 +1,86 @@
|
||||
/**
|
||||
* Message analysis for learning detection
|
||||
*/
|
||||
|
||||
import {
|
||||
LEARNING_PATTERNS,
|
||||
LEARNING_KEYWORDS,
|
||||
LEARNING_DEFAULTS,
|
||||
LEARNING_CONTEXTS,
|
||||
} from "@constants/learning";
|
||||
import { categorizePattern } from "@services/learning/categorize";
|
||||
import { extractLearningContent } from "@services/learning/extract";
|
||||
import type { LearningCandidate, MessageSource } from "@/types/learning";
|
||||
|
||||
const getContextForSource = (source: MessageSource): string =>
|
||||
source === "user"
|
||||
? LEARNING_CONTEXTS.USER_PREFERENCE
|
||||
: LEARNING_CONTEXTS.CONVENTION_IDENTIFIED;
|
||||
|
||||
const findPatternMatches = (
|
||||
message: string,
|
||||
source: MessageSource,
|
||||
): LearningCandidate[] => {
|
||||
const candidates: LearningCandidate[] = [];
|
||||
|
||||
for (const pattern of LEARNING_PATTERNS) {
|
||||
const match = message.match(pattern);
|
||||
if (match) {
|
||||
candidates.push({
|
||||
content: extractLearningContent(message, match),
|
||||
context: getContextForSource(source),
|
||||
confidence: LEARNING_DEFAULTS.BASE_PATTERN_CONFIDENCE,
|
||||
category: categorizePattern(pattern),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return candidates;
|
||||
};
|
||||
|
||||
const countKeywords = (text: string): number =>
|
||||
LEARNING_KEYWORDS.filter((keyword) => text.includes(keyword)).length;
|
||||
|
||||
const extractKeywordSentences = (message: string): LearningCandidate[] => {
|
||||
const candidates: LearningCandidate[] = [];
|
||||
const sentences = message.split(/[.!?]+/).filter((s) => s.trim());
|
||||
|
||||
for (const sentence of sentences) {
|
||||
const sentenceLower = sentence.toLowerCase();
|
||||
const keywordCount = countKeywords(sentenceLower);
|
||||
|
||||
if (keywordCount >= LEARNING_DEFAULTS.MIN_KEYWORDS_FOR_LEARNING) {
|
||||
const confidence =
|
||||
LEARNING_DEFAULTS.BASE_KEYWORD_CONFIDENCE +
|
||||
keywordCount * LEARNING_DEFAULTS.KEYWORD_CONFIDENCE_INCREMENT;
|
||||
|
||||
candidates.push({
|
||||
content: sentence.trim(),
|
||||
context: LEARNING_CONTEXTS.MULTIPLE_INDICATORS,
|
||||
confidence,
|
||||
category: "general",
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return candidates;
|
||||
};
|
||||
|
||||
export const analyzeMessage = (
|
||||
message: string,
|
||||
source: MessageSource,
|
||||
): LearningCandidate[] => {
|
||||
const candidates: LearningCandidate[] = [];
|
||||
const lowerMessage = message.toLowerCase();
|
||||
|
||||
const patternMatches = findPatternMatches(message, source);
|
||||
candidates.push(...patternMatches);
|
||||
|
||||
const keywordCount = countKeywords(lowerMessage);
|
||||
if (keywordCount >= LEARNING_DEFAULTS.MIN_KEYWORDS_FOR_LEARNING) {
|
||||
const keywordSentences = extractKeywordSentences(message);
|
||||
candidates.push(...keywordSentences);
|
||||
}
|
||||
|
||||
return candidates;
|
||||
};
|
||||
72
src/services/learning/assistant.ts
Normal file
72
src/services/learning/assistant.ts
Normal file
@@ -0,0 +1,72 @@
|
||||
/**
|
||||
* Assistant response analysis for learning detection
|
||||
*/
|
||||
|
||||
import {
|
||||
ACKNOWLEDGMENT_PATTERNS,
|
||||
ACKNOWLEDGMENT_PHRASES,
|
||||
LEARNING_DEFAULTS,
|
||||
LEARNING_CONTEXTS,
|
||||
} from "@constants/learning";
|
||||
import { analyzeMessage } from "@services/learning/analyze";
|
||||
import { extractLearningFromAcknowledgment } from "@services/learning/extract";
|
||||
import type { LearningCandidate } from "@/types/learning";
|
||||
|
||||
const findAcknowledgmentMatches = (
|
||||
userMessage: string,
|
||||
assistantResponse: string,
|
||||
): LearningCandidate[] => {
|
||||
const candidates: LearningCandidate[] = [];
|
||||
|
||||
for (const pattern of ACKNOWLEDGMENT_PATTERNS) {
|
||||
const match = assistantResponse.match(pattern);
|
||||
if (match) {
|
||||
candidates.push({
|
||||
content: extractLearningFromAcknowledgment(userMessage),
|
||||
context: LEARNING_CONTEXTS.CONVENTION_CONFIRMED,
|
||||
confidence: LEARNING_DEFAULTS.ACKNOWLEDGMENT_CONFIDENCE,
|
||||
category: "convention",
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return candidates;
|
||||
};
|
||||
|
||||
const hasAcknowledgmentPhrase = (response: string): boolean =>
|
||||
ACKNOWLEDGMENT_PHRASES.some((phrase) => response.includes(phrase));
|
||||
|
||||
const boostConfidence = (candidate: LearningCandidate): LearningCandidate => ({
|
||||
...candidate,
|
||||
confidence: Math.min(
|
||||
candidate.confidence + LEARNING_DEFAULTS.CONFIDENCE_BOOST,
|
||||
LEARNING_DEFAULTS.MAX_CONFIDENCE,
|
||||
),
|
||||
context: LEARNING_CONTEXTS.PREFERENCE_ACKNOWLEDGED,
|
||||
});
|
||||
|
||||
const getAcknowledgedLearnings = (userMessage: string): LearningCandidate[] => {
|
||||
const userLearnings = analyzeMessage(userMessage, "user");
|
||||
return userLearnings.map(boostConfidence);
|
||||
};
|
||||
|
||||
export const analyzeAssistantResponse = (
|
||||
userMessage: string,
|
||||
assistantResponse: string,
|
||||
): LearningCandidate[] => {
|
||||
const candidates: LearningCandidate[] = [];
|
||||
const lowerResponse = assistantResponse.toLowerCase();
|
||||
|
||||
const acknowledgmentMatches = findAcknowledgmentMatches(
|
||||
userMessage,
|
||||
assistantResponse,
|
||||
);
|
||||
candidates.push(...acknowledgmentMatches);
|
||||
|
||||
if (hasAcknowledgmentPhrase(lowerResponse)) {
|
||||
const acknowledgedLearnings = getAcknowledgedLearnings(userMessage);
|
||||
candidates.push(...acknowledgedLearnings);
|
||||
}
|
||||
|
||||
return candidates;
|
||||
};
|
||||
20
src/services/learning/categorize.ts
Normal file
20
src/services/learning/categorize.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
/**
|
||||
* Learning pattern categorization
|
||||
*/
|
||||
|
||||
import { CATEGORY_PATTERNS } from "@constants/learning";
|
||||
import type { LearningCategory } from "@/types/learning";
|
||||
|
||||
const findMatchingCategory = (patternStr: string): LearningCategory | null => {
|
||||
for (const [keyword, category] of Object.entries(CATEGORY_PATTERNS)) {
|
||||
if (patternStr.includes(keyword)) {
|
||||
return category;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
};
|
||||
|
||||
export const categorizePattern = (pattern: RegExp): LearningCategory => {
|
||||
const patternStr = pattern.toString().toLowerCase();
|
||||
return findMatchingCategory(patternStr) ?? "general";
|
||||
};
|
||||
25
src/services/learning/deduplicate.ts
Normal file
25
src/services/learning/deduplicate.ts
Normal file
@@ -0,0 +1,25 @@
|
||||
/**
|
||||
* Learning candidate deduplication
|
||||
*/
|
||||
|
||||
import type { LearningCandidate } from "@/types/learning";
|
||||
|
||||
const normalizeContent = (content: string): string =>
|
||||
content.toLowerCase().trim();
|
||||
|
||||
export const deduplicateCandidates = (
|
||||
candidates: LearningCandidate[],
|
||||
): LearningCandidate[] => {
|
||||
const seen = new Set<string>();
|
||||
|
||||
return candidates.filter((candidate) => {
|
||||
const key = normalizeContent(candidate.content);
|
||||
|
||||
if (seen.has(key)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
seen.add(key);
|
||||
return true;
|
||||
});
|
||||
};
|
||||
29
src/services/learning/detect.ts
Normal file
29
src/services/learning/detect.ts
Normal file
@@ -0,0 +1,29 @@
|
||||
/**
|
||||
* Learning detection orchestration
|
||||
*/
|
||||
|
||||
import { analyzeMessage } from "@services/learning/analyze";
|
||||
import { analyzeAssistantResponse } from "@services/learning/assistant";
|
||||
import { deduplicateCandidates } from "@services/learning/deduplicate";
|
||||
import type { LearningCandidate } from "@/types/learning";
|
||||
|
||||
const sortByConfidence = (a: LearningCandidate, b: LearningCandidate): number =>
|
||||
b.confidence - a.confidence;
|
||||
|
||||
export const detectLearnings = (
|
||||
userMessage: string,
|
||||
assistantResponse: string,
|
||||
): LearningCandidate[] => {
|
||||
const candidates: LearningCandidate[] = [];
|
||||
|
||||
const userLearnings = analyzeMessage(userMessage, "user");
|
||||
candidates.push(...userLearnings);
|
||||
|
||||
const assistantLearnings = analyzeAssistantResponse(
|
||||
userMessage,
|
||||
assistantResponse,
|
||||
);
|
||||
candidates.push(...assistantLearnings);
|
||||
|
||||
return deduplicateCandidates(candidates).sort(sortByConfidence);
|
||||
};
|
||||
240
src/services/learning/embeddings.ts
Normal file
240
src/services/learning/embeddings.ts
Normal file
@@ -0,0 +1,240 @@
|
||||
/**
|
||||
* Embedding Service
|
||||
*
|
||||
* Generates text embeddings using Ollama for semantic search
|
||||
*/
|
||||
|
||||
import got from "got";
|
||||
|
||||
import {
|
||||
EMBEDDING_DEFAULTS,
|
||||
EMBEDDING_ENDPOINTS,
|
||||
EMBEDDING_TIMEOUTS,
|
||||
} from "@constants/embeddings";
|
||||
import { getOllamaBaseUrl } from "@providers/ollama/state";
|
||||
|
||||
import type {
|
||||
EmbeddingVector,
|
||||
EmbeddingResult,
|
||||
EmbeddingError,
|
||||
EmbeddingServiceState,
|
||||
OllamaEmbedRequest,
|
||||
OllamaEmbedResponse,
|
||||
} from "@/types/embeddings";
|
||||
|
||||
// =============================================================================
|
||||
// Service State
|
||||
// =============================================================================
|
||||
|
||||
// Module-level singleton state for the embedding service. Written by
// initializeEmbeddingService() and resetEmbeddingService(); read by the
// accessor functions below.
let serviceState: EmbeddingServiceState = {
  initialized: false,
  model: null,
  available: false,
  error: null,
};
|
||||
|
||||
// =============================================================================
|
||||
// Ollama API
|
||||
// =============================================================================
|
||||
|
||||
const callOllamaEmbed = async (
|
||||
texts: string[],
|
||||
model: string,
|
||||
): Promise<EmbeddingVector[]> => {
|
||||
const baseUrl = getOllamaBaseUrl();
|
||||
const endpoint = `${baseUrl}${EMBEDDING_ENDPOINTS.EMBED}`;
|
||||
|
||||
const request: OllamaEmbedRequest = {
|
||||
model,
|
||||
input: texts,
|
||||
};
|
||||
|
||||
const response = await got
|
||||
.post(endpoint, {
|
||||
json: request,
|
||||
timeout: { request: EMBEDDING_TIMEOUTS.EMBED },
|
||||
})
|
||||
.json<OllamaEmbedResponse>();
|
||||
|
||||
return response.embeddings;
|
||||
};
|
||||
|
||||
// =============================================================================
|
||||
// Model Detection
|
||||
// =============================================================================
|
||||
|
||||
const checkModelAvailable = async (model: string): Promise<boolean> => {
|
||||
try {
|
||||
// Try to embed a simple test string
|
||||
await callOllamaEmbed(["test"], model);
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
const findAvailableModel = async (): Promise<string | null> => {
|
||||
const modelsToTry = [
|
||||
EMBEDDING_DEFAULTS.MODEL,
|
||||
EMBEDDING_DEFAULTS.FALLBACK_MODEL,
|
||||
"mxbai-embed-large",
|
||||
"snowflake-arctic-embed",
|
||||
];
|
||||
|
||||
for (const model of modelsToTry) {
|
||||
const available = await checkModelAvailable(model);
|
||||
if (available) {
|
||||
return model;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
};
|
||||
|
||||
// =============================================================================
|
||||
// Service Initialization
|
||||
// =============================================================================
|
||||
|
||||
export const initializeEmbeddingService =
|
||||
async (): Promise<EmbeddingServiceState> => {
|
||||
if (serviceState.initialized) {
|
||||
return serviceState;
|
||||
}
|
||||
|
||||
try {
|
||||
const model = await findAvailableModel();
|
||||
|
||||
if (model) {
|
||||
serviceState = {
|
||||
initialized: true,
|
||||
model,
|
||||
available: true,
|
||||
error: null,
|
||||
};
|
||||
} else {
|
||||
serviceState = {
|
||||
initialized: true,
|
||||
model: null,
|
||||
available: false,
|
||||
error: {
|
||||
code: "MODEL_NOT_FOUND",
|
||||
message: `No embedding model found. Install one with: ollama pull ${EMBEDDING_DEFAULTS.MODEL}`,
|
||||
},
|
||||
};
|
||||
}
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
const isConnectionError =
|
||||
message.includes("ECONNREFUSED") || message.includes("connect");
|
||||
|
||||
serviceState = {
|
||||
initialized: true,
|
||||
model: null,
|
||||
available: false,
|
||||
error: {
|
||||
code: isConnectionError ? "OLLAMA_NOT_RUNNING" : "EMBEDDING_FAILED",
|
||||
message: isConnectionError
|
||||
? "Ollama is not running. Start it with: ollama serve"
|
||||
: `Embedding service error: ${message}`,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
return serviceState;
|
||||
};
|
||||
|
||||
// =============================================================================
|
||||
// Core Embedding Functions
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Generate embedding for a single text
|
||||
*/
|
||||
export const embed = async (text: string): Promise<EmbeddingResult | null> => {
|
||||
if (!serviceState.initialized) {
|
||||
await initializeEmbeddingService();
|
||||
}
|
||||
|
||||
if (!serviceState.available || !serviceState.model) {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
const embeddings = await callOllamaEmbed([text], serviceState.model);
|
||||
|
||||
if (embeddings.length === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return {
|
||||
text,
|
||||
embedding: embeddings[0],
|
||||
model: serviceState.model,
|
||||
dimensions: embeddings[0].length,
|
||||
};
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Generate embeddings for multiple texts (batch)
|
||||
*/
|
||||
export const embedBatch = async (
|
||||
texts: string[],
|
||||
): Promise<(EmbeddingResult | null)[]> => {
|
||||
if (!serviceState.initialized) {
|
||||
await initializeEmbeddingService();
|
||||
}
|
||||
|
||||
if (!serviceState.available || !serviceState.model) {
|
||||
return texts.map(() => null);
|
||||
}
|
||||
|
||||
try {
|
||||
const embeddings = await callOllamaEmbed(texts, serviceState.model);
|
||||
|
||||
return texts.map((text, i) => {
|
||||
const embedding = embeddings[i];
|
||||
if (!embedding) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return {
|
||||
text,
|
||||
embedding,
|
||||
model: serviceState.model!,
|
||||
dimensions: embedding.length,
|
||||
};
|
||||
});
|
||||
} catch {
|
||||
return texts.map(() => null);
|
||||
}
|
||||
};
|
||||
|
||||
// =============================================================================
|
||||
// Service State Accessors
|
||||
// =============================================================================
|
||||
|
||||
// Whether an embedding model was found and the service can embed.
export const isEmbeddingAvailable = (): boolean => serviceState.available;

// The detected embedding model name, or null before/without detection.
export const getEmbeddingModel = (): string | null => serviceState.model;

// The last initialization error, or null when none was recorded.
export const getEmbeddingError = (): EmbeddingError | null =>
  serviceState.error;

// Shallow snapshot of the full state, safe for callers to hold.
export const getServiceState = (): EmbeddingServiceState => ({
  ...serviceState,
});
|
||||
|
||||
/**
 * Reset service state to its pristine, uninitialized form (for testing).
 * The next embed/initialize call will re-run model detection.
 */
export const resetEmbeddingService = (): void => {
  serviceState = {
    initialized: false,
    model: null,
    available: false,
    error: null,
  };
};
|
||||
38
src/services/learning/extract.ts
Normal file
38
src/services/learning/extract.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
/**
|
||||
* Learning content extraction utilities
|
||||
*/
|
||||
|
||||
import { LEARNING_DEFAULTS } from "@constants/learning";
|
||||
|
||||
const splitIntoSentences = (text: string): string[] =>
|
||||
text.split(/[.!?]+/).filter((s) => s.trim());
|
||||
|
||||
const findMatchingSentence = (
|
||||
sentences: string[],
|
||||
matchText: string,
|
||||
): string | null => {
|
||||
const lowerMatch = matchText.toLowerCase();
|
||||
return sentences.find((s) => s.toLowerCase().includes(lowerMatch)) ?? null;
|
||||
};
|
||||
|
||||
export const extractLearningContent = (
|
||||
message: string,
|
||||
match: RegExpMatchArray,
|
||||
): string => {
|
||||
const sentences = splitIntoSentences(message);
|
||||
const matchingSentence = findMatchingSentence(sentences, match[0]);
|
||||
|
||||
return matchingSentence?.trim() ?? match[0];
|
||||
};
|
||||
|
||||
export const extractLearningFromAcknowledgment = (
|
||||
userMessage: string,
|
||||
): string => {
|
||||
const sentences = splitIntoSentences(userMessage);
|
||||
|
||||
if (sentences.length > 0) {
|
||||
return sentences[0].trim();
|
||||
}
|
||||
|
||||
return userMessage.slice(0, LEARNING_DEFAULTS.MAX_SLICE_LENGTH);
|
||||
};
|
||||
13
src/services/learning/format.ts
Normal file
13
src/services/learning/format.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
/**
|
||||
* Learning formatting utilities
|
||||
*/
|
||||
|
||||
import { LEARNING_DEFAULTS } from "@constants/learning";
|
||||
import type { LearningCandidate } from "@/types/learning";
|
||||
|
||||
export const formatLearningForPrompt = (
|
||||
candidate: LearningCandidate,
|
||||
): string =>
|
||||
candidate.content.length > LEARNING_DEFAULTS.MAX_CONTENT_LENGTH
|
||||
? candidate.content.slice(0, LEARNING_DEFAULTS.TRUNCATE_LENGTH) + "..."
|
||||
: candidate.content;
|
||||
67
src/services/learning/index.ts
Normal file
67
src/services/learning/index.ts
Normal file
@@ -0,0 +1,67 @@
|
||||
/**
|
||||
* Learning Service Exports
|
||||
*
|
||||
* Central export point for all learning-related functionality
|
||||
*/
|
||||
|
||||
// Core persistence (save/query learnings via project config)
export {
  saveLearning,
  getLearnings,
  learningExists,
} from "@services/learning/persistence";

// Embedding service (Ollama-backed text embeddings + service state)
export {
  initializeEmbeddingService,
  embed,
  embedBatch,
  isEmbeddingAvailable,
  getEmbeddingModel,
  getEmbeddingError,
  getServiceState,
  resetEmbeddingService,
} from "@services/learning/embeddings";

// Vector store (similarity math, index persistence and queries)
export {
  cosineSimilarity,
  euclideanDistance,
  loadIndex,
  saveIndex,
  upsertEmbedding,
  removeEmbedding,
  hasEmbedding,
  getEmbedding,
  findSimilar,
  findAboveThreshold,
  getIndexStats,
} from "@services/learning/vector-store";

// Semantic search (high-level indexing and retrieval over learnings)
export {
  indexLearning,
  unindexLearning,
  isLearningIndexed,
  searchLearnings,
  rebuildIndex,
  clearIndexCache,
  getIndexStatistics,
} from "@services/learning/semantic-search";

// Re-export learning types
export type {
  StoredLearning,
  LearningCandidate,
  LearningCategory,
} from "@/types/learning";

// Re-export embedding types
export type {
  EmbeddingVector,
  EmbeddingResult,
  EmbeddingIndex,
  StoredEmbedding,
  SimilarityResult,
  SemanticSearchResult,
  SemanticSearchOptions,
} from "@/types/embeddings";
||||
48
src/services/learning/persistence.ts
Normal file
48
src/services/learning/persistence.ts
Normal file
@@ -0,0 +1,48 @@
|
||||
/**
|
||||
* Learning persistence operations
|
||||
*/
|
||||
|
||||
import { projectConfig } from "@services/project-config";
|
||||
import type { StoredLearning } from "@/types/learning";
|
||||
import { indexLearning } from "@services/learning/semantic-search";
|
||||
|
||||
export const saveLearning = async (
|
||||
content: string,
|
||||
context?: string,
|
||||
global = false,
|
||||
): Promise<void> => {
|
||||
// Save the learning
|
||||
const learning = await projectConfig.addLearning(content, context, global);
|
||||
|
||||
// Index for semantic search (non-blocking, don't fail if embeddings unavailable)
|
||||
if (learning) {
|
||||
indexLearning(learning, global).catch(() => {
|
||||
// Silently ignore embedding failures
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
// Fetch all stored learnings from project configuration.
export const getLearnings = async (): Promise<StoredLearning[]> =>
  projectConfig.getLearnings();
|
||||
|
||||
const normalizeForComparison = (text: string): string =>
|
||||
text.toLowerCase().trim();
|
||||
|
||||
const isSimilarContent = (existing: string, newContent: string): boolean => {
|
||||
const normalizedExisting = normalizeForComparison(existing);
|
||||
const normalizedNew = normalizeForComparison(newContent);
|
||||
|
||||
return (
|
||||
normalizedExisting === normalizedNew ||
|
||||
normalizedExisting.includes(normalizedNew) ||
|
||||
normalizedNew.includes(normalizedExisting)
|
||||
);
|
||||
};
|
||||
|
||||
export const learningExists = async (content: string): Promise<boolean> => {
|
||||
const learnings = await getLearnings();
|
||||
|
||||
return learnings.some((learning) =>
|
||||
isSimilarContent(learning.content, content),
|
||||
);
|
||||
};
|
||||
386
src/services/learning/semantic-search.ts
Normal file
386
src/services/learning/semantic-search.ts
Normal file
@@ -0,0 +1,386 @@
|
||||
/**
|
||||
* Semantic Search Service
|
||||
*
|
||||
* High-level API for semantic learning retrieval
|
||||
*/
|
||||
|
||||
import * as path from "path";
|
||||
|
||||
import { EMBEDDING_SEARCH } from "@constants/embeddings";
|
||||
import {
|
||||
getGlobalConfigDir,
|
||||
getLocalConfigDir,
|
||||
} from "@services/project-config";
|
||||
|
||||
import type { StoredLearning } from "@/types/learning";
|
||||
import type {
|
||||
EmbeddingIndex,
|
||||
SemanticSearchResult,
|
||||
SemanticSearchOptions,
|
||||
SimilarityResult,
|
||||
} from "@/types/embeddings";
|
||||
|
||||
import {
|
||||
embed,
|
||||
isEmbeddingAvailable,
|
||||
initializeEmbeddingService,
|
||||
getEmbeddingModel,
|
||||
} from "@services/learning/embeddings";
|
||||
|
||||
import {
|
||||
loadIndex,
|
||||
saveIndex,
|
||||
upsertEmbedding,
|
||||
removeEmbedding,
|
||||
findSimilar,
|
||||
hasEmbedding,
|
||||
} from "@services/learning/vector-store";
|
||||
|
||||
// =============================================================================
|
||||
// Index Management
|
||||
// =============================================================================
|
||||
|
||||
// In-memory caches of the loaded indexes; null until first load.
// Mutated by the index/unindex operations below.
let globalIndex: EmbeddingIndex | null = null;
let localIndex: EmbeddingIndex | null = null;
|
||||
|
||||
// Directory holding the global (per-user) learnings index.
const getGlobalIndexDir = (): string =>
  path.join(getGlobalConfigDir(), "learnings");

// Directory holding the local (per-project) learnings index.
const getLocalIndexDir = (): string =>
  path.join(getLocalConfigDir(), "learnings");
|
||||
|
||||
/**
|
||||
* Initialize or get the global embedding index
|
||||
*/
|
||||
const getGlobalIndex = async (): Promise<EmbeddingIndex | null> => {
|
||||
if (!isEmbeddingAvailable()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (globalIndex) {
|
||||
return globalIndex;
|
||||
}
|
||||
|
||||
const model = getEmbeddingModel();
|
||||
if (!model) {
|
||||
return null;
|
||||
}
|
||||
|
||||
globalIndex = await loadIndex(getGlobalIndexDir(), model);
|
||||
return globalIndex;
|
||||
};
|
||||
|
||||
/**
|
||||
* Initialize or get the local embedding index
|
||||
*/
|
||||
const getLocalIndex = async (): Promise<EmbeddingIndex | null> => {
|
||||
if (!isEmbeddingAvailable()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (localIndex) {
|
||||
return localIndex;
|
||||
}
|
||||
|
||||
const model = getEmbeddingModel();
|
||||
if (!model) {
|
||||
return null;
|
||||
}
|
||||
|
||||
localIndex = await loadIndex(getLocalIndexDir(), model);
|
||||
return localIndex;
|
||||
};
|
||||
|
||||
/**
|
||||
* Save both indexes to disk
|
||||
*/
|
||||
const persistIndexes = async (): Promise<void> => {
|
||||
if (globalIndex) {
|
||||
await saveIndex(getGlobalIndexDir(), globalIndex);
|
||||
}
|
||||
if (localIndex) {
|
||||
await saveIndex(getLocalIndexDir(), localIndex);
|
||||
}
|
||||
};
|
||||
|
||||
// =============================================================================
|
||||
// Embedding Management
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Add embedding for a learning
|
||||
*/
|
||||
export const indexLearning = async (
|
||||
learning: StoredLearning,
|
||||
global: boolean,
|
||||
): Promise<boolean> => {
|
||||
await initializeEmbeddingService();
|
||||
|
||||
if (!isEmbeddingAvailable()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const result = await embed(learning.content);
|
||||
if (!result) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const index = global ? await getGlobalIndex() : await getLocalIndex();
|
||||
if (!index) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const updatedIndex = upsertEmbedding(index, learning.id, result.embedding);
|
||||
|
||||
if (global) {
|
||||
globalIndex = updatedIndex;
|
||||
} else {
|
||||
localIndex = updatedIndex;
|
||||
}
|
||||
|
||||
await persistIndexes();
|
||||
return true;
|
||||
};
|
||||
|
||||
/**
|
||||
* Remove embedding for a learning
|
||||
*/
|
||||
export const unindexLearning = async (
|
||||
learningId: string,
|
||||
global: boolean,
|
||||
): Promise<void> => {
|
||||
const index = global ? await getGlobalIndex() : await getLocalIndex();
|
||||
if (!index) {
|
||||
return;
|
||||
}
|
||||
|
||||
const updatedIndex = removeEmbedding(index, learningId);
|
||||
|
||||
if (global) {
|
||||
globalIndex = updatedIndex;
|
||||
} else {
|
||||
localIndex = updatedIndex;
|
||||
}
|
||||
|
||||
await persistIndexes();
|
||||
};
|
||||
|
||||
/**
|
||||
* Check if a learning has an embedding
|
||||
*/
|
||||
export const isLearningIndexed = async (
|
||||
learningId: string,
|
||||
): Promise<boolean> => {
|
||||
const gIndex = await getGlobalIndex();
|
||||
const lIndex = await getLocalIndex();
|
||||
|
||||
return (
|
||||
(gIndex !== null && hasEmbedding(gIndex, learningId)) ||
|
||||
(lIndex !== null && hasEmbedding(lIndex, learningId))
|
||||
);
|
||||
};
|
||||
|
||||
// =============================================================================
|
||||
// Semantic Search
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Search learnings by semantic similarity
|
||||
*/
|
||||
export const searchLearnings = async (
|
||||
query: string,
|
||||
learnings: StoredLearning[],
|
||||
options: SemanticSearchOptions = {},
|
||||
): Promise<SemanticSearchResult<StoredLearning>[]> => {
|
||||
const {
|
||||
topK = EMBEDDING_SEARCH.TOP_K,
|
||||
minSimilarity = EMBEDDING_SEARCH.MIN_SIMILARITY,
|
||||
} = options;
|
||||
|
||||
await initializeEmbeddingService();
|
||||
|
||||
if (!isEmbeddingAvailable()) {
|
||||
// Fallback to keyword matching
|
||||
return fallbackKeywordSearch(query, learnings, topK);
|
||||
}
|
||||
|
||||
// Embed the query
|
||||
const queryResult = await embed(query);
|
||||
if (!queryResult) {
|
||||
return fallbackKeywordSearch(query, learnings, topK);
|
||||
}
|
||||
|
||||
// Search both indexes
|
||||
const gIndex = await getGlobalIndex();
|
||||
const lIndex = await getLocalIndex();
|
||||
|
||||
const allResults: SimilarityResult[] = [];
|
||||
|
||||
if (gIndex) {
|
||||
allResults.push(
|
||||
...findSimilar(gIndex, queryResult.embedding, topK * 2, minSimilarity),
|
||||
);
|
||||
}
|
||||
|
||||
if (lIndex) {
|
||||
allResults.push(
|
||||
...findSimilar(lIndex, queryResult.embedding, topK * 2, minSimilarity),
|
||||
);
|
||||
}
|
||||
|
||||
// Deduplicate and sort
|
||||
const seen = new Set<string>();
|
||||
const uniqueResults: SimilarityResult[] = [];
|
||||
|
||||
for (const result of allResults.sort((a, b) => b.score - a.score)) {
|
||||
if (!seen.has(result.id)) {
|
||||
seen.add(result.id);
|
||||
uniqueResults.push(result);
|
||||
}
|
||||
}
|
||||
|
||||
// Map results to learnings
|
||||
const learningMap = new Map(learnings.map((l) => [l.id, l]));
|
||||
const searchResults: SemanticSearchResult<StoredLearning>[] = [];
|
||||
|
||||
for (let i = 0; i < Math.min(uniqueResults.length, topK); i++) {
|
||||
const result = uniqueResults[i];
|
||||
const learning = learningMap.get(result.id);
|
||||
|
||||
if (learning) {
|
||||
searchResults.push({
|
||||
item: learning,
|
||||
score: result.score,
|
||||
rank: i + 1,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return searchResults;
|
||||
};
|
||||
|
||||
// =============================================================================
|
||||
// Fallback Search
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Simple keyword-based search as fallback when embeddings unavailable
|
||||
*/
|
||||
const fallbackKeywordSearch = (
|
||||
query: string,
|
||||
learnings: StoredLearning[],
|
||||
topK: number,
|
||||
): SemanticSearchResult<StoredLearning>[] => {
|
||||
const queryTokens = tokenize(query);
|
||||
|
||||
if (queryTokens.length === 0) {
|
||||
// Return most recent if no query tokens
|
||||
return learnings.slice(0, topK).map((item, i) => ({
|
||||
item,
|
||||
score: 1 - i * 0.05,
|
||||
rank: i + 1,
|
||||
}));
|
||||
}
|
||||
|
||||
// Score each learning by token overlap
|
||||
const scored = learnings.map((learning) => {
|
||||
const contentTokens = tokenize(learning.content);
|
||||
const overlap = queryTokens.filter((t) => contentTokens.includes(t)).length;
|
||||
const score = overlap / Math.max(queryTokens.length, 1);
|
||||
|
||||
return { learning, score };
|
||||
});
|
||||
|
||||
// Sort by score and return top K
|
||||
return scored
|
||||
.filter((s) => s.score > 0)
|
||||
.sort((a, b) => b.score - a.score)
|
||||
.slice(0, topK)
|
||||
.map((s, i) => ({
|
||||
item: s.learning,
|
||||
score: s.score,
|
||||
rank: i + 1,
|
||||
}));
|
||||
};
|
||||
|
||||
/**
|
||||
* Simple tokenizer for fallback search
|
||||
*/
|
||||
const tokenize = (text: string): string[] =>
|
||||
text
|
||||
.toLowerCase()
|
||||
.split(/\W+/)
|
||||
.filter((t) => t.length > 2);
|
||||
|
||||
// =============================================================================
|
||||
// Index Rebuilding
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Rebuild all embeddings for existing learnings
|
||||
*/
|
||||
export const rebuildIndex = async (
|
||||
learnings: StoredLearning[],
|
||||
global: boolean,
|
||||
onProgress?: (current: number, total: number) => void,
|
||||
): Promise<{ indexed: number; failed: number }> => {
|
||||
await initializeEmbeddingService();
|
||||
|
||||
if (!isEmbeddingAvailable()) {
|
||||
return { indexed: 0, failed: learnings.length };
|
||||
}
|
||||
|
||||
let indexed = 0;
|
||||
let failed = 0;
|
||||
|
||||
for (let i = 0; i < learnings.length; i++) {
|
||||
const learning = learnings[i];
|
||||
const success = await indexLearning(learning, global);
|
||||
|
||||
if (success) {
|
||||
indexed++;
|
||||
} else {
|
||||
failed++;
|
||||
}
|
||||
|
||||
onProgress?.(i + 1, learnings.length);
|
||||
}
|
||||
|
||||
return { indexed, failed };
|
||||
};
|
||||
|
||||
// =============================================================================
|
||||
// Cache Management
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Clear in-memory index cache
|
||||
*/
|
||||
export const clearIndexCache = (): void => {
|
||||
globalIndex = null;
|
||||
localIndex = null;
|
||||
};
|
||||
|
||||
/**
|
||||
* Get index statistics
|
||||
*/
|
||||
export const getIndexStatistics = async (): Promise<{
|
||||
global: { count: number; model: string } | null;
|
||||
local: { count: number; model: string } | null;
|
||||
embeddingsAvailable: boolean;
|
||||
}> => {
|
||||
await initializeEmbeddingService();
|
||||
|
||||
const gIndex = await getGlobalIndex();
|
||||
const lIndex = await getLocalIndex();
|
||||
|
||||
return {
|
||||
global: gIndex
|
||||
? { count: Object.keys(gIndex.embeddings).length, model: gIndex.model }
|
||||
: null,
|
||||
local: lIndex
|
||||
? { count: Object.keys(lIndex.embeddings).length, model: lIndex.model }
|
||||
: null,
|
||||
embeddingsAvailable: isEmbeddingAvailable(),
|
||||
};
|
||||
};
|
||||
243
src/services/learning/vector-store.ts
Normal file
243
src/services/learning/vector-store.ts
Normal file
@@ -0,0 +1,243 @@
|
||||
/**
|
||||
* Vector Store
|
||||
*
|
||||
* Stores and searches embeddings for semantic retrieval
|
||||
*/
|
||||
|
||||
import * as fs from "fs/promises";
|
||||
import * as path from "path";
|
||||
|
||||
import { EMBEDDING_STORAGE, EMBEDDING_SEARCH } from "@constants/embeddings";
|
||||
|
||||
import type {
|
||||
EmbeddingVector,
|
||||
EmbeddingIndex,
|
||||
StoredEmbedding,
|
||||
SimilarityResult,
|
||||
} from "@/types/embeddings";
|
||||
|
||||
import { createEmptyIndex } from "@/types/embeddings";
|
||||
|
||||
// =============================================================================
|
||||
// Vector Math
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Compute cosine similarity between two vectors
|
||||
* Returns value between -1 and 1 (1 = identical, 0 = orthogonal, -1 = opposite)
|
||||
*/
|
||||
export const cosineSimilarity = (
|
||||
a: EmbeddingVector,
|
||||
b: EmbeddingVector,
|
||||
): number => {
|
||||
if (a.length !== b.length) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
let dotProduct = 0;
|
||||
let normA = 0;
|
||||
let normB = 0;
|
||||
|
||||
for (let i = 0; i < a.length; i++) {
|
||||
dotProduct += a[i] * b[i];
|
||||
normA += a[i] * a[i];
|
||||
normB += b[i] * b[i];
|
||||
}
|
||||
|
||||
const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
|
||||
|
||||
if (magnitude === 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return dotProduct / magnitude;
|
||||
};
|
||||
|
||||
/**
|
||||
* Compute Euclidean distance between two vectors
|
||||
*/
|
||||
export const euclideanDistance = (
|
||||
a: EmbeddingVector,
|
||||
b: EmbeddingVector,
|
||||
): number => {
|
||||
if (a.length !== b.length) {
|
||||
return Infinity;
|
||||
}
|
||||
|
||||
let sum = 0;
|
||||
for (let i = 0; i < a.length; i++) {
|
||||
const diff = a[i] - b[i];
|
||||
sum += diff * diff;
|
||||
}
|
||||
|
||||
return Math.sqrt(sum);
|
||||
};
|
||||
|
||||
// =============================================================================
|
||||
// Index File Operations
|
||||
// =============================================================================
|
||||
|
||||
const getIndexPath = (baseDir: string): string =>
|
||||
path.join(baseDir, EMBEDDING_STORAGE.INDEX_FILE);
|
||||
|
||||
/**
|
||||
* Load embedding index from disk
|
||||
*/
|
||||
export const loadIndex = async (
|
||||
baseDir: string,
|
||||
model: string,
|
||||
): Promise<EmbeddingIndex> => {
|
||||
const indexPath = getIndexPath(baseDir);
|
||||
|
||||
try {
|
||||
const data = await fs.readFile(indexPath, "utf-8");
|
||||
const index = JSON.parse(data) as EmbeddingIndex;
|
||||
|
||||
// Check version and model compatibility
|
||||
if (index.version !== EMBEDDING_STORAGE.VERSION || index.model !== model) {
|
||||
// Index is incompatible, create new one
|
||||
return createEmptyIndex(model);
|
||||
}
|
||||
|
||||
return index;
|
||||
} catch {
|
||||
// Index doesn't exist or is invalid
|
||||
return createEmptyIndex(model);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Save embedding index to disk
|
||||
*/
|
||||
export const saveIndex = async (
|
||||
baseDir: string,
|
||||
index: EmbeddingIndex,
|
||||
): Promise<void> => {
|
||||
const indexPath = getIndexPath(baseDir);
|
||||
|
||||
await fs.mkdir(baseDir, { recursive: true });
|
||||
await fs.writeFile(indexPath, JSON.stringify(index, null, 2), "utf-8");
|
||||
};
|
||||
|
||||
// =============================================================================
|
||||
// Index Operations
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Add or update an embedding in the index
|
||||
*/
|
||||
export const upsertEmbedding = (
|
||||
index: EmbeddingIndex,
|
||||
id: string,
|
||||
embedding: EmbeddingVector,
|
||||
): EmbeddingIndex => {
|
||||
const stored: StoredEmbedding = {
|
||||
id,
|
||||
embedding,
|
||||
model: index.model,
|
||||
createdAt: Date.now(),
|
||||
};
|
||||
|
||||
return {
|
||||
...index,
|
||||
embeddings: {
|
||||
...index.embeddings,
|
||||
[id]: stored,
|
||||
},
|
||||
lastUpdated: Date.now(),
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* Remove an embedding from the index
|
||||
*/
|
||||
export const removeEmbedding = (
|
||||
index: EmbeddingIndex,
|
||||
id: string,
|
||||
): EmbeddingIndex => {
|
||||
const { [id]: _, ...remaining } = index.embeddings;
|
||||
|
||||
return {
|
||||
...index,
|
||||
embeddings: remaining,
|
||||
lastUpdated: Date.now(),
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* Check if an embedding exists in the index
|
||||
*/
|
||||
export const hasEmbedding = (index: EmbeddingIndex, id: string): boolean =>
|
||||
id in index.embeddings;
|
||||
|
||||
/**
|
||||
* Get an embedding from the index
|
||||
*/
|
||||
export const getEmbedding = (
|
||||
index: EmbeddingIndex,
|
||||
id: string,
|
||||
): StoredEmbedding | null => index.embeddings[id] ?? null;
|
||||
|
||||
// =============================================================================
|
||||
// Similarity Search
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Find the most similar embeddings to a query vector
|
||||
*/
|
||||
export const findSimilar = (
|
||||
index: EmbeddingIndex,
|
||||
queryVector: EmbeddingVector,
|
||||
topK: number = EMBEDDING_SEARCH.TOP_K,
|
||||
minSimilarity: number = EMBEDDING_SEARCH.MIN_SIMILARITY,
|
||||
): SimilarityResult[] => {
|
||||
const results: SimilarityResult[] = [];
|
||||
|
||||
for (const [id, stored] of Object.entries(index.embeddings)) {
|
||||
const score = cosineSimilarity(queryVector, stored.embedding);
|
||||
|
||||
if (score >= minSimilarity) {
|
||||
results.push({ id, score });
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by score descending and take top K
|
||||
return results.sort((a, b) => b.score - a.score).slice(0, topK);
|
||||
};
|
||||
|
||||
/**
|
||||
* Find all embeddings above a similarity threshold
|
||||
*/
|
||||
export const findAboveThreshold = (
|
||||
index: EmbeddingIndex,
|
||||
queryVector: EmbeddingVector,
|
||||
threshold: number,
|
||||
): SimilarityResult[] => {
|
||||
const results: SimilarityResult[] = [];
|
||||
|
||||
for (const [id, stored] of Object.entries(index.embeddings)) {
|
||||
const score = cosineSimilarity(queryVector, stored.embedding);
|
||||
|
||||
if (score >= threshold) {
|
||||
results.push({ id, score });
|
||||
}
|
||||
}
|
||||
|
||||
return results.sort((a, b) => b.score - a.score);
|
||||
};
|
||||
|
||||
// =============================================================================
|
||||
// Index Statistics
|
||||
// =============================================================================
|
||||
|
||||
export const getIndexStats = (
|
||||
index: EmbeddingIndex,
|
||||
): {
|
||||
count: number;
|
||||
model: string;
|
||||
lastUpdated: number;
|
||||
} => ({
|
||||
count: Object.keys(index.embeddings).length,
|
||||
model: index.model,
|
||||
lastUpdated: index.lastUpdated,
|
||||
});
|
||||
Reference in New Issue
Block a user