# Search
```
│
├─ Need the best accuracy?
│  └─ YES → Use mixedbread-embeddings-cosine.ts or openai-cosine.ts
│
└─ Want managed search (no embeddings management)?
   └─ YES → Use jigsawstack-orama.ts or mixedbread.ts
```

### Performance

| Strategy | Model Load (first run) | Model Load (cached) | Query Embedding | Total (cached) | Network Required |
|---|---|---|---|---|---|
| **transformers-local-onnx** | ~150ms | ~50ms | ~10-30ms | **~60-80ms** | ❌ No |
| **transformers-cosine** | ~3-5s | ~150ms | ~10-30ms | ~160-180ms | ✅ First run only |
| **mixedbread-embeddings** | N/A | N/A | ~50-100ms | ~50-100ms | ✅ Every query |
| **openai-cosine** | N/A | N/A | ~100-200ms | ~100-200ms | ✅ Every query |
| **hf-inference-qwen3** | N/A | N/A | ~150-300ms | ~150-300ms | ✅ Every query |

### Cost

| Strategy | Cost | Rate Limit | Notes |
|---|---|---|---|
| **transformers-local-onnx** | $0 | ∞ | 100% free, runs locally |
| **transformers-cosine** | $0 | ∞ | 100% free, runs locally |
| **mixedbread-embeddings** | $0-$ | Generous | Free tier: 150 req/min, 100M tokens/mo |
| **openai-cosine** | $$ | Limited | $0.0001/1K tokens (text-embedding-3-small) |
| **hf-inference-qwen3** | $0 | Generous | Free tier: 1000 req/day |

### Models

| Strategy | Model | Dimensions | MTEB Score | Notes |
|---|---|---|---|---|
| **transformers-local-onnx** | all-MiniLM-L6-v2 | 384 | ~58 | Fast, good quality |
| **transformers-cosine** | all-MiniLM-L6-v2 | 384 | ~58 | Same as local |
| **mixedbread-embeddings** | mxbai-embed-large-v1 | 1024 | ~64 | Higher quality |
| **openai-cosine** | text-embedding-3-small | 1536 | ~62 | Reliable, tested |
| **hf-inference-qwen3** | Qwen3-Embedding-8B | 768 | ~65 | Very high quality |

### 🎯 Maximum Accuracy

**Use**: `hf-inference-qwen3-cosine.ts` or `mixedbread-embeddings-cosine.ts`

**Why**:
- Highest MTEB scores in this comparison (~65 and ~64)

### Managed Search

**Use**: `jigsawstack-orama.ts` or `mixedbread.ts`

**Why**:
- No embedding management needed
- Handles storage, search, and embeddings
- Less code to maintain

**Best for**:
- When you want a managed solution
- Don't want to store embeddings yourself
- Prefer APIs over local computation

### Switching Strategies

2. Update the import:

   ```typescript
   // Before
   // import { searchStrategy, generateEmbeddings } from "./openai-cosine.ts";

   // After
   import { searchStrategy, generateEmbeddings } from "./transformers-local-onnx.ts";
   ```

3. Recalculate embeddings (if dimensions differ):

   ```bash
   GET /cache/recalculate?force=true
   ```
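The `*-cosine` strategies shown below all share the same ranking step: the query embedding is compared against each cached page embedding with cosine similarity, computed locally. As a minimal sketch of that computation (the helper name `cosineSimilarity` is an assumption, not taken from the repo's actual code):

```typescript
// Minimal cosine-similarity sketch (hypothetical helper; the repo's actual
// implementation may differ). Both vectors must have the same dimension.
export const cosineSimilarity = (a: number[], b: number[]): number => {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  // Guard against zero vectors to avoid NaN results
  if (normA === 0 || normB === 0) return 0;
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
};
```

Because page embeddings are cached, this loop costs O(pages × dimension) per query, which is why the query-embedding step (local model or API call) dominates the timings in the performance table above.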
### `mixedbread-embeddings-cosine.ts`

```typescript
// Mixedbread Embeddings + Cosine Similarity Strategy
// Uses Mixedbread's Embeddings API for vector generation with local cosine similarity
// Model: mxbai-embed-large-v1 (state-of-the-art English embeddings)
import type { SearchStrategy, SearchResult, Page, SearchOptions } from "./types.ts";

// …
  if (!MIXEDBREAD_API_KEY) {
    throw new Error("MIXEDBREAD_API_KEY or MXBAI_API_KEY not found - embeddings disabled");
  }
}

// Generate embeddings using Mixedbread Embeddings API
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
  try {
    const client = await getMixedbreadClient();

    // Generate embeddings using the SDK's embeddings.create() method
    const response = await client.embeddings.create({
      model: "mixedbread-ai/mxbai-embed-large-v1",
      input: content,
    });

    // …
    console.warn("Unexpected Mixedbread Embeddings API response format:", JSON.stringify(response, null, 2));
    return null;
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    console.error("Mixedbread embeddings failed:", errorMessage);
    if (error instanceof Error && error.stack) {
      console.error("Stack trace:", error.stack);
    }
    return null;
  }
};

export const searchStrategy: SearchStrategy = {
  name: "mixedbread-embeddings-cosine",
  description: "Semantic search using Mixedbread Embeddings API (mxbai-embed-large-v1) with local cosine similarity",
  search: async (query: string, pages: Page[], options: SearchOptions = {}): Promise<SearchResult[]> => {
    const limit = options.limit || 10;

    // Generate query embedding (Mixedbread API call)
    const queryEmbedStart = performance.now();
    const queryEmbedding = await generateEmbeddings(query);
    if (enableTiming) {
      timings.queryEmbedding = performance.now() - queryEmbedStart;
    }

    // …
    for (const page of pages) {
      const pageEmbedding = page.embeddings;
      if (!pageEmbedding || pageEmbedding.length !== queryEmbedding.length) {
        continue; // Skip pages without embeddings or wrong dimension
      }

      // …
        snippet: generateSnippet(page.content, queryWords, query.toLowerCase()),
        metadata: {
          strategy: "mixedbread-embeddings-cosine",
          similarity,
          embeddingDimension: queryEmbedding.length,
        },
      // …
    }
```

### `transformers-local-onnx.ts`

```typescript
// Generate embeddings using local ONNX model (runs locally, no network calls)
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
  try {
    const pipeline = await getEmbeddingPipeline();

    // Generate embeddings - returns a tensor, we need to extract the array
    const output = await pipeline(content, {
      pooling: 'mean',
      // …
    });
    // …
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    console.error("Local ONNX embeddings failed:", errorMessage);
    return null;
  }
};

// …
    // Generate query embedding (local, no network calls)
    const queryEmbedStart = performance.now();
    const queryEmbedding = await generateEmbeddings(query);
    if (enableTiming) {
      timings.queryEmbedding = performance.now() - queryEmbedStart;
    }

    for (const page of pages) {
      const pageEmbedding = page.embeddings;
      if (!pageEmbedding || pageEmbedding.length !== queryEmbedding.length) {
        continue; // Skip pages without embeddings or wrong dimension
      }
      // …
```
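`getEmbeddingPipeline()` itself is not shown in the excerpt above. A minimal sketch of how it could be implemented with transformers.js follows; the `@xenova/transformers` package and the `Xenova/all-MiniLM-L6-v2` checkpoint are assumptions inferred from the all-MiniLM-L6-v2 model named in the models table, not confirmed by the repo:

```typescript
// Hypothetical getEmbeddingPipeline() sketch, assuming transformers.js.
// Caches the feature-extraction pipeline so the ONNX model is loaded only once
// per process (matching the first-load vs. cached-load timings above).
import { pipeline } from "npm:@xenova/transformers";

let embeddingPipeline: Awaited<ReturnType<typeof pipeline>> | null = null;

export const getEmbeddingPipeline = async () => {
  if (!embeddingPipeline) {
    // Downloads the ONNX weights on first use, then serves from the local cache
    embeddingPipeline = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
  }
  return embeddingPipeline;
};
```

With `pooling: "mean"` (and typically `normalize: true`), transformers.js returns a tensor whose `.data` is a `Float32Array`, so `Array.from(output.data)` yields the 384-dimension `number[]` listed in the models table.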
### Loading the embeddings cache with `fast-json`

```typescript
    url: string;
    title?: string;
    embeddings?: number[] | null;
    metadata?: any;
    contentHash?: string;
  });

// Embeddings are arrays - need to handle them specially
// fast-json emits arrays element by element, so we need to collect them
const embeddingsBuffer = new Map<number, number[]>();

fastJson.on('results[*].embeddings[*]', (value) => {
  if (!resultsMap.has(currentIndex)) resultsMap.set(currentIndex, {});
  if (!embeddingsBuffer.has(currentIndex)) embeddingsBuffer.set(currentIndex, []);
  embeddingsBuffer.get(currentIndex)!.push(value as unknown as number);
});

// When embeddings array is done, store it
fastJson.on('results[*].embeddings', (value) => {
  if (!resultsMap.has(currentIndex)) resultsMap.set(currentIndex, {});
  // If value is already an array, use it directly (fallback)
  if (Array.isArray(value)) {
    resultsMap.get(currentIndex)!.embeddings = value;
  } else if (embeddingsBuffer.has(currentIndex)) {
    // Use collected array elements
    resultsMap.get(currentIndex)!.embeddings = embeddingsBuffer.get(currentIndex)!;
    embeddingsBuffer.delete(currentIndex);
  } else {
    resultsMap.get(currentIndex)!.embeddings = value as unknown as number[] | null;
  }
});

// …
  content: result.content || "", // Content now included in JSON
  title: result.title,
  embeddings: result.embeddings,
  metadata: result.metadata,
}));
```

### `placeholder.ts`

```typescript
import { generateSnippet } from "./utils.ts";

// Fake embeddings function for placeholder (not actually used, but kept for interface consistency)
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
  // Not used in placeholder strategy, but kept for consistency
  // Returns fake embeddings for caching purposes
  console.debug("Generating fake embeddings for placeholder strategy");
  const fakeEmbedding: number[] = [];
  // …
};
```

### `types.ts` (excerpt)

```typescript
  content: string;
  title?: string;
  embeddings?: number[] | null;
  metadata?: any;
};
```

### `jigsawstack-orama.ts`

```typescript
// JigsawStack + Orama Strategy: Semantic search using JigsawStack embeddings and Orama
import type { SearchStrategy, SearchResult, Page, SearchOptions } from "./types.ts";
import { create, insertMultiple, search } from "npm:@orama/orama@latest";

// JigsawStack embeddings function (v2 API)
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
  const JIGSAWSTACK_API_KEY = Deno.env.get("JIGSAWSTACK_API_KEY");
  const JIGSAWSTACK_API_URL = "https://api.jigsawstack.com/v2/embedding";

  if (!JIGSAWSTACK_API_KEY) {
    console.warn("JIGSAWSTACK_API_KEY not found - embeddings disabled");
    return null;
  }

  // …
    const data = await response.json();

    // JigsawStack v2 returns: { success: true, embeddings: [[...]], chunks: [...], _usage: {...} }
    // embeddings is an array of arrays, we want the first embedding vector
    if (data.success && data.embeddings && Array.isArray(data.embeddings) && data.embeddings.length > 0) {
      return data.embeddings[0]; // Return the first embedding vector
    }
    // …
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    console.error("JigsawStack embeddings failed:", errorMessage);
    return null;
  }
};

export const searchStrategy: SearchStrategy = {
  name: "jigsawstack-orama",
  description: "Semantic search using JigsawStack embeddings with Orama vector search",
  search: async (query: string, pages: Page[], options: SearchOptions = {}): Promise<SearchResult[]> => {
    const limit = options.limit || 10;

    // Generate query embedding (this is the main bottleneck - external API call)
    const queryEmbedStart = performance.now();
    const queryEmbedding = await generateEmbeddings(query);
    if (enableTiming) timings.queryEmbedding = performance.now() - queryEmbedStart;
    // …

    // Determine embedding dimension from query (or first page with embeddings)
    const embeddingDimension = queryEmbedding.length;
    if (embeddingDimension === 0) {
      // …
    }

    // …
    if (enableTiming) timings.dbCreate = performance.now() - dbCreateStart;

    // Prepare documents with embeddings for insertion
    const documents: Array<{
      path: string;
      // …
    }> = [];

    // Collect pages with embeddings
    const docPrepStart = performance.now();
    for (const page of pages) {
      // Use cached embeddings if available, otherwise generate
      let pageEmbedding = page.embeddings;
      if (!pageEmbedding) {
        // Generate embedding if not cached (should be cached from recalculate, but handle missing)
        pageEmbedding = await generateEmbeddings(page.content);
      }
      if (!pageEmbedding || pageEmbedding.length !== embeddingDimension) {
        continue; // Skip pages without embeddings or wrong dimension
      }
      // …
    }
```
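The excerpt stops before the documents are inserted and queried. A minimal sketch of that step using Orama's vector mode follows; the schema fields, the sample data, and the 1536 dimension (text-embedding-3-small) are illustrative assumptions, not the repo's actual code:

```typescript
// Sketch of the Orama insert-and-query step, assuming Orama's documented
// mode: "vector" search API. Adjust vector[N] to the active strategy's
// embedding dimension - Orama requires it to match exactly.
import { create, insertMultiple, search } from "npm:@orama/orama@latest";

const db = await create({
  schema: {
    path: "string",
    content: "string",
    embeddings: "vector[1536]", // must equal the embedding dimension
  },
});

// Illustrative stand-ins for the documents prepared in the loop above
// and the query embedding returned by generateEmbeddings()
const documents = [
  { path: "/docs/intro", content: "Getting started", embeddings: new Array(1536).fill(0.01) },
];
const queryEmbedding = new Array(1536).fill(0.01);

await insertMultiple(db, documents);

const hits = await search(db, {
  mode: "vector",
  vector: { value: queryEmbedding, property: "embeddings" },
  similarity: 0.7, // minimum cosine similarity for a hit
  limit: 10,
});
```

Declaring the dimension in the schema is also why the strategies skip pages whose cached embeddings have the wrong length: Orama rejects vectors that do not match `vector[N]`.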
Deno.env.get("OPENAI_API_KEY"); const OPENAI_API_URL = "https://api.openai.com/v1/embeddings"; if (!OPENAI_API_KEY) { console.warn("OPENAI_API_KEY not found - embeddings disabled"); return null; } } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); console.error("OpenAI embeddings failed:", errorMessage); return null; }};// No caching for query embeddings - we want to benchmark actual API performanceexport const searchStrategy: SearchStrategy = { name: "openai-orama", description: "Semantic search using OpenAI embeddings with Orama vector search (faster than JigsawStack)", search: async (query: string, pages: Page[], options: SearchOptions = {}): Promise<SearchResult[]> => { const limit = options.limit || 10; // Generate query embedding (no caching - benchmarking actual API performance) const queryEmbedStart = performance.now(); const queryEmbedding = await generateEmbeddings(query); if (enableTiming) { timings.queryEmbedding = performance.now() - queryEmbedStart; } // Determine embedding dimension from query (or first page with embeddings) const embeddingDimension = queryEmbedding.length; if (embeddingDimension === 0) { if (enableTiming) timings.dbCreate = performance.now() - dbCreateStart; // Prepare documents with embeddings for insertion const documents: Array<{ path: string; }> = []; // Collect pages with embeddings const docPrepStart = performance.now(); for (const page of pages) { // Use cached embeddings if available, otherwise generate let pageEmbedding = page.embeddings; if (!pageEmbedding) { // Generate embedding if not cached (should be cached from recalculate, but handle missing) pageEmbedding = await generateEmbeddings(page.content); if (!pageEmbedding || pageEmbedding.length !== embeddingDimension) { continue; // Skip pages without embeddings or wrong dimension } }