```typescript
// Run with: deno run --allow-read --allow-env --allow-net --allow-ffi test-server-mode.ts
import { searchStrategy, generateEmbeddings } from "../transformers-local-onnx.ts";

console.log("🧪 Testing ONNX in Server Mode (long-running process)\n");

const query = "What is Groq?"; // example query
const start = performance.now();
const embedding = await generateEmbeddings(query);
const elapsed = performance.now() - start;
```

```typescript
// Run with: deno run --allow-read --allow-env --allow-net test-local-model.ts
import { searchStrategy, generateEmbeddings } from "../transformers-local-onnx.ts";

console.log("🧪 Testing Local ONNX Model Strategy\n");

// Test 1: Generate embeddings for a simple query
console.log("Test 1: Generate embeddings for a query");
console.log("Query: 'What is Groq?'\n");

const start = performance.now();
const embeddings = await generateEmbeddings("What is Groq?");
const elapsed = performance.now() - start;

if (embeddings) {
  console.log(`✅ Generated embeddings successfully!`);
  console.log(`   Dimensions: ${embeddings.length}`);
  console.log(`   First 5 values: [${embeddings.slice(0, 5).map(v => v.toFixed(4)).join(", ")}...]`);
  console.log(`   Time: ${elapsed.toFixed(2)}ms`);
} else {
  console.log(`❌ Failed to generate embeddings`);
}

console.log("\n" + "=".repeat(60) + "\n");

// Test 2: Generate embeddings for multiple queries (to test caching)
console.log("Test 2: Generate embeddings for multiple queries (testing cache)");
const queries = [
  "How to use Groq API?",
  // ...more queries
];

for (const query of queries) {
  const queryStart = performance.now();
  const queryEmbedding = await generateEmbeddings(query);
  const queryElapsed = performance.now() - queryStart;
  console.log(`✅ "${query}" Time: ${queryElapsed.toFixed(2)}ms (cached pipeline)`);
}

// Sample pages with precomputed embeddings (used by the search test)
const pages = [
  {
    title: "Introduction to Groq",
    content: "Groq is a fast AI inference platform that provides APIs for various language models.",
    embeddings: await generateEmbeddings("Groq is a fast AI inference platform that provides APIs for various language models."),
  },
  {
    title: "API Keys",
    content: "Learn how to create and manage your Groq API keys for authentication.",
    embeddings: await generateEmbeddings("Learn how to create and manage your Groq API keys for authentication."),
  },
  {
    title: "Available Models",
    content: "Groq supports various language models including Llama, Mixtral, and Gemma.",
    embeddings: await generateEmbeddings("Groq supports various language models including Llama, Mixtral, and Gemma."),
  },
];
```

To switch strategies, update the import in `search/index.ts`:

```typescript
// Comment out the current strategy
// import { searchStrategy, generateEmbeddings } from "./transformers-cosine.ts";

// Uncomment the local ONNX strategy
import { searchStrategy, generateEmbeddings } from "./transformers-local-onnx.ts";
```
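Test 2 relies on the embedding pipeline being created once and then reused for every call. The real logic lives in `transformers-local-onnx.ts`; what follows is only a minimal sketch of the pattern, assuming transformers.js (`@huggingface/transformers`) and the `Xenova/all-MiniLM-L6-v2` ONNX checkpoint, neither of which is confirmed by this document:

```typescript
import { pipeline } from "@huggingface/transformers";

// Module-level cache: the first call pays the model-load cost,
// later calls reuse the pipeline (hence the ~4ms cached timings below).
let cachedPipeline: any = null;

export const getEmbeddingPipeline = async () => {
  if (!cachedPipeline) {
    cachedPipeline = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
  }
  return cachedPipeline;
};

// Usage sketch: mean-pool to get a 384-dim vector.
// normalize: true is an assumption; the excerpts below only show pooling: 'mean'.
const embedder = await getEmbeddingPipeline();
const output = await embedder("What is Groq?", { pooling: "mean", normalize: true });
const vector = Array.from(output.data as Float32Array);
```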
Sample output from `test-local-model.ts`:

```
total model load: 239ms
✅ Generated embeddings successfully!
   Dimensions: 384
   First 5 values: [-0.0457, -0.0109, -0.0935, ...]
   Time: 247ms

Test 2: Generate embeddings for multiple queries (testing cache)
✅ "How to use Groq API?" Time: 3.87ms (cached pipeline)
```
"How to use Groq API?" Time: 3.87ms (cached pipeline)**Fix**: Check `search/index.ts` has the correct import:```typescriptimport { searchStrategy, generateEmbeddings } from "./transformers-local-onnx.ts";```- Slightly less accurate: ~57 vs ~58 MTEB score### Optional: Recalculate EmbeddingsIf you were using a different strategy before, regenerate embeddings:```bash```This ensures all page embeddings use the same model.## Performance Comparisonβββ Need the best accuracy?β ββ YES β Use mixedbread-embeddings-cosine.ts or openai-cosine.tsβββ Want managed search (no embeddings management)? ββ YES β Use jigsawstack-orama.ts or mixedbread.ts```| **transformers-local-onnx** | ~150ms | ~50ms | ~10-30ms | **~60-80ms** | β No || **transformers-cosine** | ~3-5s | ~150ms | ~10-30ms | ~160-180ms | β
## Performance Comparison

```
├── Need the best accuracy?
│   └── YES → Use mixedbread-embeddings-cosine.ts or openai-cosine.ts
└── Want managed search (no embeddings management)?
    └── YES → Use jigsawstack-orama.ts or mixedbread.ts
```

| Strategy | Model Load (first run) | Pipeline Init | Query Embedding | Total Query Time | Network Required |
| --- | --- | --- | --- | --- | --- |
| **transformers-local-onnx** | ~150ms | ~50ms | ~10-30ms | **~60-80ms** | ❌ No |
| **transformers-cosine** | ~3-5s | ~150ms | ~10-30ms | ~160-180ms | ✅ First run only |
| **mixedbread-embeddings** | N/A | N/A | ~50-100ms | ~50-100ms | ✅ Every query |
| **openai-cosine** | N/A | N/A | ~100-200ms | ~100-200ms | ✅ Every query |
| **hf-inference-qwen3** | N/A | N/A | ~150-300ms | ~150-300ms | ✅ Every query |
| Strategy | Cost | Rate Limits | Notes |
| --- | --- | --- | --- |
| **transformers-local-onnx** | $0 | ∞ | 100% free, runs locally |
| **transformers-cosine** | $0 | ∞ | 100% free, runs locally |
| **mixedbread-embeddings** | $0-$ | Generous | Free tier: 150 req/min, 100M tokens/mo |
| **openai-cosine** | $$ | Limited | $0.0001/1K tokens (text-embedding-3-small) |
| **hf-inference-qwen3** | $0 | Generous | Free tier: 1000 req/day |

| Strategy | Model | Dimensions | MTEB Score | Notes |
| --- | --- | --- | --- | --- |
| **transformers-local-onnx** | all-MiniLM-L6-v2 | 384 | ~58 | Fast, good quality |
| **transformers-cosine** | all-MiniLM-L6-v2 | 384 | ~58 | Same as local |
| **mixedbread-embeddings** | mxbai-embed-large-v1 | 1024 | ~64 | Higher quality |
| **openai-cosine** | text-embedding-3-small | 1536 | ~62 | Reliable, tested |
| **hf-inference-qwen3** | Qwen3-Embedding-8B | 768 | ~65 | Very high quality |

### 🎯 Maximum Accuracy

**Use**: `hf-inference-qwen3-cosine.ts` or `mixedbread-embeddings-cosine.ts`

**Why**:
- Highest MTEB scores of the compared models (~64-65)

### Managed Search

**Use**: `jigsawstack-orama.ts` or `mixedbread.ts`

**Why**:
- No embedding management needed
- Handles storage, search, and embeddings
- Less code to maintain

**Best for**:
- When you want a managed solution
- Don't want to store embeddings yourself
- Prefer APIs over local computation

To migrate to a different strategy:

1. Update the import in `search/index.ts`:

   ```typescript
   // Before
   // import { searchStrategy, generateEmbeddings } from "./openai-cosine.ts";

   // After
   import { searchStrategy, generateEmbeddings } from "./transformers-local-onnx.ts";
   ```

2. Recalculate embeddings (if dimensions differ):

   ```bash
   GET /cache/recalculate?force=true
   ```
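The `*-cosine` strategies shown in the excerpts that follow all rank pages the same way: cosine similarity between the query embedding and each stored page embedding, computed locally. A minimal sketch of such a helper (hypothetical name; the real files may differ, for example by normalizing vectors up front so the score reduces to a dot product):

```typescript
// Cosine similarity between two equal-length vectors.
const cosineSimilarity = (a: number[], b: number[]): number => {
  let dot = 0, normA = 0, normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  // Guard against zero vectors to avoid division by zero.
  return dot / (Math.sqrt(normA) * Math.sqrt(normB) || 1);
};
```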
From `mixedbread-embeddings-cosine.ts` (excerpts):

```typescript
// Mixedbread Embeddings + Cosine Similarity Strategy
// Uses Mixedbread's Embeddings API for vector generation with local cosine similarity
// Model: mxbai-embed-large-v1 (state-of-the-art English embeddings)
import type { SearchStrategy, SearchResult, Page, SearchOptions } from "./types.ts";

// ...

  if (!MIXEDBREAD_API_KEY) {
    throw new Error("MIXEDBREAD_API_KEY or MXBAI_API_KEY not found - embeddings disabled");
  }
}

// Generate embeddings using Mixedbread Embeddings API
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
  try {
    const client = await getMixedbreadClient();

    // Generate embeddings using the SDK's embeddings.create() method
    const response = await client.embeddings.create({
      model: "mixedbread-ai/mxbai-embed-large-v1",
      input: content,
    });

    // ... (extract and return the embedding vector on success)

    console.warn("Unexpected Mixedbread Embeddings API response format:", JSON.stringify(response, null, 2));
    return null;
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    console.error("Mixedbread embeddings failed:", errorMessage);
    if (error instanceof Error && error.stack) {
      console.error("Stack trace:", error.stack);
    }
    return null;
  }
};

export const searchStrategy: SearchStrategy = {
  name: "mixedbread-embeddings-cosine",
  description: "Semantic search using Mixedbread Embeddings API (mxbai-embed-large-v1) with local cosine similarity",
  search: async (query: string, pages: Page[], options: SearchOptions = {}): Promise<SearchResult[]> => {
    const limit = options.limit || 10;

    // Generate query embedding (Mixedbread API call)
    const queryEmbedStart = performance.now();
    const queryEmbedding = await generateEmbeddings(query);
    if (enableTiming) {
      timings.queryEmbedding = performance.now() - queryEmbedStart;
    }

    // ...

    for (const page of pages) {
      const pageEmbedding = page.embeddings;
      if (!pageEmbedding || pageEmbedding.length !== queryEmbedding.length) {
        continue; // Skip pages without embeddings or wrong dimension
      }

      // ... (compute similarity, then assemble the result)
        snippet: generateSnippet(page.content, queryWords, query.toLowerCase()),
        metadata: {
          strategy: "mixedbread-embeddings-cosine",
          similarity,
          embeddingDimension: queryEmbedding.length,
        },
      // ...
    }

    // ...
  },
};
```

From `transformers-local-onnx.ts` (excerpts):

```typescript
// Generate embeddings using local ONNX model (runs locally, no network calls)
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
  try {
    const pipeline = await getEmbeddingPipeline();

    // Generate embeddings - returns a tensor, we need to extract the array
    const output = await pipeline(content, {
      pooling: 'mean',
      // ...
    });

    // ...
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    console.error("Local ONNX embeddings failed:", errorMessage);
    return null;
  }
};
```

The search function mirrors the Mixedbread strategy, except the query embedding is computed locally:

```typescript
    // Generate query embedding (local, no network calls)
    const queryEmbedStart = performance.now();
    const queryEmbedding = await generateEmbeddings(query);
    if (enableTiming) {
      timings.queryEmbedding = performance.now() - queryEmbedStart;
    }

    // ...

    for (const page of pages) {
      const pageEmbedding = page.embeddings;
      if (!pageEmbedding || pageEmbedding.length !== queryEmbedding.length) {
        continue; // Skip pages without embeddings or wrong dimension
      }
      // ...
    }
```

Loading cached results streams the JSON with `fast-json`; embeddings arrays need special handling (excerpts):

```typescript
// ... (shape of each cached result)
  url: string;
  title?: string;
  embeddings?: number[] | null;
  metadata?: any;
  contentHash?: string;
});

// Embeddings are arrays - need to handle them specially.
// fast-json emits arrays element by element, so we need to collect them.
const embeddingsBuffer = new Map<number, number[]>();

fastJson.on('results[*].embeddings[*]', (value) => {
  if (!resultsMap.has(currentIndex)) resultsMap.set(currentIndex, {});
  if (!embeddingsBuffer.has(currentIndex)) embeddingsBuffer.set(currentIndex, []);
  embeddingsBuffer.get(currentIndex)!.push(value as unknown as number);
});

// When the embeddings array is done, store it
fastJson.on('results[*].embeddings', (value) => {
  if (!resultsMap.has(currentIndex)) resultsMap.set(currentIndex, {});
  // If value is already an array, use it directly (fallback)
  if (Array.isArray(value)) {
    resultsMap.get(currentIndex)!.embeddings = value;
  } else if (embeddingsBuffer.has(currentIndex)) {
    // Use the collected array elements
    resultsMap.get(currentIndex)!.embeddings = embeddingsBuffer.get(currentIndex)!;
    embeddingsBuffer.delete(currentIndex);
  } else {
    resultsMap.get(currentIndex)!.embeddings = value as unknown as number[] | null;
  }
});

// ... (map each parsed entry back into a page)
  content: result.content || "", // Content now included in JSON
  title: result.title,
  embeddings: result.embeddings,
  metadata: result.metadata,
}));
```
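For context, here is a hedged sketch of how such listeners are driven, assuming the npm `fast-json` package (the cache file path and the string form of emitted values are assumptions, not confirmed by this document):

```typescript
import FastJson from "npm:fast-json";

const fastJson = new FastJson();

// Same wildcard path syntax as the listeners above.
fastJson.on("results[*].url", (value) => {
  console.log("url:", value.toString()); // values may arrive as raw strings
});

// Feed raw JSON text; large payloads can also be written chunk by chunk.
const text = await Deno.readTextFile("cache/results.json"); // hypothetical path
fastJson.write(text);
```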
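Finally, a sketch of invoking a strategy end to end. The shapes used here come from the excerpts above (`limit` is the only search option they show); the example URL is hypothetical:

```typescript
import { searchStrategy, generateEmbeddings } from "./transformers-local-onnx.ts";
import type { Page } from "./types.ts";

const pages: Page[] = [
  {
    url: "https://example.com/intro", // hypothetical URL
    title: "Introduction to Groq",
    content: "Groq is a fast AI inference platform that provides APIs for various language models.",
    embeddings: await generateEmbeddings("Groq is a fast AI inference platform that provides APIs for various language models."),
  },
];

const results = await searchStrategy.search("What is Groq?", pages, { limit: 5 });
for (const result of results) {
  console.log(result.metadata?.similarity, result.snippet);
}
```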