Search

178 results found for β€œembeddings” (4125ms)

Code
168

// Run with: deno run --allow-read --allow-env --allow-net --allow-ffi test-server-mode.ts
import { searchStrategy, generateEmbeddings } from "../transformers-local-onnx.ts";
console.log("πŸ§ͺ Testing ONNX in Server Mode (long-running process)\n");
const start = performance.now();
const embedding = await generateEmbeddings(query);
const elapsed = performance.now() - start;
// Run with: deno run --allow-read --allow-env --allow-net test-local-model.ts
import { searchStrategy, generateEmbeddings } from "../transformers-local-onnx.ts";
console.log("πŸ§ͺ Testing Local ONNX Model Strategy\n");
// Test 1: Generate embeddings for a simple query
console.log("Test 1: Generate embeddings for a query");
console.log("Query: 'What is Groq?'\n");
const start = performance.now();
const embeddings = await generateEmbeddings("What is Groq?");
const elapsed = performance.now() - start;
if (embeddings) {
console.log(`βœ… Generated embeddings successfully!`);
console.log(` Dimensions: ${embeddings.length}`);
console.log(` First 5 values: [${embeddings.slice(0, 5).map(v => v.toFixed(4)).join(", ")}...]`);
console.log(` Time: ${elapsed.toFixed(2)}ms`);
} else {
console.log(`❌ Failed to generate embeddings`);
}
console.log("\n" + "=".repeat(60) + "\n");
// Test 2: Generate embeddings for multiple queries (to test caching)
console.log("Test 2: Generate embeddings for multiple queries (testing cache)");
const queries = [
for (const query of queries) {
const queryStart = performance.now();
const queryEmbedding = await generateEmbeddings(query);
const queryElapsed = performance.now() - queryStart;
title: "Introduction to Groq",
content: "Groq is a fast AI inference platform that provides APIs for various language models.",
embeddings: await generateEmbeddings("Groq is a fast AI inference platform that provides APIs for various language models."),
},
{
title: "API Keys",
content: "Learn how to create and manage your Groq API keys for authentication.",
embeddings: await generateEmbeddings("Learn how to create and manage your Groq API keys for authentication."),
},
{
title: "Available Models",
content: "Groq supports various language models including Llama, Mixtral, and Gemma.",
embeddings: await generateEmbeddings("Groq supports various language models including Llama, Mixtral, and Gemma."),
},
];
```typescript
import { searchStrategy, generateEmbeddings } from "./transformers-local-onnx.ts";
```
```typescript
// import { searchStrategy, generateEmbeddings } from "./transformers-cosine.ts";
```
```typescript
// Comment out the current strategy
// import { searchStrategy, generateEmbeddings } from "./transformers-cosine.ts";
// Uncomment the local ONNX strategy
import { searchStrategy, generateEmbeddings } from "./transformers-local-onnx.ts";
```
total model load: 239ms
βœ… Generated embeddings successfully!
Dimensions: 384
First 5 values: [-0.0457, -0.0109, -0.0935, ...]
Time: 247ms
Test 2: Generate embeddings for multiple queries (testing cache)
βœ… "How to use Groq API?"
Time: 3.87ms (cached pipeline)
**Fix**: Check `search/index.ts` has the correct import:
```typescript
import { searchStrategy, generateEmbeddings } from "./transformers-local-onnx.ts";
```
- Slightly less accurate: ~57 vs ~58 MTEB score
### Optional: Recalculate Embeddings
If you were using a different strategy before, regenerate embeddings:
```bash
```
This ensures all page embeddings use the same model.
## Performance Comparison
β”‚
β”œβ”€ Need the best accuracy?
β”‚ └─ YES β†’ Use mixedbread-embeddings-cosine.ts or openai-cosine.ts
β”‚
└─ Want managed search (no embeddings management)?
└─ YES β†’ Use jigsawstack-orama.ts or mixedbread.ts
```
| **transformers-local-onnx** | ~150ms | ~50ms | ~10-30ms | **~60-80ms** | ❌ No |
| **transformers-cosine** | ~3-5s | ~150ms | ~10-30ms | ~160-180ms | βœ… First run only |
| **mixedbread-embeddings** | N/A | N/A | ~50-100ms | ~50-100ms | βœ… Every query |
| **openai-cosine** | N/A | N/A | ~100-200ms | ~100-200ms | βœ… Every query |
| **hf-inference-qwen3** | N/A | N/A | ~150-300ms | ~150-300ms | βœ… Every query |
| **transformers-local-onnx** | $0 | ∞ | 100% free, runs locally |
| **transformers-cosine** | $0 | ∞ | 100% free, runs locally |
| **mixedbread-embeddings** | $0-$ | Generous | Free tier: 150 req/min, 100M tokens/mo |
| **openai-cosine** | $$ | Limited | $0.0001/1K tokens (text-embedding-3-small) |
| **hf-inference-qwen3** | $0 | Generous | Free tier: 1000 req/day |
| **transformers-local-onnx** | all-MiniLM-L6-v2 | 384 | ~58 | Fast, good quality |
| **transformers-cosine** | all-MiniLM-L6-v2 | 384 | ~58 | Same as local |
| **mixedbread-embeddings** | mxbai-embed-large-v1 | 1024 | ~64 | Higher quality |
| **openai-cosine** | text-embedding-3-small | 1536 | ~62 | Reliable, tested |
| **hf-inference-qwen3** | Qwen3-Embedding-8B | 768 | ~65 | Very high quality |
### 🎯 Maximum Accuracy
**Use**: `hf-inference-qwen3-cosine.ts` or `mixedbread-embeddings-cosine.ts`
**Why**:
**Why**:
- No embedding management needed
- Handles storage, search, and embeddings
- Less code to maintain
**Best for**:
- When you want a managed solution
- Don't want to store embeddings yourself
- Prefer APIs over local computation
```typescript
// Before
// import { searchStrategy, generateEmbeddings } from "./openai-cosine.ts";
// After
import { searchStrategy, generateEmbeddings } from "./transformers-local-onnx.ts";
```
3. Recalculate embeddings (if dimensions differ):
```bash
GET /cache/recalculate?force=true
// Mixedbread Embeddings + Cosine Similarity Strategy
// Uses Mixedbread's Embeddings API for vector generation with local cosine similarity
// Model: mxbai-embed-large-v1 (state-of-the-art English embeddings)
import type { SearchStrategy, SearchResult, Page, SearchOptions } from "./types.ts";
if (!MIXEDBREAD_API_KEY) {
throw new Error("MIXEDBREAD_API_KEY or MXBAI_API_KEY not found - embeddings disabled");
}
}
// Generate embeddings using Mixedbread Embeddings API
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
try {
const client = await getMixedbreadClient();
// Generate embeddings using the SDK's embeddings.create() method
const response = await client.embeddings.create({
model: "mixedbread-ai/mxbai-embed-large-v1",
input: content,
}
console.warn("Unexpected Mixedbread Embeddings API response format:", JSON.stringify(response));
return null;
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
console.error("Mixedbread embeddings failed:", errorMessage);
if (error instanceof Error && error.stack) {
console.error("Stack trace:", error.stack);
export const searchStrategy: SearchStrategy = {
name: "mixedbread-embeddings-cosine",
description: "Semantic search using Mixedbread Embeddings API (mxbai-embed-large-v1) with local cosine similarity",
search: async (query: string, pages: Page[], options: SearchOptions = {}): Promise<SearchResult[]> => {
const limit = options.limit || 10;
// Generate query embedding (Mixedbread API call)
const queryEmbedStart = performance.now();
const queryEmbedding = await generateEmbeddings(query);
if (enableTiming) {
timings.queryEmbedding = performance.now() - queryEmbedStart;
for (const page of pages) {
const pageEmbedding = page.embeddings;
if (!pageEmbedding || pageEmbedding.length !== queryEmbedding.length) {
continue; // Skip pages without embeddings or wrong dimension
}
snippet: generateSnippet(page.content, queryWords, query.toLowerCase()),
metadata: {
strategy: "mixedbread-embeddings-cosine",
similarity,
embeddingDimension: queryEmbedding.length,
}
// Generate embeddings using local ONNX model (runs locally, no network calls)
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
try {
const pipeline = await getEmbeddingPipeline();
// Generate embeddings - returns a tensor, we need to extract the array
const output = await pipeline(content, {
pooling: 'mean',
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
console.error("Local ONNX embeddings failed:", errorMessage);
return null;
}
// Generate query embedding (local, no network calls)
const queryEmbedStart = performance.now();
const queryEmbedding = await generateEmbeddings(query);
if (enableTiming) {
timings.queryEmbedding = performance.now() - queryEmbedStart;
for (const page of pages) {
const pageEmbedding = page.embeddings;
if (!pageEmbedding || pageEmbedding.length !== queryEmbedding.length) {
continue; // Skip pages without embeddings or wrong dimension
}
url: string;
title?: string;
embeddings?: number[] | null;
metadata?: any;
contentHash?: string;
});
// Embeddings are arrays - need to handle them specially
// fast-json emits arrays element by element, so we need to collect them
const embeddingsBuffer = new Map<number, number[]>();
fastJson.on('results[*].embeddings[*]', (value) => {
if (!resultsMap.has(currentIndex)) resultsMap.set(currentIndex, {});
if (!embeddingsBuffer.has(currentIndex)) embeddingsBuffer.set(currentIndex, []);
embeddingsBuffer.get(currentIndex)!.push(value as unknown as number);
});
// When embeddings array is done, store it
fastJson.on('results[*].embeddings', (value) => {
if (!resultsMap.has(currentIndex)) resultsMap.set(currentIndex, {});
// If value is already an array, use it directly (fallback)
if (Array.isArray(value)) {
resultsMap.get(currentIndex)!.embeddings = value;
} else if (embeddingsBuffer.has(currentIndex)) {
// Use collected array elements
resultsMap.get(currentIndex)!.embeddings = embeddingsBuffer.get(currentIndex)!;
embeddingsBuffer.delete(currentIndex);
} else {
resultsMap.get(currentIndex)!.embeddings = value as unknown as number[] | null;
}
});
content: result.content || "", // Content now included in JSON
title: result.title,
embeddings: result.embeddings,
metadata: result.metadata,
}));
content: result.content || "", // Content now included in JSON
title: result.title,
embeddings: result.embeddings,
metadata: result.metadata,
}));
peterqliu
PineconeIndex
Vector db's on Pinecone, with OpenAI embeddings
Public
tmcw
surprisingEmbeddings
Visualizing embedding distances
Public
maxm
emojiVectorEmbeddings
Β 
Public
janpaul123
blogPostEmbeddingsDimensionalityReduction
Β 
Public
janpaul123
compareEmbeddings
Β 
Public

Users

No users found
No docs found