Search

174 results found for embeddings (3465ms)

Code
165

├─ Need the best accuracy?
│ └─ YES → Use mixedbread-embeddings-cosine.ts or openai-cosine.ts
└─ Want managed search (no embeddings management)?
└─ YES → Use jigsawstack-orama.ts or mixedbread.ts
```
| **transformers-local-onnx** | ~150ms | ~50ms | ~10-30ms | **~60-80ms** | ❌ No |
| **transformers-cosine** | ~3-5s | ~150ms | ~10-30ms | ~160-180ms | ✅ First run only |
| **mixedbread-embeddings** | N/A | N/A | ~50-100ms | ~50-100ms | ✅ Every query |
| **openai-cosine** | N/A | N/A | ~100-200ms | ~100-200ms | ✅ Every query |
| **hf-inference-qwen3** | N/A | N/A | ~150-300ms | ~150-300ms | ✅ Every query |
| **transformers-local-onnx** | $0 | ∞ | 100% free, runs locally |
| **transformers-cosine** | $0 | ∞ | 100% free, runs locally |
| **mixedbread-embeddings** | $0-$ | Generous | Free tier: 150 req/min, 100M tokens/mo |
| **openai-cosine** | $$ | Limited | $0.0001/1K tokens (text-embedding-3-small) |
| **hf-inference-qwen3** | $0 | Generous | Free tier: 1000 req/day |
| **transformers-local-onnx** | all-MiniLM-L6-v2 | 384 | ~58 | Fast, good quality |
| **transformers-cosine** | all-MiniLM-L6-v2 | 384 | ~58 | Same as local |
| **mixedbread-embeddings** | mxbai-embed-large-v1 | 1024 | ~64 | Higher quality |
| **openai-cosine** | text-embedding-3-small | 1536 | ~62 | Reliable, tested |
| **hf-inference-qwen3** | Qwen3-Embedding-8B | 768 | ~65 | Very high quality |
### 🎯 Maximum Accuracy
**Use**: `hf-inference-qwen3-cosine.ts` or `mixedbread-embeddings-cosine.ts`
**Why**:
**Why**:
- No embedding management needed
- Handles storage, search, and embeddings
- Less code to maintain
**Best for**:
- When you want a managed solution
- Don't want to store embeddings yourself
- Prefer APIs over local computation
```typescript
// Before
// import { searchStrategy, generateEmbeddings } from "./openai-cosine.ts";
// After
import { searchStrategy, generateEmbeddings } from "./transformers-local-onnx.ts";
```
3. Recalculate embeddings (if dimensions differ):
```bash
GET /cache/recalculate?force=true
// Mixedbread Embeddings + Cosine Similarity Strategy
// Uses Mixedbread's Embeddings API for vector generation with local cosine similarity
// Model: mxbai-embed-large-v1 (state-of-the-art English embeddings)
import type { SearchStrategy, SearchResult, Page, SearchOptions } from "./types.ts";
if (!MIXEDBREAD_API_KEY) {
throw new Error("MIXEDBREAD_API_KEY or MXBAI_API_KEY not found - embeddings disabled");
}
}
// Generate embeddings using Mixedbread Embeddings API
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
try {
const client = await getMixedbreadClient();
// Generate embeddings using the SDK's embeddings.create() method
const response = await client.embeddings.create({
model: "mixedbread-ai/mxbai-embed-large-v1",
input: content,
}
console.warn("Unexpected Mixedbread Embeddings API response format:", JSON.stringify(respons
return null;
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
console.error("Mixedbread embeddings failed:", errorMessage);
if (error instanceof Error && error.stack) {
console.error("Stack trace:", error.stack);
export const searchStrategy: SearchStrategy = {
name: "mixedbread-embeddings-cosine",
description: "Semantic search using Mixedbread Embeddings API (mxbai-embed-large-v1) with loca
search: async (query: string, pages: Page[], options: SearchOptions = {}): Promise<SearchResul
const limit = options.limit || 10;
// Generate query embedding (Mixedbread API call)
const queryEmbedStart = performance.now();
const queryEmbedding = await generateEmbeddings(query);
if (enableTiming) {
timings.queryEmbedding = performance.now() - queryEmbedStart;
for (const page of pages) {
const pageEmbedding = page.embeddings;
if (!pageEmbedding || pageEmbedding.length !== queryEmbedding.length) {
continue; // Skip pages without embeddings or wrong dimension
}
snippet: generateSnippet(page.content, queryWords, query.toLowerCase()),
metadata: {
strategy: "mixedbread-embeddings-cosine",
similarity,
embeddingDimension: queryEmbedding.length,
}
// Generate embeddings using local ONNX model (runs locally, no network calls)
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
try {
const pipeline = await getEmbeddingPipeline();
// Generate embeddings - returns a tensor, we need to extract the array
const output = await pipeline(content, {
pooling: 'mean',
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
console.error("Local ONNX embeddings failed:", errorMessage);
return null;
}
// Generate query embedding (local, no network calls)
const queryEmbedStart = performance.now();
const queryEmbedding = await generateEmbeddings(query);
if (enableTiming) {
timings.queryEmbedding = performance.now() - queryEmbedStart;
for (const page of pages) {
const pageEmbedding = page.embeddings;
if (!pageEmbedding || pageEmbedding.length !== queryEmbedding.length) {
continue; // Skip pages without embeddings or wrong dimension
}
url: string;
title?: string;
embeddings?: number[] | null;
metadata?: any;
contentHash?: string;
});
// Embeddings are arrays - need to handle them specially
// fast-json emits arrays element by element, so we need to collect them
const embeddingsBuffer = new Map<number, number[]>();
fastJson.on('results[*].embeddings[*]', (value) => {
if (!resultsMap.has(currentIndex)) resultsMap.set(currentIndex, {});
if (!embeddingsBuffer.has(currentIndex)) embeddingsBuffer.set(currentIndex, []);
embeddingsBuffer.get(currentIndex)!.push(value as unknown as number);
});
// When embeddings array is done, store it
fastJson.on('results[*].embeddings', (value) => {
if (!resultsMap.has(currentIndex)) resultsMap.set(currentIndex, {});
// If value is already an array, use it directly (fallback)
if (Array.isArray(value)) {
resultsMap.get(currentIndex)!.embeddings = value;
} else if (embeddingsBuffer.has(currentIndex)) {
// Use collected array elements
resultsMap.get(currentIndex)!.embeddings = embeddingsBuffer.get(currentIndex)!;
embeddingsBuffer.delete(currentIndex);
} else {
resultsMap.get(currentIndex)!.embeddings = value as unknown as number[] | null;
}
});
content: result.content || "", // Content now included in JSON
title: result.title,
embeddings: result.embeddings,
metadata: result.metadata,
}));
content: result.content || "", // Content now included in JSON
title: result.title,
embeddings: result.embeddings,
metadata: result.metadata,
}));
import { generateSnippet } from "./utils.ts";
// Fake embeddings function for placeholder (not actually used, but kept for interface consisten
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
// Not used in placeholder strategy, but kept for consistency
// Returns fake embeddings for caching purposes
console.debug("Generating fake embeddings for placeholder strategy");
const fakeEmbedding: number[] = [];
content: string;
title?: string;
embeddings?: number[] | null;
metadata?: any;
};
// JigsawStack + Orama Strategy: Semantic search using JigsawStack embeddings and Orama
import type { SearchStrategy, SearchResult, Page, SearchOptions } from "./types.ts";
import { create, insertMultiple, search } from "npm:@orama/orama@latest";
// JigsawStack embeddings function (v2 API)
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
const JIGSAWSTACK_API_KEY = Deno.env.get("JIGSAWSTACK_API_KEY");
const JIGSAWSTACK_API_URL = "https://api.jigsawstack.com/v2/embedding";
if (!JIGSAWSTACK_API_KEY) {
console.warn("JIGSAWSTACK_API_KEY not found - embeddings disabled");
return null;
}
const data = await response.json();
// JigsawStack v2 returns: { success: true, embeddings: [[...]], chunks: [...], _usage: {...
// embeddings is an array of arrays, we want the first embedding vector
if (data.success && data.embeddings && Array.isArray(data.embeddings) && data.embeddings.len
return data.embeddings[0]; // Return the first embedding vector
}
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
console.error("JigsawStack embeddings failed:", errorMessage);
return null;
}
export const searchStrategy: SearchStrategy = {
name: "jigsawstack-orama",
description: "Semantic search using JigsawStack embeddings with Orama vector search",
search: async (query: string, pages: Page[], options: SearchOptions = {}): Promise<SearchResul
const limit = options.limit || 10;
// Generate query embedding (this is the main bottleneck - external API call)
const queryEmbedStart = performance.now();
const queryEmbedding = await generateEmbeddings(query);
if (enableTiming) timings.queryEmbedding = performance.now() - queryEmbedStart;
}
// Determine embedding dimension from query (or first page with embeddings)
const embeddingDimension = queryEmbedding.length;
if (embeddingDimension === 0) {
if (enableTiming) timings.dbCreate = performance.now() - dbCreateStart;
// Prepare documents with embeddings for insertion
const documents: Array<{
path: string;
}> = [];
// Collect pages with embeddings
const docPrepStart = performance.now();
for (const page of pages) {
// Use cached embeddings if available, otherwise generate
let pageEmbedding = page.embeddings;
if (!pageEmbedding) {
// Generate embedding if not cached (should be cached from recalculate, but handle missi
pageEmbedding = await generateEmbeddings(page.content);
if (!pageEmbedding || pageEmbedding.length !== embeddingDimension) {
continue; // Skip pages without embeddings or wrong dimension
}
}
// OpenAI + Orama Strategy: Semantic search using OpenAI embeddings and Orama
// Faster than JigsawStack (~100-200ms vs ~550ms for query embeddings)
import type { SearchStrategy, SearchResult, Page, SearchOptions } from "./types.ts";
import { create, insertMultiple, search } from "npm:@orama/orama@latest";
// OpenAI embeddings function
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
const OPENAI_API_KEY = Deno.env.get("OPENAI_API_KEY");
const OPENAI_API_URL = "https://api.openai.com/v1/embeddings";
if (!OPENAI_API_KEY) {
console.warn("OPENAI_API_KEY not found - embeddings disabled");
return null;
}
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
console.error("OpenAI embeddings failed:", errorMessage);
return null;
}
};
// No caching for query embeddings - we want to benchmark actual API performance
export const searchStrategy: SearchStrategy = {
name: "openai-orama",
description: "Semantic search using OpenAI embeddings with Orama vector search (faster than Ji
search: async (query: string, pages: Page[], options: SearchOptions = {}): Promise<SearchResul
const limit = options.limit || 10;
// Generate query embedding (no caching - benchmarking actual API performance)
const queryEmbedStart = performance.now();
const queryEmbedding = await generateEmbeddings(query);
if (enableTiming) {
timings.queryEmbedding = performance.now() - queryEmbedStart;
}
// Determine embedding dimension from query (or first page with embeddings)
const embeddingDimension = queryEmbedding.length;
if (embeddingDimension === 0) {
if (enableTiming) timings.dbCreate = performance.now() - dbCreateStart;
// Prepare documents with embeddings for insertion
const documents: Array<{
path: string;
}> = [];
// Collect pages with embeddings
const docPrepStart = performance.now();
for (const page of pages) {
// Use cached embeddings if available, otherwise generate
let pageEmbedding = page.embeddings;
if (!pageEmbedding) {
// Generate embedding if not cached (should be cached from recalculate, but handle missi
pageEmbedding = await generateEmbeddings(page.content);
if (!pageEmbedding || pageEmbedding.length !== embeddingDimension) {
continue; // Skip pages without embeddings or wrong dimension
}
}
tmcw
surprisingEmbeddings
Visualizing embedding distances
Public
maxm
emojiVectorEmbeddings
 
Public
janpaul123
blogPostEmbeddingsDimensionalityReduction
 
Public
janpaul123
compareEmbeddings
 
Public
yawnxyz
embeddingsSearchExample
 
Public

Users

No users found
No docs found