Search: 178 results for "embeddings" (168 in code, 3701ms)

} from "./utils.ts";
import { generatePageMetadata } from "./groq.ts";
import { searchPages, getActiveEmbeddingsFunction, type Page } from "./search/index.ts";
import { answerQuestion, answerStrategy } from "./answer/index.ts";
import { urls } from "./urls.ts";
content?: string;
title?: string;
embeddings?: number[] | null;
metadata?: any;
frontmatter?: any;
cacheMap[result.url] = {
title: result.title,
embeddings: result.embeddings || null,
metadata: result.metadata || null,
};
const frontmatter = cached.frontmatter;
// On Val Town, SQLite cache already has everything we need
// Content is available, embeddings are available - no need to fetch!
return {
path,
content: cached.content, // Already cached in SQLite, no fetch needed!
title: frontmatter?.title || path,
embeddings: cached.embeddings, // Already cached in SQLite
metadata: cached.metadata,
};
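The return above reads everything out of SQLite. A minimal sketch of that read on Val Town, assuming the std `sqlite` helper and a hypothetical `page_cache` table with vectors stored as JSON text (all schema names here are illustrative, not the project's actual schema):

```typescript
import { sqlite } from "https://esm.town/v/stevekrouse/sqlite";

type CachedPage = {
  content?: string;
  title?: string;
  embeddings?: number[] | null;
  metadata?: any;
  frontmatter?: any;
};

async function getCache(url: string): Promise<CachedPage | null> {
  const result = await sqlite.execute({
    sql: "SELECT content, title, embeddings, metadata, frontmatter FROM page_cache WHERE url = ?",
    args: [url],
  });
  const row = result.rows[0];
  if (!row) return null;
  const [content, title, embeddings, metadata, frontmatter] = row as string[];
  return {
    content,
    title,
    // Vectors and objects round-trip through SQLite as JSON strings
    embeddings: embeddings ? JSON.parse(embeddings) : null,
    metadata: metadata ? JSON.parse(metadata) : null,
    frontmatter: frontmatter ? JSON.parse(frontmatter) : null,
  };
}
```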
if (jsonCache && jsonCache[url]) {
const jsonData = jsonCache[url];
// JSON cache has embeddings, title, metadata from recalculate
// We skip fetching content to speed up search - content is only for snippets
// NO NETWORK CALLS - everything is from the JSON cache!
if (jsonData.embeddings && Array.isArray(jsonData.embeddings) && jsonData.embeddings.length > 0) {
// Use cached data - NO network fetch! This is why it's fast (was 6.5s, now 0.16ms)
return {
content: '', // Skipped - only needed for snippets, can fetch lazily if needed
title: jsonData.title || path,
embeddings: jsonData.embeddings, // From JSON cache (from recalculate)
metadata: jsonData.metadata || null,
};
} else {
// Missing embeddings - this shouldn't happen if recalculate ran successfully
uncachedUrls.push(url);
}
}
// If not cached anywhere OR cached data has no embeddings, skip it
// (search requires embeddings to work, and we NEVER fetch live data - that's the point!)
return null;
})
}
// Generate embeddings using active strategy's embeddings function
const generateEmbeddings = getActiveEmbeddingsFunction();
const embeddings = await generateEmbeddings(pageData.content);
// Cache it with metadata, hash, and embeddings - track errors
const cacheError = await setCache(url, {
content: pageData.content,
metadata,
contentHash,
embeddings,
});
if (cacheError) {
console.warn(`Completed ${path} (metadata: ${metadata ? 'yes' : 'no'}, embeddings: ${embeddings ? 'yes' : 'no'}, cache write failed: ${cacheError})`);
} else {
console.log(`Completed ${path} (metadata: ${metadata ? 'yes' : 'no'}, embeddings: ${embeddings ? 'yes' : 'no'}, cached)`);
}
}
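The cache write above stores a `contentHash` next to the embeddings. A sketch of how such a hash could be computed, assuming SHA-256 over the page body via the standard Web Crypto API (the project's actual hashing scheme may differ); comparing the stored hash against a fresh one lets recalculation skip pages whose content has not changed, avoiding needless embedding calls:

```typescript
// Hex-encoded SHA-256 of the page content; stable input => stable hash
async function hashContent(content: string): Promise<string> {
  const bytes = new TextEncoder().encode(content);
  const digest = await crypto.subtle.digest("SHA-256", bytes);
  return Array.from(new Uint8Array(digest))
    .map((b) => b.toString(16).padStart(2, "0"))
    .join("");
}

// Usage sketch: only re-embed when the hash changed
// const contentHash = await hashContent(pageData.content);
// if (cached?.contentHash === contentHash) continue;
```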
// Content is now included in latest.json from recalculate
console.log(`Loaded cache from JSON (${cachedData.results?.length || 0} pages with embeddings and content)\n`);
// Load test questions
// Hugging Face Inference Client + Qwen3-Embedding-8B + Cosine Similarity Strategy
// Uses @huggingface/inference SDK for embeddings via API
// Model: Qwen/Qwen3-Embedding-8B (8B params, up to 4096 dimensions, 32k context)
if (!HF_TOKEN) {
console.warn("HF_TOKEN, HF_API_KEY, or HUGGINGFACE_API_KEY not found - embeddings disabled");
}
}
// Generate embeddings using Hugging Face Inference Client
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
try {
const client = await getInferenceClient();
// The call itself is elided in the search results; with the SDK and the
// model named above it is presumably a featureExtraction request
const output = await client.featureExtraction({
  model: "Qwen/Qwen3-Embedding-8B",
  inputs: content,
});
// The API typically returns a nested array or flat array
if (Array.isArray(output)) {
// Handle nested array format [[...embeddings...]]
if (Array.isArray(output[0])) {
return output[0]; // Return first (and typically only) embedding vector
}
// Handle flat array format [...embeddings...]
return output;
}
// Handle object format with embeddings property
if (output && typeof output === 'object') {
if (output.embeddings && Array.isArray(output.embeddings)) {
return Array.isArray(output.embeddings[0]) ? output.embeddings[0] : output.embeddings;
  }
}
return null; // Response shape not recognized
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
console.error("HF Inference Client embeddings failed:", errorMessage);
return null;
}
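`getInferenceClient` itself is not shown in the results. A plausible shape for it, assuming `@huggingface/inference`'s `InferenceClient` (older SDK versions export the same client as `HfInference`) and the three env-var fallbacks named in the warning above:

```typescript
import { InferenceClient } from "@huggingface/inference";

let client: InferenceClient | null = null;

// Lazy singleton: build the client once, reuse it for every embedding call
async function getInferenceClient(): Promise<InferenceClient> {
  if (!client) {
    const token = Deno.env.get("HF_TOKEN") ??
      Deno.env.get("HF_API_KEY") ??
      Deno.env.get("HUGGINGFACE_API_KEY");
    if (!token) throw new Error("No Hugging Face token configured");
    client = new InferenceClient(token);
  }
  return client;
}
```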
// Generate query embedding (HF Inference API call)
const queryEmbedStart = performance.now();
const queryEmbedding = await generateEmbeddings(query);
if (enableTiming) {
timings.queryEmbedding = performance.now() - queryEmbedStart;
}
for (const page of pages) {
const pageEmbedding = page.embeddings;
if (!pageEmbedding || pageEmbedding.length !== queryEmbedding.length) {
continue; // Skip pages without embeddings or wrong dimension
}
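The dimension guard above exists because cosine similarity is only defined for equal-length vectors. The similarity helper itself never appears in the results; a standard implementation looks like the sketch below (the project's version may instead assume pre-normalized vectors and use a bare dot product):

```typescript
// Cosine similarity: dot(a, b) / (|a| * |b|), in [-1, 1] for real vectors
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0, normA = 0, normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  return denom === 0 ? 0 : dot / denom;
}
```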
// Cloudflare Workers AI BGE-Large Strategy
// Uses Cloudflare's @cf/baai/bge-large-en-v1.5 model for embeddings with local cosine similarity
import type { SearchStrategy, SearchResult, Page, SearchOptions } from "./types.ts";
}
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
try {
const { accountId, apiToken } = getCloudflareConfig();
const responseData = await response.json();
// Cloudflare wraps the response in: { result: { shape: [1, 1024], data: [[...embeddings...]] }, success: true, errors: [] }
if (responseData.result && responseData.result.data && Array.isArray(responseData.result.data)) {
const embedding = responseData.result.data[0];
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
console.error("Cloudflare embeddings failed:", errorMessage);
if (error instanceof Error && error.stack) {
console.error("Stack trace:", error.stack);
// Generate query embedding
const queryEmbedStart = performance.now();
const queryEmbedding = await generateEmbeddings(query);
if (enableTiming) {
timings.queryEmbedding = performance.now() - queryEmbedStart;
}
for (const page of pages) {
const pageEmbedding = page.embeddings;
if (!pageEmbedding || pageEmbedding.length !== queryEmbedding.length) {
continue;
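The request to Workers AI is elided from the snippet above. A sketch using Cloudflare's documented REST endpoint for this model; everything beyond the endpoint, auth header, and payload shape is an assumption:

```typescript
async function fetchBgeEmbedding(
  accountId: string,
  apiToken: string,
  text: string,
): Promise<Response> {
  const url =
    `https://api.cloudflare.com/client/v4/accounts/${accountId}/ai/run/@cf/baai/bge-large-en-v1.5`;
  return fetch(url, {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${apiToken}`,
      "Content-Type": "application/json",
    },
    // The model takes a batch of texts; a one-element batch is why the
    // caller reads result.data[0]
    body: JSON.stringify({ text: [text] }),
  });
}
```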
// Mixedbread Stores Strategy: Managed AI Search Service
// Uses Mixedbread's Store API for document storage and semantic search
// No local embeddings needed - Mixedbread handles everything
import type { SearchStrategy, SearchResult, Page, SearchOptions } from "./types.ts";
// This function is required for compatibility with the recalculation system
// For Mixedbread managed Store, embeddings are handled by Mixedbread internally
// Return a dummy array to satisfy the recalculation script
// The actual embeddings are generated and stored by Mixedbread when documents are uploaded
export const generateEmbeddings = async (_content: string): Promise<number[] | null> => {
// Return a dummy embedding array to satisfy recalculation
// The recalculation script requires this, but for Mixedbread Store,
// you should use `deno task recalc-mxbai` instead, which uploads docs to Mixedbread
return [0]; // Dummy value - actual embeddings are handled by Mixedbread Store
};
export const searchStrategy: SearchStrategy = {
name: "mixedbread",
description: "Managed AI search using Mixedbread Stores (handles storage, embeddings, and search)",
search: async (query: string, _pages: Page[], options: SearchOptions = {}): Promise<SearchResult[]> => {
const limit = options.limit || 10;
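`search/types.ts` never shows up in the results. The contract below is reconstructed from how the strategies use these types (`options.limit`, `options.enableTiming`, `page.embeddings`, the `search` signature); any field not visible in the snippets is a guess:

```typescript
export interface Page {
  path: string;
  content: string;
  title: string;
  embeddings?: number[] | null;
  metadata?: any;
}

export interface SearchResult {
  path: string;
  title: string;
  similarity: number; // cosine score, or service-reported relevance
}

export interface SearchOptions {
  limit?: number;
  enableTiming?: boolean;
}

export interface SearchStrategy {
  name: string;
  description: string;
  search(query: string, pages: Page[], options?: SearchOptions): Promise<SearchResult[]>;
}
```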
```typescript
import { searchStrategy, generateEmbeddings } from "./transformers-local-onnx.ts";
```
| Strategy | Latency | Cost | Setup | Best for |
|---|---|---|---|---|
| **transformers-local-onnx** ⭐ | ~60-80ms | Free | Download model | Production |
| **transformers-cosine** | ~160-180ms | Free | None (auto-download) | Development |
| **mixedbread-embeddings** | ~50-100ms | Free tier | API key | High accuracy |
| **openai-cosine** | ~100-200ms | Paid | API key | Reliability |
| **hf-inference-qwen3** | ~150-300ms | Free tier | API key | Best accuracy |
- **`transformers-local-onnx.ts`** - Local ONNX models (fastest, recommended)
- **`transformers-cosine.ts`** - Auto-download ONNX models
- **`mixedbread-embeddings-cosine.ts`** - Mixedbread API + local cosine
- **`openai-cosine.ts`** - OpenAI embeddings + local cosine
- **`hf-inference-qwen3-cosine.ts`** - HuggingFace Qwen3-8B embeddings
- **`cloudflare-bge-cosine.ts`** - Cloudflare Workers AI
- **`jigsawstack-orama.ts`** - JigsawStack managed search
- **`mixedbread.ts`** - Mixedbread Stores (managed)
- **`placeholder.ts`** - Fake embeddings for testing (sketched below)
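A sketch of what `placeholder.ts` plausibly contains: deterministic fake vectors, so the whole pipeline can be exercised without a model download or API key. The scheme below is illustrative only, not the project's actual code:

```typescript
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
  const dims = 8;
  const vec = new Array(dims).fill(0);
  for (let i = 0; i < content.length; i++) {
    // Fold character codes into the vector; identical text => identical vector
    vec[i % dims] += content.charCodeAt(i) / 1000;
  }
  return vec;
};
```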
## Documentation
**Returns**: Array of search results sorted by relevance
### Generate Embeddings
```typescript
async function generateEmbeddings(
content: string
): Promise<number[] | null>
```
1. Generate Query Embedding (10-30ms)
2. Compare with Page Embeddings (cosine similarity, <1ms per page)
3. Sort by Similarity
1. **Use local ONNX models** for production (fastest, most reliable)
2. **Pre-calculate embeddings** during recalculation (don't generate at query time)
3. **Cache the pipeline** (automatically done, but worth noting; see the sketch after this list)
4. **Use quantized models** if memory is constrained (set `USE_QUANTIZED = true`)
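Tip 3 in practice, for the Transformers.js strategies: memoize the model load so that only the first query pays the startup cost. A minimal sketch using the same `pipeline()` API shown in the example further down:

```typescript
import { pipeline } from "@xenova/transformers";

let extractorPromise: Promise<any> | null = null;

// Reuse one in-flight or resolved pipeline across all queries
function getExtractor() {
  extractorPromise ??= pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
  return extractorPromise;
}
```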
Check the import in `search/index.ts`:
```typescript
import { searchStrategy, generateEmbeddings } from "./transformers-local-onnx.ts";
```
Running the recalculation task again regenerates all embeddings with the new strategy.
## Contributing
};
export const generateEmbeddings = async (content: string) => {
// Generate embeddings
};
```
You can then use the model to compute embeddings like this:
```js
const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
// Compute sentence embeddings
const sentences = ['This is an example sentence', 'Each sentence is converted'];
const output = await extractor(sentences, { pooling: 'mean', normalize: true });
```
"intermediate_size": 1536,
"layer_norm_eps": 1e-12,
"max_position_embeddings": 512,
"model_type": "bert",
"num_attention_heads": 12,
echo "Next steps:"
echo "1. Update search/index.ts to use the local ONNX strategy:"
echo " import { searchStrategy, generateEmbeddings } from \"./transformers-local-onnx.ts\"
echo ""
echo "2. Run your application - the model will load from local files!"
```typescript
import { searchStrategy, generateEmbeddings } from "./transformers-local-onnx.ts";
```
- peterqliu/PineconeIndex - Vector DBs on Pinecone, with OpenAI embeddings (public)
- tmcw/surprisingEmbeddings - Visualizing embedding distances (public)
- maxm/emojiVectorEmbeddings (public)
- janpaul123/blogPostEmbeddingsDimensionalityReduction (public)
- janpaul123/compareEmbeddings (public)
