} from "./utils.ts";import { generatePageMetadata } from "./groq.ts";import { searchPages, getActiveEmbeddingsFunction, type Page } from "./search/index.ts";import { answerQuestion, answerStrategy } from "./answer/index.ts";import { urls } from "./urls.ts"; content?: string; title?: string; embeddings?: number[] | null; metadata?: any; frontmatter?: any; content?: string; title?: string; embeddings?: number[] | null; metadata?: any; frontmatter?: any; cacheMap[result.url] = { title: result.title, embeddings: result.embeddings || null, metadata: result.metadata || null, }; const frontmatter = cached.frontmatter; // On Val Town, SQLite cache already has everything we need // Content is available, embeddings are available - no need to fetch! return { path, content: cached.content, // Already cached in SQLite, no fetch needed! title: frontmatter?.title || path, embeddings: cached.embeddings, // Already cached in SQLite metadata: cached.metadata, }; if (jsonCache && jsonCache[url]) { const jsonData = jsonCache[url]; // JSON cache has embeddings, title, metadata from recalculate // We skip fetching content to speed up search - content is only for snippets // NO NETWORK CALLS - everything is from the JSON cache! if (jsonData.embeddings && Array.isArray(jsonData.embeddings) && jsonData.embeddings.length > 0) { // Use cached data - NO network fetch! This is why it's fast (was 6.5s, now 0.16ms) return { content: '', // Skipped - only needed for snippets, can fetch lazily if needed title: jsonData.title || path, embeddings: jsonData.embeddings, // From JSON cache (from recalculate) metadata: jsonData.metadata || null, }; } else { // Missing embeddings - this shouldn't happen if recalculate ran successfully uncachedUrls.push(url); } } // If not cached anywhere OR cached data has no embeddings, skip it // (search requires embeddings to work, and we NEVER fetch live data - that's the point!) return null; }) } // Generate embeddings using active strategy's embeddings function const generateEmbeddings = getActiveEmbeddingsFunction(); const embeddings = await generateEmbeddings(pageData.content); // Cache it with metadata, hash, and embeddings - track errors const cacheError = await setCache(url, { content: pageData.content, metadata, contentHash, embeddings, }); if (cacheError) { console.warn(`Completed ${path} (metadata: ${metadata ? 'yes' : 'no'}, embeddings: ${embeddings ? 'yes' : 'no'}, cache error: ${cacheError})`); } else { console.log(`Completed ${path} (metadata: ${metadata ? 'yes' : 'no'}, embeddings: ${embeddings ? 
```typescript
// Hugging Face Inference Client + Qwen3-Embedding-8B + Cosine Similarity Strategy
// Uses the @huggingface/inference SDK for embeddings via API
// Model: Qwen/Qwen3-Embedding-8B (8B params, up to 4096 dimensions, 32k context)

if (!HF_TOKEN) {
  throw new Error("HF_TOKEN, HF_API_KEY, or HUGGINGFACE_API_KEY not found - embeddings disabled");
}

// Generate embeddings using the Hugging Face Inference Client
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
  try {
    const client = await getInferenceClient();
    // ... (API call elided in this excerpt) ...
    // The API typically returns a nested array or a flat array
    if (Array.isArray(output)) {
      // Nested array format [[...embeddings...]]
      if (Array.isArray(output[0])) {
        return output[0]; // Return the first (and typically only) embedding vector
      }
      // Flat array format [...embeddings...]
      return output;
    }
    // Object format with an embeddings property
    if (output && typeof output === 'object') {
      if (output.embeddings && Array.isArray(output.embeddings)) {
        return Array.isArray(output.embeddings[0]) ? output.embeddings[0] : output.embeddings;
      }
    }
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    console.error("HF Inference Client embeddings failed:", errorMessage);
    return null;
  }
};

// Generate the query embedding (HF Inference API call)
const queryEmbedStart = performance.now();
const queryEmbedding = await generateEmbeddings(query);
if (enableTiming) {
  timings.queryEmbedding = performance.now() - queryEmbedStart;
}

for (const page of pages) {
  const pageEmbedding = page.embeddings;
  if (!pageEmbedding || pageEmbedding.length !== queryEmbedding.length) {
    continue; // Skip pages without embeddings or with the wrong dimension
  }
  // ... (similarity scoring continues) ...
}
```
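The snippet above parses an `output` value whose producing call is cut off by the excerpt. With the `@huggingface/inference` SDK, the feature-extraction endpoint is the usual way to obtain it; here is a hedged sketch of what that elided call could look like (the repo's `getInferenceClient` helper and exact request options may differ):

```typescript
// Assumption: recent @huggingface/inference versions export InferenceClient
// (older versions export the equivalent HfInference class instead).
import { InferenceClient } from "@huggingface/inference";

const client = new InferenceClient(Deno.env.get("HF_TOKEN"));

// Produces the value the parsing logic above normalizes:
// a nested array, a flat array, or an object with an `embeddings` property.
const output = await client.featureExtraction({
  model: "Qwen/Qwen3-Embedding-8B",
  inputs: "How do I configure caching?",
});
```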
```typescript
// Cloudflare Workers AI BGE-Large Strategy
// Uses Cloudflare's @cf/baai/bge-large-en-v1.5 model for embeddings with local cosine similarity
import type { SearchStrategy, SearchResult, Page, SearchOptions } from "./types.ts";

export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
  try {
    const { accountId, apiToken } = getCloudflareConfig();
    // ... (request to the Workers AI endpoint elided in this excerpt) ...
    const responseData = await response.json();
    // Cloudflare wraps the response in:
    // { result: { shape: [1, 1024], data: [[...embeddings...]] }, success: true, errors: [], messages: [] }
    if (responseData.result && responseData.result.data && Array.isArray(responseData.result.data) && responseData.result.data.length > 0) {
      const embedding = responseData.result.data[0];
    }
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    console.error("Cloudflare embeddings failed:", errorMessage);
    if (error instanceof Error && error.stack) {
      console.error("Stack trace:", error.stack);
    }
  }
};

// Generate the query embedding
const queryEmbedStart = performance.now();
const queryEmbedding = await generateEmbeddings(query);
if (enableTiming) {
  timings.queryEmbedding = performance.now() - queryEmbedStart;
}

for (const page of pages) {
  const pageEmbedding = page.embeddings;
  if (!pageEmbedding || pageEmbedding.length !== queryEmbedding.length) {
    continue;
  }
```

```typescript
// Mixedbread Stores Strategy: Managed AI Search Service
// Uses Mixedbread's Store API for document storage and semantic search
// No local embeddings needed - Mixedbread handles everything
import type { SearchStrategy, SearchResult, Page, SearchOptions } from "./types.ts";

// This function is required for compatibility with the recalculation system.
// For the managed Mixedbread Store, embeddings are handled by Mixedbread internally,
// so we return a dummy array to satisfy the recalculation script.
// The actual embeddings are generated and stored by Mixedbread when documents are uploaded.
export const generateEmbeddings = async (_content: string): Promise<number[] | null> => {
  // The recalculation script requires this, but for the Mixedbread Store
  // you should use `deno task recalc-mxbai` instead, which uploads docs to Mixedbread.
  return [0]; // Dummy value - actual embeddings are handled by the Mixedbread Store
};

export const searchStrategy: SearchStrategy = {
  name: "mixedbread",
  description: "Managed AI search using Mixedbread Stores (handles storage, embeddings, and search)",
  search: async (query: string, _pages: Page[], options: SearchOptions = {}): Promise<SearchResult[]> => {
    const limit = options.limit || 10;
```

```typescript
import { searchStrategy, generateEmbeddings } from "./transformers-local-onnx.ts";
```

| Strategy | Query latency | Cost | Setup | Best for |
| --- | --- | --- | --- | --- |
| **transformers-local-onnx** ⭐ | ~60-80ms | Free | Download model | Production |
| **transformers-cosine** | ~160-180ms | Free | None (auto-download) | Development |
| **mixedbread-embeddings** | ~50-100ms | Free tier | API key | High accuracy |
| **openai-cosine** | ~100-200ms | Paid | API key | Reliability |
| **hf-inference-qwen3** | ~150-300ms | Free tier | API key | Best accuracy |

- **`transformers-local-onnx.ts`** - Local ONNX models (fastest, recommended)
- **`transformers-cosine.ts`** - Auto-download ONNX models
- **`mixedbread-embeddings-cosine.ts`** - Mixedbread API + local cosine
- **`openai-cosine.ts`** - OpenAI embeddings + local cosine
- **`hf-inference-qwen3-cosine.ts`** - HuggingFace Qwen3-8B embeddings
- **`cloudflare-bge-cosine.ts`** - Cloudflare Workers AI
- **`jigsawstack-orama.ts`** - JigsawStack managed search
- **`mixedbread.ts`** - Mixedbread Stores (managed)
- **`placeholder.ts`** - Fake embeddings for testing

## Documentation

**Returns**: Array of search results sorted by relevance

### Generate Embeddings

```typescript
async function generateEmbeddings(content: string): Promise<number[] | null>
```

```
Generate Query Embedding (10-30ms)
        ↓
Compare with Page Embeddings (cosine similarity, <1ms per page)
        ↓
Sort by Similarity
```
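The comparison step in this flow is plain cosine similarity between the query vector and each page's pre-computed vector. None of the excerpts above include the scoring function itself, so here is a minimal sketch of what the local-cosine strategies describe (function names and the trimmed-down page type are illustrative, not taken from the repo):

```typescript
type PageLike = { embeddings?: number[] | null };

// Cosine similarity between two embedding vectors of equal length.
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0, normA = 0, normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  if (normA === 0 || normB === 0) return 0;
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

// Score every page against the query embedding and keep the best matches,
// mirroring the "compare, then sort by similarity" flow shown above.
function rankBySimilarity<T extends PageLike>(queryEmbedding: number[], pages: T[], limit = 10) {
  return pages
    .filter((p) => p.embeddings && p.embeddings.length === queryEmbedding.length)
    .map((p) => ({ page: p, score: cosineSimilarity(queryEmbedding, p.embeddings!) }))
    .sort((a, b) => b.score - a.score)
    .slice(0, limit);
}
```

Because every page embedding is pre-calculated, this loop is pure arithmetic at query time, which is where the sub-millisecond per-page figure comes from.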
1. **Use local ONNX models** for production (fastest, most reliable)
2. **Pre-calculate embeddings** during recalculation (don't generate them at query time)
3. **Cache the pipeline** (done automatically, but worth noting)
4. **Use quantized models** if memory is constrained (set `USE_QUANTIZED = true`)

Check the import in `search/index.ts`:

```typescript
import { searchStrategy, generateEmbeddings } from "./transformers-local-onnx.ts";
```

This regenerates all embeddings with the new strategy.

## Contributing

```typescript
};

export const generateEmbeddings = async (content: string) => {
  // Generate embeddings
};
```

You can then use the model to compute embeddings like this:

```js
const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');

// Compute sentence embeddings
const sentences = ['This is an example sentence', 'Each sentence is converted'];
const output = await extractor(sentences, { pooling: 'mean', normalize: true });
```

```json
"intermediate_size": 1536,
"layer_norm_eps": 1e-12,
"max_position_embeddings": 512,
"model_type": "bert",
"num_attention_heads": 12,
```

```sh
echo "Next steps:"
echo "1. Update search/index.ts to use the local ONNX strategy:"
echo "   import { searchStrategy, generateEmbeddings } from \"./transformers-local-onnx.ts\";"
echo ""
echo "2. Run your application - the model will load from local files!"
```
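To connect the Transformers.js example above with the `generateEmbeddings` contract used throughout the strategies, here is a hedged sketch of wrapping the feature-extraction pipeline in that shape. The repo's actual `transformers-cosine.ts` / `transformers-local-onnx.ts` files may differ in model choice, import specifier, caching, and error handling:

```typescript
// Minimal sketch only - assumes the @xenova/transformers npm package and the
// MiniLM model shown in the example above, imported Deno-style via an npm specifier.
import { pipeline } from "npm:@xenova/transformers";

let extractorPromise: Promise<any> | null = null;

// Lazily create and reuse the feature-extraction pipeline ("cache the pipeline" tip above).
function getExtractor(): Promise<any> {
  extractorPromise ??= pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
  return extractorPromise;
}

export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
  try {
    const extractor = await getExtractor();
    // Mean-pooled, normalized sentence embedding for a single input string.
    const output = await extractor(content, { pooling: "mean", normalize: true });
    return Array.from(output.data) as number[];
  } catch (error) {
    console.error("Local embeddings failed:", error instanceof Error ? error.message : String(error));
    return null;
  }
};
```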