Excerpt from the search test harness: load the cached pages (with embeddings), then run a timed search:

```typescript
// Load actual pages with embeddings for search
const loadPagesStart = performance.now();
const pagesWithEmbeddings = await getAllPagesForSearch(true); // Enable timing
timings.loadPages = performance.now() - loadPagesStart;
console.log(`⏱️ Total pages load duration: ${timings.loadPages?.toFixed(2)}ms\n`);

const queryStart = performance.now();
const testResult = await runSearchTest(query, pagesWithEmbeddings, {
  limit: 10,
  minScore: 0,
});
```

**Features:**

- Fetches documentation pages from Groq's console
- Caches page content, metadata, token counts, and embeddings in SQLite
- Token counting using tiktoken (GPT-4 encoding)
- AI-generated metadata (categories, tags, use cases, sample questions)
- Content embeddings generation with multiple strategies (local ONNX, Transformers.js, API-based)
- Semantic search with configurable strategies (embeddings + cosine similarity)
- **RAG-based question answering** with configurable answer strategies (search + LLM)
- Hash-based change detection to skip unchanged pages during recalculation

**Recalculation steps:**

- Calculate token counts for each page
- Generate AI metadata (categories, tags, use cases, questions)
- Generate embeddings for each page
- Calculate content hashes for change detection
- Store everything in the SQLite cache

**When to recalculate:**

3. **Content updates** - Documentation pages have been updated and you want fresh data
4. **Token count needed** - You need accurate token counts for new content
5. **Metadata refresh** - You want to regenerate AI metadata or embeddings

### 🔄 Default Mode (Smart Recalculation)

- **Skips pages with unchanged content** (saves time and API calls)
- Only processes pages that have changed
- Still generates embeddings and metadata for changed pages

**Response includes:**

**Use cases:**

- Regenerating all metadata/embeddings even if content is unchanged
- After updating metadata generation prompts
- When you want to ensure everything is fresh
- Uses cached pages when available for faster results

**Note**: Currently uses embeddings-based semantic search. Multiple strategies are available (see Search section).

#### `GET /answer`

#### `GET /cache/recalculate`

Recalculate pages with AI metadata and embeddings generation.

**Query Parameters:**

- Calculates token counts
- Generates AI metadata (categories, tags, use cases, questions)
- Generates embeddings (currently fake, ready for Groq API)
- Calculates content hashes for change detection
- Stores everything in cache

Cache table schema (fragment):

```sql
  metadata TEXT,
  contentHash TEXT,
  embeddings TEXT,
  cachedAt INTEGER NOT NULL
)
```

- `metadata` - AI-generated metadata (categories, tags, use cases, questions)
- `contentHash` - SHA-256 hash of content (for change detection)
- `embeddings` - Content embeddings vector (JSON array)
- `cachedAt` - Timestamp when cached
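The `contentHash` column drives the skip logic described above. A minimal sketch of SHA-256 hashing with the Web Crypto API available in Deno (`computeContentHash` is a hypothetical name; the real helper lives in `utils.ts`):

```typescript
// Hypothetical helper: SHA-256 hex digest of page content, compared against
// the cached contentHash to decide whether a page can be skipped.
const computeContentHash = async (content: string): Promise<string> => {
  const bytes = new TextEncoder().encode(content);
  const digest = await crypto.subtle.digest("SHA-256", bytes);
  return Array.from(new Uint8Array(digest))
    .map((b) => b.toString(16).padStart(2, "0"))
    .join("");
};

// Usage: skip the page when the fresh hash matches the cached one.
// if (await computeContentHash(pageData.content) === cachedHash) { /* skip */ }
```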
**Switching search strategies:**

2. Activate it in `search/index.ts`:

   ```typescript
   import { searchStrategy, generateEmbeddings } from "./transformers-local-onnx.ts";
   ```

| Strategy | File | Speed | Cost | Pros |
|----------|------|-------|------|------|
| **Mixedbread** | `mixedbread-embeddings-cosine.ts` | ~50-100ms | Free tier | High quality, 1024 dims |
| **OpenAI** | `openai-cosine.ts` | ~100-200ms | Paid | High quality, reliable |
| **HuggingFace** | `hf-inference-qwen3-cosine.ts` | ~150-300ms | Free tier | Qwen3-8B model |

```typescript
// Comment out current strategy
// import { searchStrategy, generateEmbeddings } from "./transformers-cosine.ts";

// Uncomment desired strategy
import { searchStrategy, generateEmbeddings } from "./transformers-local-onnx.ts";
```

### Current Implementation (Semantic Search)

The search system uses semantic embeddings for intelligent search:

- Understands meaning, not just keywords
- Finds relevant results even with different wording

1. **Embedding Generation**: Content is converted to 384-dimensional vectors
2. **Cosine Similarity**: Query embeddings are compared against page embeddings (see the sketch below)
3. **Ranking**: Results are sorted by similarity score
4. **Snippet Generation**: Context-aware snippets are built around relevant content

To tune answer quality, adjust the system prompts.
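The similarity step in every `*-cosine.ts` strategy is a plain dot-product comparison over the cached vectors. A minimal sketch (the exact helper name in the repo may differ):

```typescript
// Cosine similarity between a query embedding and a page embedding.
// Both vectors must have the same dimension; strategies skip mismatches.
const cosineSimilarity = (a: number[], b: number[]): number => {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
};
```

Because page vectors are precomputed at recalculation time, this is the only per-page work at query time, consistent with the "<1ms cosine calc" figure noted in the strategy list below.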
## Embeddings

Content embeddings are generated for each page using the active search strategy (see Search section above).

**Current Default**: Local ONNX models (`transformers-local-onnx.ts`)

- Storage: Cached as JSON arrays in SQLite

Embeddings are:

- Generated during `/cache/recalculate`
- Stored in cache for fast retrieval

```bash
# Force recalculation
deno task recalc-f

# Recalculate with Mixedbread embeddings strategy
deno task recalc-mxbai

# Force recalculation with Mixedbread embeddings
deno task recalc-mxbai-f
```

Excerpt from the recalculate script: embeddings generation and caching with error tracking (elisions marked with `// …`):

```typescript
import { setCache /* … */ } from "../utils.ts";
import { generatePageMetadata } from "../groq.ts";
import { getActiveEmbeddingsFunction } from "../search/index.ts";
import { saveTestData } from "../main.tsx";

// … the per-page result shape includes: title?, metadata?, contentHash?,
// embeddings?: number[] | null, error?, cacheError?; unchanged pages
// short-circuit with cached values (the JSON-cache path returns the same shape):
//   { metadata: cached.metadata || undefined, contentHash: cachedHash,
//     embeddings: cached.embeddings || null, skipped: true,
//     reason: "Content unchanged (hash matches)" }

// Generate embeddings using active strategy's embeddings function
const generateEmbeddings = getActiveEmbeddingsFunction();
let embeddings: number[] | null = null;
let embeddingsError: string | undefined = undefined;
try {
  embeddings = await generateEmbeddings(pageData.content);
  if (!embeddings) {
    embeddingsError = "Embeddings generation returned null";
    console.error(`  ❌ Embeddings generation failed for ${path}: ${embeddingsError}`);
  }
} catch (embedError) {
  const embedErrorMessage = embedError instanceof Error
    ? embedError.message
    : String(embedError);
  embeddingsError = `Embeddings generation threw error: ${embedErrorMessage}`;
  console.error(`  ❌ Embeddings generation failed for ${path}: ${embeddingsError}`);
  // Don't throw - continue with null embeddings, but log the error
}

// Cache it with metadata, hash, and embeddings - track errors
const cacheError = await setCache(url, {
  content: pageData.content,
  metadata,
  contentHash,
  embeddings,
});

// … errors are recorded alongside the data in the result:
//   { metadata, contentHash, embeddings: embeddings || null,
//     ...(cacheError && { cacheError }), ...(embeddingsError && { embeddingsError }) }

const statusParts = [
  `metadata: ${metadata ? 'yes' : 'no'}`,
  `embeddings: ${embeddings ? 'yes' : 'no'}`,
];
if (embeddingsError) {
  statusParts.push(`embeddings error: ${embeddingsError.substring(0, 50)}...`);
}
// … cacheError is appended the same way
const status = statusParts.join(', ');
if (embeddingsError || cacheError) {
  console.warn(`Completed ${path} (${status})`);
} else {
  console.log(`Completed ${path} (${status})`);
}
```
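Recalculate and search deliberately share one embeddings function, exported as `getActiveEmbeddingsFunction` from `search/index.ts` (shown next). A small illustration, assuming a Deno script with top-level await:

```typescript
import { getActiveEmbeddingsFunction } from "./search/index.ts";

// Page vectors (stored at recalc time) and query vectors (computed at search
// time) come from the same model, so switching the strategy import re-points
// both sides at once and dimensions always match.
const embed = getActiveEmbeddingsFunction();
const pageVector = await embed("Some documentation page content");
const queryVector = await embed("how do I stream chat completions?");
console.log(pageVector?.length === queryVector?.length); // true when both succeed
```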
`search/index.ts` is the strategy switchboard: comment/uncomment one import to change both search and embeddings generation.

```typescript
import type { Page, SearchOptions, SearchResult } from "./types.ts";

// import { searchStrategy, generateEmbeddings } from "./placeholder.ts";
// import { searchStrategy, generateEmbeddings } from "./jigsawstack-orama.ts"; // ~550ms query embeddings (no API key needed, works on Val Town)
// import { searchStrategy, generateEmbeddings } from "./openai-orama.ts"; // ~100-200ms query embeddings (API), ~29ms DB overhead
// import { searchStrategy, generateEmbeddings } from "./openai-cosine.ts"; // ~100-200ms query embeddings (API), <1ms cosine calc
// import { searchStrategy, generateEmbeddings } from "./mixedbread-embeddings-cosine.ts"; // Mixedbread Embeddings API (mxbai-embed-large-v1) with local cosine similarity
// import { searchStrategy, generateEmbeddings } from "./mixedbread.ts"; // Mixedbread Stores (managed search service, handles storage + embeddings + search)
// import { searchStrategy, generateEmbeddings } from "./hf-inference-qwen3-cosine.ts"; // HF Inference Client with Qwen3-Embedding-8B (API, requires HF_TOKEN)
import { searchStrategy, generateEmbeddings } from "./cloudflare-bge-cosine.ts"; // Cloudflare Workers AI (bge-large-en-v1.5, 1024-dim) with local cosine similarity
// import { searchStrategy, generateEmbeddings } from "./transformers-cosine.ts"; // ~10-30ms (local embeddings, ~23MB model cached after first load)
// import { searchStrategy, generateEmbeddings } from "./transformers-local-onnx.ts"; // ~10-30ms (pre-downloaded local ONNX models, NO downloads, NO isolate loading)

// Main search function - comment/uncomment imports above to switch strategies
export { searchStrategy };

// Embeddings function for recalculate - uses same import as search strategy
export const getActiveEmbeddingsFunction = (): ((content: string) => Promise<number[] | null>) => {
  return generateEmbeddings;
};
```

Excerpt from `transformers-cosine.ts` (elisions marked with `// …`):

```typescript
// Transformers.js + Cosine Similarity Strategy: Local embeddings in browser/Deno
// Fastest option - no API calls, runs entirely client-side
// Model: all-MiniLM-L6-v2 (~86MB, ~384 dimensions, cached locally after first load)

// …
  // Load feature extraction pipeline with all-MiniLM-L6-v2 model
  // This is a small, fast model optimized for embeddings (~86MB, ~384 dims)
  // Downloads from Hugging Face on first run, then uses cache (~150ms vs several seconds)
  const pipelineStart = performance.now();
  // …
}

// Generate embeddings using transformers.js (runs locally, no API calls)
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
  try {
    const pipeline = await getEmbeddingPipeline();
    // Generate embeddings - returns a tensor, we need to extract the array
    const output = await pipeline(content, {
      pooling: 'mean',
      // …
    });
    // …
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    console.error("Transformers.js embeddings failed:", errorMessage);
    return null;
  }
};

export const searchStrategy: SearchStrategy = {
  name: "transformers-cosine",
  description: "Semantic search using Transformers.js local embeddings with cosine similarity (fastest, no API calls)",
  search: async (query: string, pages: Page[], options: SearchOptions = {}): Promise<SearchResult[]> => {
    const limit = options.limit || 10;

    // Generate query embedding (local, no API call)
    const queryEmbedStart = performance.now();
    const queryEmbedding = await generateEmbeddings(query);
    if (enableTiming) {
      timings.queryEmbedding = performance.now() - queryEmbedStart;
    }
    if (!queryEmbedding) return []; // bail out if embedding generation failed

    for (const page of pages) {
      const pageEmbedding = page.embeddings;
      if (!pageEmbedding || pageEmbedding.length !== queryEmbedding.length) {
        continue; // Skip pages without embeddings or wrong dimension
      }
      // …
    }
    // …
  },
};
```

**Learnings:**

- For TINY data sets you do very different things than for massive data sets; Vectorize, Mixedbread, and Turbopuffer are all set up for MASSIVE data sets
- 2-second responses are EASY; sub-1-second probably requires a VPS
- Network hops + isolate warmups kill latency; the cosine distance calc and the embeddings calc matter much less
- Small/cheap embeddings don't seem to be a problem (quality is an open question)
- Generating embeddings locally with a small model requires downloading the model (large and computationally expensive), while generating embeddings from an API costs around ~600ms
- Loading the ~80MB Xenova all-MiniLM model is fast once loaded, but it's a poor fit for serverless and too big for mobile users to download; the best option is probably to host it somewhere with the ONNX files saved locally so you can just run it
- Cloudflare AI embeddings are very fast, but the worker needs to be warmed up; if it's cold, expect ~800ms-2000ms. If you ping the CF AI embeddings with a fake request for warmup (e.g. while a user is typing), it's fast (see the sketch below)
- For testing at least, loading a massive JSON file into memory takes a very long time (10+ seconds)
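A minimal sketch of the Cloudflare warmup trick noted above, assuming the `generateEmbeddings` export from `cloudflare-bge-cosine.ts` (the event wiring is illustrative, not from the repo):

```typescript
import { generateEmbeddings } from "./search/cloudflare-bge-cosine.ts";

// Fire-and-forget warmup: one throwaway embeddings call while the user is
// still typing means the Workers AI endpoint is warm by the time the real
// query arrives (cold: ~800ms-2000ms; warm: fast).
let warmedUp = false;
export const warmUpEmbeddings = (): void => {
  if (warmedUp) return;
  warmedUp = true;
  generateEmbeddings("warmup").catch(() => {
    warmedUp = false; // allow a retry if the warmup ping itself failed
  });
};

// Illustrative wiring: trigger on the first keystroke in the search box.
// searchInput.addEventListener("input", warmUpEmbeddings, { once: true });
```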
Excerpt from `main.tsx`: page loading for search reads only from caches and never fetches live (elisions marked with `// …`):

```typescript
import { setCache /* … */ } from "./utils.ts";
import { generatePageMetadata } from "./groq.ts";
import { searchPages, getActiveEmbeddingsFunction, type Page } from "./search/index.ts";
import { answerQuestion, answerStrategy } from "./answer/index.ts";
import { urls } from "./urls.ts";

// … cached entries carry: content?, title?, embeddings?: number[] | null,
// metadata?, frontmatter?; the cache map is keyed by URL:
cacheMap[result.url] = {
  title: result.title,
  embeddings: result.embeddings || null,
  metadata: result.metadata || null,
};

// …
const frontmatter = cached.frontmatter;
// On Val Town, SQLite cache already has everything we need
// Content is available, embeddings are available - no need to fetch!
return {
  path,
  content: cached.content, // Already cached in SQLite, no fetch needed!
  title: frontmatter?.title || path,
  embeddings: cached.embeddings, // Already cached in SQLite
  metadata: cached.metadata,
};

// …
if (jsonCache && jsonCache[url]) {
  const jsonData = jsonCache[url];
  // JSON cache has embeddings, title, metadata from recalculate
  // We skip fetching content to speed up search - content is only for snippets
  // NO NETWORK CALLS - everything is from the JSON cache!
  if (jsonData.embeddings && Array.isArray(jsonData.embeddings) && jsonData.embeddings.length > 0) {
    // Use cached data - NO network fetch! This is why it's fast (was 6.5s, now 0.16ms)
    return {
      content: '', // Skipped - only needed for snippets, can fetch lazily if needed
      title: jsonData.title || path,
      embeddings: jsonData.embeddings, // From JSON cache (from recalculate)
      metadata: jsonData.metadata || null,
    };
  } else {
    // Missing embeddings - this shouldn't happen if recalculate ran successfully
    uncachedUrls.push(url);
  }
}

// If not cached anywhere OR cached data has no embeddings, skip it
// (search requires embeddings to work, and we NEVER fetch live data - that's the point!)
return null;
```

`main.tsx` also contains an earlier, simpler version of the recalculate script's generate-and-cache step:

```typescript
// Generate embeddings using active strategy's embeddings function
const generateEmbeddings = getActiveEmbeddingsFunction();
const embeddings = await generateEmbeddings(pageData.content);

// Cache it with metadata, hash, and embeddings - track errors
const cacheError = await setCache(url, { content: pageData.content, metadata, contentHash, embeddings });
if (cacheError) {
  console.warn(`Completed ${path} (metadata: ${metadata ? 'yes' : 'no'}, embeddings: ${embeddings ? 'yes' : 'no'}, cache error: ${cacheError})`);
} else {
  console.log(`Completed ${path} (metadata: ${metadata ? 'yes' : 'no'}, embeddings: ${embeddings ? 'yes' : 'no'}, cached: yes)`);
}

// Content is now included in latest.json from recalculate
console.log(`\n✅ Content loaded from JSON (${cachedData.results?.length || 0} pages with embeddings and content)\n`);

// Load test questions
// …
```
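Since ranking needs only embeddings, the skipped content could be hydrated lazily for just the top hits before snippet generation, as the comment above suggests. A hypothetical sketch (none of these names exist in the repo):

```typescript
// Hypothetical lazy hydration: rank with embeddings first, then fetch content
// for only the top N results so snippets can still be generated.
type RankedHit = { url: string; score: number; content?: string };

const hydrateSnippets = async (hits: RankedHit[], topN = 3): Promise<void> => {
  await Promise.all(
    hits.slice(0, topN).map(async (hit) => {
      if (!hit.content) {
        const res = await fetch(hit.url);
        hit.content = res.ok ? await res.text() : "";
      }
    }),
  );
};
```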
Excerpt from `hf-inference-qwen3-cosine.ts` (elisions marked with `// …`):

```typescript
// Hugging Face Inference Client + Qwen3-Embedding-8B + Cosine Similarity Strategy
// Uses @huggingface/inference SDK for embeddings via API
// Model: Qwen/Qwen3-Embedding-8B (8B params, up to 4096 dimensions, 32k context)

// …
  if (!HF_TOKEN) {
    throw new Error("HF_TOKEN, HF_API_KEY, or HUGGINGFACE_API_KEY not found - embeddings disabled");
  }
// …

// Generate embeddings using Hugging Face Inference Client
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
  try {
    const client = await getInferenceClient();
    // …
    // The API typically returns a nested array or flat array
    if (Array.isArray(output)) {
      // Handle nested array format [[...embeddings...]]
      if (Array.isArray(output[0])) {
        return output[0]; // Return first (and typically only) embedding vector
      }
      // Handle flat array format [...embeddings...]
      return output;
    }
    // Handle object format with embeddings property
    if (output && typeof output === 'object') {
      if (output.embeddings && Array.isArray(output.embeddings)) {
        return Array.isArray(output.embeddings[0]) ? output.embeddings[0] : output.embeddings;
      }
    }
    // …
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    console.error("HF Inference Client embeddings failed:", errorMessage);
    return null;
  }
};

// In search(): generate the query embedding (HF Inference API call), then run
// the same timed, dimension-checked rank loop as the other cosine strategies.
const queryEmbedStart = performance.now();
const queryEmbedding = await generateEmbeddings(query);
if (enableTiming) {
  timings.queryEmbedding = performance.now() - queryEmbedStart;
}
// …
```

Excerpt from `cloudflare-bge-cosine.ts` (elisions marked with `// …`):

```typescript
// Cloudflare Workers AI BGE-Large Strategy
// Uses Cloudflare's @cf/baai/bge-large-en-v1.5 model for embeddings with local cosine similarity
import type { SearchStrategy, SearchResult, Page, SearchOptions } from "./types.ts";

export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
  try {
    const { accountId, apiToken } = getCloudflareConfig();
    // …
    const responseData = await response.json();
    // Cloudflare wraps the response in:
    // { result: { shape: [1, 1024], data: [[...embeddings...]] }, success: true, errors: [], messages: [] }
    if (responseData.result && responseData.result.data &&
        Array.isArray(responseData.result.data) && responseData.result.data.length > 0) {
      const embedding = responseData.result.data[0];
      // …
    }
    // …
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    console.error("Cloudflare embeddings failed:", errorMessage);
    if (error instanceof Error && error.stack) {
      console.error("Stack trace:", error.stack);
    }
    return null;
  }
};

// In search(): generate the query embedding, then the shared rank loop:
const queryEmbedStart = performance.now();
const queryEmbedding = await generateEmbeddings(query);
if (enableTiming) {
  timings.queryEmbedding = performance.now() - queryEmbedStart;
}
// …
for (const page of pages) {
  const pageEmbedding = page.embeddings;
  if (!pageEmbedding || pageEmbedding.length !== queryEmbedding.length) {
    continue; // Skip pages without embeddings or wrong dimension
  }
  // …
}
```
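For reference, the rank-loop fragments above complete into the usual score-sort-trim shape. A sketch assuming the `cosineSimilarity` helper from the Search section and the `minScore`/`limit` options seen in the test harness:

```typescript
const scored: { page: Page; score: number }[] = [];
for (const page of pages) {
  const pageEmbedding = page.embeddings;
  if (!pageEmbedding || pageEmbedding.length !== queryEmbedding.length) {
    continue; // Skip pages without embeddings or wrong dimension
  }
  scored.push({ page, score: cosineSimilarity(queryEmbedding, pageEmbedding) });
}

// Highest similarity first, drop anything under minScore, keep the top `limit`.
scored.sort((a, b) => b.score - a.score);
const top = scored
  .filter((r) => r.score >= (options.minScore ?? 0))
  .slice(0, limit);
```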