Search
Code168
import { generateSnippet } from "./utils.ts";// Fake embeddings function for placeholder (not actually used, but kept for interface consistency)export const generateEmbeddings = async (content: string): Promise<number[] | null> => { // Not used in placeholder strategy, but kept for consistency // Returns fake embeddings for caching purposes console.debug("Generating fake embeddings for placeholder strategy"); const fakeEmbedding: number[] = []; content: string; title?: string; embeddings?: number[] | null; metadata?: any;};// JigsawStack + Orama Strategy: Semantic search using JigsawStack embeddings and Oramaimport type { SearchStrategy, SearchResult, Page, SearchOptions } from "./types.ts";import { create, insertMultiple, search } from "npm:@orama/orama@latest";// JigsawStack embeddings function (v2 API)export const generateEmbeddings = async (content: string): Promise<number[] | null> => { const JIGSAWSTACK_API_KEY = Deno.env.get("JIGSAWSTACK_API_KEY"); const JIGSAWSTACK_API_URL = "https://api.jigsawstack.com/v2/embedding"; if (!JIGSAWSTACK_API_KEY) { console.warn("JIGSAWSTACK_API_KEY not found - embeddings disabled"); return null; } const data = await response.json(); // JigsawStack v2 returns: { success: true, embeddings: [[...]], chunks: [...], _usage: {...} } // embeddings is an array of arrays, we want the first embedding vector if (data.success && data.embeddings && Array.isArray(data.embeddings) && data.embeddings.length > 0) { return data.embeddings[0]; // Return the first embedding vector } } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); console.error("JigsawStack embeddings failed:", errorMessage); return null; }export const searchStrategy: SearchStrategy = { name: "jigsawstack-orama", description: "Semantic search using JigsawStack embeddings with Orama vector search", search: async (query: string, pages: Page[], options: SearchOptions = {}): Promise<SearchResult[]> => { const limit = options.limit || 10; // Generate query embedding (this is the main bottleneck - external API call) const queryEmbedStart = performance.now(); const queryEmbedding = await generateEmbeddings(query); if (enableTiming) timings.queryEmbedding = performance.now() - queryEmbedStart; } // Determine embedding dimension from query (or first page with embeddings) const embeddingDimension = queryEmbedding.length; if (embeddingDimension === 0) { if (enableTiming) timings.dbCreate = performance.now() - dbCreateStart; // Prepare documents with embeddings for insertion const documents: Array<{ path: string; }> = []; // Collect pages with embeddings const docPrepStart = performance.now(); for (const page of pages) { // Use cached embeddings if available, otherwise generate let pageEmbedding = page.embeddings; if (!pageEmbedding) { // Generate embedding if not cached (should be cached from recalculate, but handle missing) pageEmbedding = await generateEmbeddings(page.content); if (!pageEmbedding || pageEmbedding.length !== embeddingDimension) { continue; // Skip pages without embeddings or wrong dimension } }// OpenAI + Orama Strategy: Semantic search using OpenAI embeddings and Orama// Faster than JigsawStack (~100-200ms vs ~550ms for query embeddings)import type { SearchStrategy, SearchResult, Page, SearchOptions } from "./types.ts";import { create, insertMultiple, search } from "npm:@orama/orama@latest";// OpenAI embeddings functionexport const generateEmbeddings = async (content: string): Promise<number[] | null> => { const OPENAI_API_KEY = Deno.env.get("OPENAI_API_KEY"); const OPENAI_API_URL = "https://api.openai.com/v1/embeddings"; if (!OPENAI_API_KEY) { console.warn("OPENAI_API_KEY not found - embeddings disabled"); return null; } } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); console.error("OpenAI embeddings failed:", errorMessage); return null; }};// No caching for query embeddings - we want to benchmark actual API performanceexport const searchStrategy: SearchStrategy = { name: "openai-orama", description: "Semantic search using OpenAI embeddings with Orama vector search (faster than JigsawStack)", search: async (query: string, pages: Page[], options: SearchOptions = {}): Promise<SearchResult[]> => { const limit = options.limit || 10; // Generate query embedding (no caching - benchmarking actual API performance) const queryEmbedStart = performance.now(); const queryEmbedding = await generateEmbeddings(query); if (enableTiming) { timings.queryEmbedding = performance.now() - queryEmbedStart; } // Determine embedding dimension from query (or first page with embeddings) const embeddingDimension = queryEmbedding.length; if (embeddingDimension === 0) { if (enableTiming) timings.dbCreate = performance.now() - dbCreateStart; // Prepare documents with embeddings for insertion const documents: Array<{ path: string; }> = []; // Collect pages with embeddings const docPrepStart = performance.now(); for (const page of pages) { // Use cached embeddings if available, otherwise generate let pageEmbedding = page.embeddings; if (!pageEmbedding) { // Generate embedding if not cached (should be cached from recalculate, but handle missing) pageEmbedding = await generateEmbeddings(page.content); if (!pageEmbedding || pageEmbedding.length !== embeddingDimension) { continue; // Skip pages without embeddings or wrong dimension } }import { generateSnippet, cosineSimilarity } from "./utils.ts";// OpenAI embeddings functionexport const generateEmbeddings = async (content: string): Promise<number[] | null> => { const OPENAI_API_KEY = Deno.env.get("OPENAI_API_KEY"); const OPENAI_API_URL = "https://api.openai.com/v1/embeddings"; if (!OPENAI_API_KEY) { console.warn("OPENAI_API_KEY not found - embeddings disabled"); return null; } } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); console.error("OpenAI embeddings failed:", errorMessage); return null; }};// No caching for query embeddings - we want to benchmark actual API performanceexport const searchStrategy: SearchStrategy = { name: "openai-cosine", description: "Semantic search using OpenAI embeddings with direct cosine similarity (fastest for small datasets)", search: async (query: string, pages: Page[], options: SearchOptions = {}): Promise<SearchResult[]> => { const limit = options.limit || 10; // Generate query embedding (no caching - benchmarking actual API performance) const queryEmbedStart = performance.now(); const queryEmbedding = await generateEmbeddings(query); if (enableTiming) { timings.queryEmbedding = performance.now() - queryEmbedStart; for (const page of pages) { const pageEmbedding = page.embeddings; if (!pageEmbedding || pageEmbedding.length !== queryEmbedding.length) { continue; // Skip pages without embeddings or wrong dimension } // Initialize cache table only in Val Town (bypass SQLite when running locally)const CACHE_TABLE = "groq_docs_cache_v3"; // Updated table with hash and embeddingsif (IS_VALTOWN) { try { metadata TEXT, contentHash TEXT, embeddings TEXT, cachedAt INTEGER NOT NULL )`); metadata: any | null; contentHash: string | null; embeddings: number[] | null;} | null> => { if (!IS_VALTOWN) { try { const result = await sqlite.execute({ sql: `SELECT content, charCount, tokenCount, frontmatter, metadata, contentHash, embeddings FROM ${CACHE_TABLE} WHERE url = ?`, args: [url] }); metadata: rowObj.metadata ? JSON.parse(rowObj.metadata as string) : null, contentHash: rowObj.contentHash as string | null, embeddings: rowObj.embeddings ? JSON.parse(rowObj.embeddings as string) : null, }; }};export const setCache = async (url: string, data: { content: string; charCount: number; tokenCount: number | null; frontmatter: any; metadata?: any; contentHash?: string; embeddings?: number[] | null }): Promise<string | null> => { if (!IS_VALTOWN) { return "Cache unavailable - running locally"; try { await sqlite.execute({ sql: `INSERT OR REPLACE INTO ${CACHE_TABLE} (url, content, charCount, tokenCount, frontmatter, metadata, contentHash, embeddings, cachedAt) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, args: [ url, data.metadata ? JSON.stringify(data.metadata) : null, data.contentHash || null, data.embeddings ? JSON.stringify(data.embeddings) : null, Date.now(), ]// Embeddings and Search utilities// TODO: Replace fake implementations with actual embeddings API and vector search// Generate embeddings for content (fake default implementation)export const generateEmbeddings = async (content: string): Promise<number[] | null> => { // TODO: Replace with actual embeddings API when available // For now, return a fake embedding vector console.debug("Generating fake embeddings (replace with actual API when available)"); // Generate a fake embedding of fixed size (e.g., 384 dimensions) // Using a simple hash-based approach to create deterministic fake embeddings const fakeEmbedding: number[] = []; const hash = content.split('').reduce((acc, char) => acc + char.charCodeAt(0), 0);// Search pages by query (fake implementation using keyword matching)// TODO: Replace with actual vector similarity search using embeddingsexport const searchPages = async ( query: string, content: string; title?: string; embeddings?: number[] | null; metadata?: any; }>, const minScore = options.minScore || 0; // TODO: Use embeddings for semantic search // For now, use simple keyword matching };// Vector similarity search (for future use with embeddings)export const vectorSearch = async ( queryEmbedding: number[], url: string; title?: string; embeddings: number[] | null; }>, options: { for (const page of pages) { if (!page.embeddings) { continue; } const similarity = cosineSimilarity(queryEmbedding, page.embeddings); if (similarity >= minSimilarity) { "slug": "val-vibes", "link": "/blog/val-vibes", "description": "How to build semantic search with embeddings for Val Town within Val Town itself", "pubDate": "Tue, 18 Jun 2024 00:00:00 GMT", "author": "JP Posma", "slug": "val-vibes", "link": "/blog/val-vibes", "description": "How to build semantic search with embeddings for Val Town within Val Town itself", "pubDate": "Tue, 18 Jun 2024 00:00:00 GMT", "author": "JP Posma", "slug": "val-vibes", "link": "/blog/val-vibes", "description": "How to build semantic search with embeddings for Val Town within Val Town itself", "pubDate": "Tue, 18 Jun 2024 00:00:00 GMT", "author": "JP Posma",Users
No users found
No docs found