Search

178 results found for embeddings (5413ms)

Code
168

import { generateSnippet } from "./utils.ts";
// Fake embeddings function for placeholder (not actually used, but kept for interface consistency)
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
// Not used in placeholder strategy, but kept for consistency
// Returns fake embeddings for caching purposes
console.debug("Generating fake embeddings for placeholder strategy");
const fakeEmbedding: number[] = [];
content: string;
title?: string;
embeddings?: number[] | null;
metadata?: any;
};
// JigsawStack + Orama Strategy: Semantic search using JigsawStack embeddings and Orama
import type { SearchStrategy, SearchResult, Page, SearchOptions } from "./types.ts";
import { create, insertMultiple, search } from "npm:@orama/orama@latest";
// JigsawStack embeddings function (v2 API)
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
const JIGSAWSTACK_API_KEY = Deno.env.get("JIGSAWSTACK_API_KEY");
const JIGSAWSTACK_API_URL = "https://api.jigsawstack.com/v2/embedding";
if (!JIGSAWSTACK_API_KEY) {
console.warn("JIGSAWSTACK_API_KEY not found - embeddings disabled");
return null;
}
const data = await response.json();
// JigsawStack v2 returns: { success: true, embeddings: [[...]], chunks: [...], _usage: {...
// embeddings is an array of arrays, we want the first embedding vector
if (data.success && data.embeddings && Array.isArray(data.embeddings) && data.embeddings.len
return data.embeddings[0]; // Return the first embedding vector
}
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
console.error("JigsawStack embeddings failed:", errorMessage);
return null;
}
export const searchStrategy: SearchStrategy = {
name: "jigsawstack-orama",
description: "Semantic search using JigsawStack embeddings with Orama vector search",
search: async (query: string, pages: Page[], options: SearchOptions = {}): Promise<SearchResul
const limit = options.limit || 10;
// Generate query embedding (this is the main bottleneck - external API call)
const queryEmbedStart = performance.now();
const queryEmbedding = await generateEmbeddings(query);
if (enableTiming) timings.queryEmbedding = performance.now() - queryEmbedStart;
}
// Determine embedding dimension from query (or first page with embeddings)
const embeddingDimension = queryEmbedding.length;
if (embeddingDimension === 0) {
if (enableTiming) timings.dbCreate = performance.now() - dbCreateStart;
// Prepare documents with embeddings for insertion
const documents: Array<{
path: string;
}> = [];
// Collect pages with embeddings
const docPrepStart = performance.now();
for (const page of pages) {
// Use cached embeddings if available, otherwise generate
let pageEmbedding = page.embeddings;
if (!pageEmbedding) {
// Generate embedding if not cached (should be cached from recalculate, but handle missi
pageEmbedding = await generateEmbeddings(page.content);
if (!pageEmbedding || pageEmbedding.length !== embeddingDimension) {
continue; // Skip pages without embeddings or wrong dimension
}
}
// OpenAI + Orama Strategy: Semantic search using OpenAI embeddings and Orama
// Faster than JigsawStack (~100-200ms vs ~550ms for query embeddings)
import type { SearchStrategy, SearchResult, Page, SearchOptions } from "./types.ts";
import { create, insertMultiple, search } from "npm:@orama/orama@latest";
// OpenAI embeddings function
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
const OPENAI_API_KEY = Deno.env.get("OPENAI_API_KEY");
const OPENAI_API_URL = "https://api.openai.com/v1/embeddings";
if (!OPENAI_API_KEY) {
console.warn("OPENAI_API_KEY not found - embeddings disabled");
return null;
}
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
console.error("OpenAI embeddings failed:", errorMessage);
return null;
}
};
// No caching for query embeddings - we want to benchmark actual API performance
export const searchStrategy: SearchStrategy = {
name: "openai-orama",
description: "Semantic search using OpenAI embeddings with Orama vector search (faster than Ji
search: async (query: string, pages: Page[], options: SearchOptions = {}): Promise<SearchResul
const limit = options.limit || 10;
// Generate query embedding (no caching - benchmarking actual API performance)
const queryEmbedStart = performance.now();
const queryEmbedding = await generateEmbeddings(query);
if (enableTiming) {
timings.queryEmbedding = performance.now() - queryEmbedStart;
}
// Determine embedding dimension from query (or first page with embeddings)
const embeddingDimension = queryEmbedding.length;
if (embeddingDimension === 0) {
if (enableTiming) timings.dbCreate = performance.now() - dbCreateStart;
// Prepare documents with embeddings for insertion
const documents: Array<{
path: string;
}> = [];
// Collect pages with embeddings
const docPrepStart = performance.now();
for (const page of pages) {
// Use cached embeddings if available, otherwise generate
let pageEmbedding = page.embeddings;
if (!pageEmbedding) {
// Generate embedding if not cached (should be cached from recalculate, but handle missi
pageEmbedding = await generateEmbeddings(page.content);
if (!pageEmbedding || pageEmbedding.length !== embeddingDimension) {
continue; // Skip pages without embeddings or wrong dimension
}
}
import { generateSnippet, cosineSimilarity } from "./utils.ts";
// OpenAI embeddings function
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
const OPENAI_API_KEY = Deno.env.get("OPENAI_API_KEY");
const OPENAI_API_URL = "https://api.openai.com/v1/embeddings";
if (!OPENAI_API_KEY) {
console.warn("OPENAI_API_KEY not found - embeddings disabled");
return null;
}
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
console.error("OpenAI embeddings failed:", errorMessage);
return null;
}
};
// No caching for query embeddings - we want to benchmark actual API performance
export const searchStrategy: SearchStrategy = {
name: "openai-cosine",
description: "Semantic search using OpenAI embeddings with direct cosine similarity (fastest f
search: async (query: string, pages: Page[], options: SearchOptions = {}): Promise<SearchResul
const limit = options.limit || 10;
// Generate query embedding (no caching - benchmarking actual API performance)
const queryEmbedStart = performance.now();
const queryEmbedding = await generateEmbeddings(query);
if (enableTiming) {
timings.queryEmbedding = performance.now() - queryEmbedStart;
for (const page of pages) {
const pageEmbedding = page.embeddings;
if (!pageEmbedding || pageEmbedding.length !== queryEmbedding.length) {
continue; // Skip pages without embeddings or wrong dimension
}
// Initialize cache table only in Val Town (bypass SQLite when running locally)
const CACHE_TABLE = "groq_docs_cache_v3"; // Updated table with hash and embeddings
if (IS_VALTOWN) {
try {
metadata TEXT,
contentHash TEXT,
embeddings TEXT,
cachedAt INTEGER NOT NULL
)`);
metadata: any | null;
contentHash: string | null;
embeddings: number[] | null;
} | null> => {
if (!IS_VALTOWN) {
try {
const result = await sqlite.execute({
Count, tokenCount, frontmatter, metadata, contentHash, embeddings FROM ${CACHE_TABLE} WHERE url
args: [url]
});
metadata: rowObj.metadata ? JSON.parse(rowObj.metadata as string) : null,
contentHash: rowObj.contentHash as string | null,
embeddings: rowObj.embeddings ? JSON.parse(rowObj.embeddings as string) : null,
};
}
};
rontmatter: any; metadata?: any; contentHash?: string; embeddings?: number[] | null }): Promise<
if (!IS_VALTOWN) {
return "Cache unavailable - running locally";
try {
await sqlite.execute({
Count, tokenCount, frontmatter, metadata, contentHash, embeddings, cachedAt) VALUES (?, ?, ?, ?,
args: [
url,
data.metadata ? JSON.stringify(data.metadata) : null,
data.contentHash || null,
data.embeddings ? JSON.stringify(data.embeddings) : null,
Date.now(),
]
// Embeddings and Search utilities
// TODO: Replace fake implementations with actual embeddings API and vector search
// Generate embeddings for content (fake default implementation)
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
// TODO: Replace with actual embeddings API when available
// For now, return a fake embedding vector
console.debug("Generating fake embeddings (replace with actual API when available)");
// Generate a fake embedding of fixed size (e.g., 384 dimensions)
// Using a simple hash-based approach to create deterministic fake embeddings
const fakeEmbedding: number[] = [];
const hash = content.split('').reduce((acc, char) => acc + char.charCodeAt(0), 0);
// Search pages by query (fake implementation using keyword matching)
// TODO: Replace with actual vector similarity search using embeddings
export const searchPages = async (
query: string,
content: string;
title?: string;
embeddings?: number[] | null;
metadata?: any;
}>,
const minScore = options.minScore || 0;
// TODO: Use embeddings for semantic search
// For now, use simple keyword matching
};
// Vector similarity search (for future use with embeddings)
export const vectorSearch = async (
queryEmbedding: number[],
url: string;
title?: string;
embeddings: number[] | null;
}>,
options: {
for (const page of pages) {
if (!page.embeddings) {
continue;
}
const similarity = cosineSimilarity(queryEmbedding, page.embeddings);
if (similarity >= minSimilarity) {
"slug": "val-vibes",
"link": "/blog/val-vibes",
"description": "How to build semantic search with embeddings for Val Town within Val Town it
"pubDate": "Tue, 18 Jun 2024 00:00:00 GMT",
"author": "JP Posma",
"slug": "val-vibes",
"link": "/blog/val-vibes",
"description": "How to build semantic search with embeddings for Val Town within Val Town it
"pubDate": "Tue, 18 Jun 2024 00:00:00 GMT",
"author": "JP Posma",
"slug": "val-vibes",
"link": "/blog/val-vibes",
"description": "How to build semantic search with embeddings for Val Town within Val Town it
"pubDate": "Tue, 18 Jun 2024 00:00:00 GMT",
"author": "JP Posma",
peterqliu
PineconeIndex
Vector DBs on Pinecone, with OpenAI embeddings
Public
tmcw
surprisingEmbeddings
Visualizing embedding distances
Public
maxm
emojiVectorEmbeddings
 
Public
janpaul123
blogPostEmbeddingsDimensionalityReduction
 
Public
janpaul123
compareEmbeddings
 
Public

Users

No users found
No docs found