Search

174 results found for embeddings (3560ms)

Code
165

import { generateSnippet, cosineSimilarity } from "./utils.ts";
// OpenAI embeddings function
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
const OPENAI_API_KEY = Deno.env.get("OPENAI_API_KEY");
const OPENAI_API_URL = "https://api.openai.com/v1/embeddings";
if (!OPENAI_API_KEY) {
console.warn("OPENAI_API_KEY not found - embeddings disabled");
return null;
}
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
console.error("OpenAI embeddings failed:", errorMessage);
return null;
}
};
// No caching for query embeddings - we want to benchmark actual API performance
export const searchStrategy: SearchStrategy = {
name: "openai-cosine",
description: "Semantic search using OpenAI embeddings with direct cosine similarity (fastest f
search: async (query: string, pages: Page[], options: SearchOptions = {}): Promise<SearchResul
const limit = options.limit || 10;
// Generate query embedding (no caching - benchmarking actual API performance)
const queryEmbedStart = performance.now();
const queryEmbedding = await generateEmbeddings(query);
if (enableTiming) {
timings.queryEmbedding = performance.now() - queryEmbedStart;
for (const page of pages) {
const pageEmbedding = page.embeddings;
if (!pageEmbedding || pageEmbedding.length !== queryEmbedding.length) {
continue; // Skip pages without embeddings or wrong dimension
}
// Initialize cache table only in Val Town (bypass SQLite when running locally)
const CACHE_TABLE = "groq_docs_cache_v3"; // Updated table with hash and embeddings
if (IS_VALTOWN) {
try {
metadata TEXT,
contentHash TEXT,
embeddings TEXT,
cachedAt INTEGER NOT NULL
)`);
metadata: any | null;
contentHash: string | null;
embeddings: number[] | null;
} | null> => {
if (!IS_VALTOWN) {
try {
const result = await sqlite.execute({
Count, tokenCount, frontmatter, metadata, contentHash, embeddings FROM ${CACHE_TABLE} WHERE url
args: [url]
});
metadata: rowObj.metadata ? JSON.parse(rowObj.metadata as string) : null,
contentHash: rowObj.contentHash as string | null,
embeddings: rowObj.embeddings ? JSON.parse(rowObj.embeddings as string) : null,
};
}
};
rontmatter: any; metadata?: any; contentHash?: string; embeddings?: number[] | null }): Promise<
if (!IS_VALTOWN) {
return "Cache unavailable - running locally";
try {
await sqlite.execute({
Count, tokenCount, frontmatter, metadata, contentHash, embeddings, cachedAt) VALUES (?, ?, ?, ?,
args: [
url,
data.metadata ? JSON.stringify(data.metadata) : null,
data.contentHash || null,
data.embeddings ? JSON.stringify(data.embeddings) : null,
Date.now(),
]
// Embeddings and Search utilities
// TODO: Replace fake implementations with actual embeddings API and vector search
// Generate embeddings for content (fake default implementation)
export const generateEmbeddings = async (content: string): Promise<number[] | null> => {
// TODO: Replace with actual embeddings API when available
// For now, return a fake embedding vector
console.debug("Generating fake embeddings (replace with actual API when available)");
// Generate a fake embedding of fixed size (e.g., 384 dimensions)
// Using a simple hash-based approach to create deterministic fake embeddings
const fakeEmbedding: number[] = [];
const hash = content.split('').reduce((acc, char) => acc + char.charCodeAt(0), 0);
// Search pages by query (fake implementation using keyword matching)
// TODO: Replace with actual vector similarity search using embeddings
export const searchPages = async (
query: string,
content: string;
title?: string;
embeddings?: number[] | null;
metadata?: any;
}>,
const minScore = options.minScore || 0;
// TODO: Use embeddings for semantic search
// For now, use simple keyword matching
};
// Vector similarity search (for future use with embeddings)
export const vectorSearch = async (
queryEmbedding: number[],
url: string;
title?: string;
embeddings: number[] | null;
}>,
options: {
for (const page of pages) {
if (!page.embeddings) {
continue;
}
const similarity = cosineSimilarity(queryEmbedding, page.embeddings);
if (similarity >= minSimilarity) {
"slug": "val-vibes",
"link": "/blog/val-vibes",
"description": "How to build semantic search with embeddings for Val Town within Val Town it
"pubDate": "Tue, 18 Jun 2024 00:00:00 GMT",
"author": "JP Posma",
"slug": "val-vibes",
"link": "/blog/val-vibes",
"description": "How to build semantic search with embeddings for Val Town within Val Town it
"pubDate": "Tue, 18 Jun 2024 00:00:00 GMT",
"author": "JP Posma",
"slug": "val-vibes",
"link": "/blog/val-vibes",
"description": "How to build semantic search with embeddings for Val Town within Val Town it
"pubDate": "Tue, 18 Jun 2024 00:00:00 GMT",
"author": "JP Posma",
"slug": "val-vibes",
"link": "/blog/val-vibes",
"description": "How to build semantic search with embeddings for Val Town within Val Town it
"pubDate": "Tue, 18 Jun 2024 00:00:00 GMT",
"author": "JP Posma",
- `imageRecognition.labels`: Visual elements detected (people, objects, logos,
etc.)
- `vectors`: Text embeddings for semantic similarity (using Basilica method)
- **Content metadata fields** (may not yet be generally populated):
- `description`: Manual content descriptions
- `imageRecognition.labels`: Visual elements detected (people, objects, logos,
etc.)
- `vectors`: Text embeddings for semantic similarity (using Basilica method)
- **Content metadata fields** (may not yet be generally populated):
- `description`: Manual content descriptions
- `imageRecognition.labels`: Visual elements detected (people, objects, logos,
etc.)
- `vectors`: Text embeddings for semantic similarity (using Basilica method)
- **Content metadata fields** (may not yet be generally populated):
- `description`: Manual content descriptions
tmcw
surprisingEmbeddings
Visualizing embedding distances
Public
maxm
emojiVectorEmbeddings
 
Public
janpaul123
blogPostEmbeddingsDimensionalityReduction
 
Public
janpaul123
compareEmbeddings
 
Public
yawnxyz
embeddingsSearchExample
 
Public

Users

No users found
No docs found