Search
Code127
import blogPostEmbeddingsDimensionalityReduction from "https://esm.town/v/janpaul123/blogPostEmbeddingsDimensionalityReduction";export async function semanticSearchBlogPostPlot() { { parseHTML: p }, ) => p(`<a>`)); const points = await blogPostEmbeddingsDimensionalityReduction(); const chart = Plot.plot({ document,
import OpenAI from "npm:openai";export default async function blogPostEmbeddingsDimensionalityReduction() { const points = [ "animal that barks", const openai = new OpenAI(); async function getEmbedding(str) { return (await openai.embeddings.create({ model: "text-embedding-3-large", input: str, })).data[0].embedding; } let embeddings = await blob.getJSON("blogPostEmbeddings"); if (!embeddings) { embeddings = await Promise.all(points.map((point) => getEmbedding(point))); await blob.setJSON("blogPostEmbeddings", embeddings); } const matrix = druid.Matrix.from(embeddings); const dr = new druid.UMAP(matrix, { n_neighbors: 2,}console.log(await blogPostEmbeddingsDimensionalityReduction());
Migrated from folder: semanticSearchBlogPost/blogPostEmbeddingsDimensionalityReduction
const cache = {};async function getEmbedding(str) { cache[str] = cache[str] || (await openai.embeddings.create({ model: "text-embedding-3-large", input: str,
) as any;// const allValsBlobEmbeddingsMeta = (await blob.getJSON(`allValsBlob${dimensions}EmbeddingsMeta`)) ?? {};const allValsBlobEmbeddingsMeta = {};const existingEmbeddingsIds = new Set(Object.keys(allValsBlobEmbeddingsMeta));function idForVal(val: any): string {for (const val of allVals) { const id = idForVal(val); if (!existingEmbeddingsIds.has(id)) { currentBatch.push(val); }let nextDataIndex = Math.max( 0, ...Object.values(allValsBlobEmbeddingsMeta).map((item: any) => item.batchDataIndex + 1),);for (const newValsBatch of newValsBatches) { const batchDataIndex = nextDataIndex; const embeddingsBatch = new Float32Array(dimensions * newValsBatch.length); await Promise.all([...Array(newValsBatch.length).keys()].map(async (valIndex) => { const val = newValsBatch[valIndex]; const code = getValCode(val); const embedding = await openai.embeddings.create({ model: "text-embedding-3-small", input: truncateMessage(code, "text-embedding-3-small"), const id = idForVal(val); embeddingsBatch.set(embeddingBinary, dimensions * valIndex); allValsBlobEmbeddingsMeta[id] = { batchDataIndex, valIndex }; })); const embeddingsBatchBlobName = `allValsBlob${dimensions}EmbeddingsData_${batchDataIndex}`; await blob.set(embeddingsBatchBlobName, embeddingsBatch.buffer); await blob.setJSON(`allValsBlob${dimensions}EmbeddingsMeta`, allValsBlobEmbeddingsMeta); console.log( `Saved batch to ${embeddingsBatchBlobName} with ${newValsBatch.length} records (${embeddingsBatch.byteLength} bytes) ${ batchDataIndex + 1 }/${newValsBatches.length}`,}console.log(`Finished, we have indexed ${Object.keys(allValsBlobEmbeddingsMeta).length} records`);
export default async function semanticSearchPublicVals(query) { const client = new Client(Deno.env.get("NEON_URL_VALSEMBEDDINGS")); await client.connect(); const openai = new OpenAI(); const queryEmbedding = (await openai.embeddings.create({ model: "text-embedding-3-small", input: query, const embeddedBinaryString = `[${queryEmbedding.join(",")}]`; const result = await client .queryObject`SELECT id, embedding <=> ${embeddedBinaryString} AS distance FROM vals_embeddings ORDER BY embedding <=> ${embeddedBinaryString} LIMIT 50`; return result.rows.map((row) => {
import { truncateMessage } from "npm:openai-tokens";// CREATE TABLE vals_embeddings (id TEXT PRIMARY KEY, embedding VECTOR(1536));export default async function() { const dimensions = 1536; const client = new Client(Deno.env.get("NEON_URL_VALSEMBEDDINGS")); await client.connect(); ) as any; const existingEmbeddingsIds = new Set( (await client.queryObject`SELECT id FROM vals_embeddings`).rows.map(row => row.id), ); for (const val of allVals) { const id = idForVal(val); if (!existingEmbeddingsIds.has(id)) { currentBatch.push(val); } const code = getValCode(val); const embedding = await openai.embeddings.create({ model: "text-embedding-3-small", input: truncateMessage(code, "text-embedding-3-small"), const embeddedBinaryString = `[${embeddingBinary.join(",")}]`; const result = await client .queryObject`INSERT INTO vals_embeddings (id, embedding) VALUES (${id}, ${embeddedBinaryString})`; console.log(`Processed ${id}..`); }));
Migrated from folder: semanticSearchBlogPost/compareEmbeddings
export const getBlurbFromUrl = async (url: string, { getSummary=true, getSummaryPrompt, getTags=true, getTagsPrompt, getEmbeddings} = {}) => { const content = await getContentFromUrl(url); } let summary, tags, embeddings if(getSummary) { summary = await getSummaryFn(getSummaryPrompt, content); tags = await getTagsFn(getTagsPrompt, content); } if(getEmbeddings) { embeddings = await getEmbeddingsFn(content); } summary: summary.text, tags: tags.text, embeddings }};}export const getEmbeddingsFn = async (content) => { let result = await ai({ provider: "openai", // const body = await c.req.parseBody() || await c.req.json(); const body = await c.req.json(); const { url, getTags, getTagsPrompt, getSummary, getSummaryPrompt, getEmbeddings } = body; const content = await getContentFromUrl(url); } let summary, tags, embeddings if(getSummary) { summary = await getSummaryFn(getSummaryPrompt, content); tags = await getTagsFn(getTagsPrompt, content); } if(getEmbeddings) { embeddings = await getEmbeddingsFn(content); } return c.json({ summary: summary.text, tags: tags.text, content, embeddings, });});