Unlisted
HTTP (deprecated)
janpaul123-metaevalluator.web.val.run
July 30, 2024
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
/** @jsxImportSource https://esm.sh/react */
// This val creates a matrix of evaluations for different LLM models and context window sizes.
// It uses React with renderToString to generate the HTML on the server side.
// The client-side JavaScript handles the dynamic evaluation process, updating the table in real-time.
// For each context window size, all models are evaluated in parallel.
// Results are stored in a structured object attached to the page as `window.results`, keyed by model and then by context window size.
// Each table cell shows the scores of the individual runs.
// A start button initiates the evaluation process.
import React from "npm:react";
import { renderToString } from "npm:react-dom/server";
/**
 * Identifiers of the LLM models under evaluation.
 * One table row is rendered per entry, and the list is also serialized
 * into the client-side script.
 * @type {string[]}
 */
const models = [
  "gpt-4o",
  "gpt-4o-mini",
  "claude-3-5-sonnet-20240620",
  "ft:gpt-4o-mini-2024-07-18:val-town::9oyLBIYw",
];

/**
 * Context window sizes to evaluate; one table column is rendered per size.
 * @type {number[]}
 */
const contextWindowSizes = [200, 50, 10];

/**
 * Number of runs recorded per (model, context window size) cell.
 * @type {number}
 */
const runs = 3;
const App = () => (
<html lang="en">
<head>
<meta charSet="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>LLM Evaluation Matrix</title>
<style>
{`
table { border-collapse: collapse; }
th, td { border: 1px solid black; padding: 8px; text-align: center; }
.running { background-color: #ffff99; }
.queued { background-color: #e6e6e6; }
#evaluationFrames { display: flex; flex-wrap: wrap; gap: 10px; margin-top: 20px; }
.evalFrame { width: 48%; height: 300px; border: 1px solid #ccc; }
#startButton { margin: 20px 0; padding: 10px 20px; font-size: 16px; }
`}
</style>
</head>
<body>
<h1>LLM Evaluation Matrix</h1>
<button id="startButton">Start Evaluation</button>
<table id="resultTable">
<thead>
<tr>
<th>Model / Context Window Size</th>
{contextWindowSizes.map(size => <th key={size}>{size}</th>)}
</tr>
</thead>
<tbody>
{models.map(model => (
<tr key={model}>
<th>{model}</th>
{contextWindowSizes.map(size => <td key={size} className="queued">Queued</td>)}
</tr>
))}
</tbody>
</table>
<div id="status"></div>
<div id="evaluationFrames"></div>
<script
dangerouslySetInnerHTML={{
__html: `
const models = ${JSON.stringify(models)};
const contextWindowSizes = ${JSON.stringify(contextWindowSizes)};
const runs = ${runs};
let currentSize = 0;
let currentRun = 0;
window.results = {};
models.forEach(model => {
window.results[model] = {};
contextWindowSizes.forEach(size => {
window.results[model][size] = new Array(runs).fill(null);
});
});
function updateStatus() {
document.getElementById('status').textContent =
\`Running context window size \${contextWindowSizes[currentSize]} (Run \${currentRun + 1}/\${runs})\`;
}
function updateTable(model, size, score, status) {
const table = document.getElementById('resultTable');
const row = Array.from(table.rows).find(row => row.cells[0].textContent === model);
const cell = Array.from(row.cells).find((_, index) => table.rows[0].cells[index].textContent == size);
if (status === 'running') {
cell.className = 'running';
cell.textContent = 'Running...';
} else if (status === 'done') {
cell.className = '';
window.results[model][size][currentRun] = score;
cell.innerHTML = window.results[model][size].map(s => s !== null ? s.toFixed(2) : '-').join('<br>');
}
}
async function runEvaluation() {
if (currentRun >= runs) {
Val Town is a social website to write and deploy JavaScript.
Build APIs and schedule functions from your browser.
Comments
Nobody has commented on this val yet: be the first!
July 30, 2024