Skip to content

Commit 8f6491d

Browse files
authored
Web UI for running evals (promptfoo#103)
1 parent 4bb5415 commit 8f6491d

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

65 files changed

+6579
-6011
lines changed

.prettierignore

+3
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,5 @@
11
dist
22
venv
3+
.aider*
4+
src/web/nextui/out
5+
src/web/nextui/.next

package-lock.json

+161
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

+9-4
Original file line number | Diff line number | Diff line change
@@ -25,13 +25,14 @@
2525
"promptfoo": "dist/src/main.js"
2626
},
2727
"scripts": {
28+
"tsc": "tsc",
2829
"local": "ts-node --esm --files src/main.ts",
29-
"install:client": "cd src/web/client && npm install",
30+
"install:nextui": "cd src/web/nextui && npm install",
3031
"build:clean": "rm -rf dist",
31-
"build:client": "cd src/web/client && npm run build && cp -r dist/ ../../../dist/src/web/client",
32+
"build:nextui": "cd src/web/nextui && npm run build && cp -r out/ ../../../dist/src/web/nextui",
3233
"build:watch": "tsc --watch",
33-
"build": "tsc && cp src/*.html dist/src && npm run build:client && chmod +x dist/src/main.js",
34-
"prepare": "npm run install:client && npm run build:clean && npm run build",
34+
"build": "tsc && cp src/*.html dist/src && npm run build:nextui && chmod +x dist/src/main.js",
35+
"prepare": "npm run install:nextui && npm run build:clean && npm run build",
3536
"test": "jest",
3637
"test:watch": "jest --watch",
3738
"format": "prettier -w ."
@@ -41,6 +42,7 @@
4142
"@types/cache-manager": "^4.0.2",
4243
"@types/cache-manager-fs-hash": "^0.0.1",
4344
"@types/cli-progress": "^3.11.0",
45+
"@types/compression": "^1.7.2",
4446
"@types/cors": "^2.8.13",
4547
"@types/debounce": "^1.2.1",
4648
"@types/express": "^4.17.17",
@@ -51,6 +53,7 @@
5153
"@types/nunjucks": "^3.2.2",
5254
"@types/opener": "^1.4.0",
5355
"@types/semver": "^7.5.0",
56+
"@types/uuid": "^9.0.2",
5457
"babel-jest": "^29.5.0",
5558
"jest": "^29.5.0",
5659
"jest-watch-typeahead": "^2.2.2",
@@ -69,6 +72,7 @@
6972
"cli-progress": "^3.12.0",
7073
"cli-table3": "^0.6.3",
7174
"commander": "^10.0.1",
75+
"compression": "^1.7.4",
7276
"cors": "^2.8.5",
7377
"csv-parse": "^5.3.8",
7478
"csv-stringify": "^6.3.2",
@@ -84,6 +88,7 @@
8488
"semver": "^7.5.3",
8589
"socket.io": "^4.6.1",
8690
"tiny-invariant": "^1.3.1",
91+
"uuid": "^9.0.0",
8792
"winston": "^3.8.2"
8893
}
8994
}

src/evaluator.ts

+17-9
Original file line number | Diff line number | Diff line change
@@ -385,7 +385,6 @@ class Evaluator {
385385
// Set up progress bar...
386386
let progressbar: SingleBar | undefined;
387387
if (options.showProgressBar) {
388-
const totalNumRuns = runEvalOptions.length;
389388
const cliProgress = await import('cli-progress');
390389
progressbar = new cliProgress.SingleBar(
391390
{
@@ -394,34 +393,40 @@ class Evaluator {
394393
},
395394
cliProgress.Presets.shades_classic,
396395
);
397-
progressbar.start(totalNumRuns, 0, {
396+
progressbar.start(runEvalOptions.length, 0, {
398397
provider: '',
399398
prompt: '',
400399
vars: '',
401400
});
402401
}
402+
if (options.progressCallback) {
403+
options.progressCallback(0, runEvalOptions.length);
404+
}
403405

404406
// Actually run the eval
405407
const results: EvaluateResult[] = [];
406408
await async.forEachOfLimit(
407409
runEvalOptions,
408410
options.maxConcurrency || DEFAULT_MAX_CONCURRENCY,
409-
async (options: RunEvalOptions, index: number | string) => {
410-
const row = await this.runEval(options);
411+
async (evalStep: RunEvalOptions, index: number | string) => {
412+
const row = await this.runEval(evalStep);
411413

412414
results.push(row);
413415

414416
if (progressbar) {
415417
progressbar.increment({
416-
provider: options.provider.id(),
417-
prompt: options.prompt.raw.slice(0, 10).replace(/\n/g, ' '),
418-
vars: Object.entries(options.test.vars || {})
418+
provider: evalStep.provider.id(),
419+
prompt: evalStep.prompt.raw.slice(0, 10).replace(/\n/g, ' '),
420+
vars: Object.entries(evalStep.test.vars || {})
419421
.map(([k, v]) => `${k}=${v}`)
420422
.join(' ')
421423
.slice(0, 10)
422424
.replace(/\n/g, ' '),
423425
});
424426
}
427+
if (options.progressCallback) {
428+
options.progressCallback(results.length, runEvalOptions.length);
429+
}
425430

426431
// Bookkeeping for table
427432
if (typeof index !== 'number') {
@@ -441,13 +446,13 @@ class Evaluator {
441446
resultText = row.response?.output || row.error || '';
442447
}
443448

444-
const { rowIndex, colIndex } = options;
449+
const { rowIndex, colIndex } = evalStep;
445450
if (!table.body[rowIndex]) {
446451
table.body[rowIndex] = {
447452
outputs: [],
448453
vars: table.head.vars
449454
.map((varName) => {
450-
const varValue = options.test.vars?.[varName] || '';
455+
const varValue = evalStep.test.vars?.[varName] || '';
451456
if (typeof varValue === 'string') {
452457
return varValue;
453458
}
@@ -475,6 +480,9 @@ class Evaluator {
475480
if (progressbar) {
476481
progressbar.stop();
477482
}
483+
if (options.progressCallback) {
484+
options.progressCallback(runEvalOptions.length, runEvalOptions.length);
485+
}
478486

479487
telemetry.record('eval_ran', {});
480488

0 commit comments

Comments
 (0)