Skip to content

Commit d8fc97d

Browse files
authored
Make thrown evaluator errors not interrupt dataset flow (#5017)
1 parent d6e25af commit d8fc97d

File tree

2 files changed

+70
-2
lines changed

2 files changed

+70
-2
lines changed

langchain/src/smith/runner_utils.ts

+4 -2
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,7 @@ const applyEvaluators = async ({
541541
for (let i = 0; i < runs.length; i += 1) {
542542
const run = runs[i];
543543
const example = examples[i];
544-
const evaluatorResults = await Promise.all(
544+
const evaluatorResults = await Promise.allSettled(
545545
evaluators.map((evaluator) =>
546546
client.evaluateRun(run, evaluator, {
547547
referenceExample: example,
@@ -555,7 +555,9 @@ const applyEvaluators = async ({
555555
run?.end_time && run.start_time
556556
? run.end_time - run.start_time
557557
: undefined,
558-
feedback: evaluatorResults,
558+
feedback: evaluatorResults.map((evalResult) =>
559+
evalResult.status === "fulfilled" ? evalResult.value : evalResult.reason
560+
),
559561
run_id: run.id,
560562
};
561563
}

langchain/src/smith/tests/run_on_dataset.int.test.ts

+66
Original file line numberDiff line numberDiff line change
@@ -251,3 +251,69 @@ test(`Chat model dataset`, async () => {
251251
})
252252
);
253253
});
254+
255+
test("Thrown errors should not interrupt dataset run", async () => {
  /**
   * Stand-in for a real pipeline: it rejects on every call, so each dataset
   * example produces a failed run.
   */
  async function ragPipeline(_: string): Promise<string> {
    throw new Error("I don't know, I am learning from aliens.");
  }

  // Question/answer pairs used to seed the dataset the first time it is created.
  const seedExamples = [
    {
      question: "When was the Apple Vision Pro released in the US?",
      answer:
        "The Apple Vision Pro was released in the United States on February 2, 2024.",
    },
    {
      question: "What is LangChain?",
      answer:
        "LangChain is an open-source framework for building applications using large language models.",
    },
    {
      question: "Who is the chairman of OpenAI?",
      answer: "Bret Taylor is the chairman of the OpenAI",
    },
  ];

  const lsClient = new Client();
  const datasetName = "JS run on dataset integration test";

  /**
   * Reads the dataset by name; if the read fails for any reason, creates the
   * dataset and uploads all seed examples concurrently.
   */
  const ensureDataset = async (): Promise<Dataset> => {
    try {
      return await lsClient.readDataset({ datasetName });
    } catch (e) {
      // Every read failure is handled as "dataset does not exist yet".
      const created = await lsClient.createDataset(datasetName);
      await Promise.all(
        seedExamples.map(({ question, answer }) =>
          lsClient.createExample(
            { question },
            { answer },
            { datasetId: created.id }
          )
        )
      );
      return created;
    }
  };
  await ensureDataset();

  // Custom evaluator that logs and then always throws; it exists to exercise
  // the evaluator-failure path.
  const dummy = async (_: DynamicRunEvaluatorParams) => {
    console.log("RUNNING EVAL");
    throw new Error("Expected error");
  };

  // Custom evaluators may be user-defined RunEvaluators or compatible
  // functions; here the only evaluator is the throwing one above.
  const evaluationConfig: RunEvalConfig = {
    customEvaluators: [dummy],
  };

  // Adapts the `{ question }` example input to the pipeline's string argument.
  const wrappedRagPipeline = async (input: {
    question: string;
  }): Promise<string> => ragPipeline(input.question);

  // The test makes no explicit assertions: it passes as long as this awaited
  // call does not reject. The returned value is only logged.
  const results = await runOnDataset(wrappedRagPipeline, datasetName, {
    evaluationConfig,
    maxConcurrency: 1,
  });
  console.log(results);
});

0 commit comments

Comments
 (0)