Skip to content

Commit 96bb84a

Browse files
authored
[MLOB-2240] fix(openai, langchain, llmobs): OpenAI and LangChain instrumentation is ESM-compatible (#5267)
* add module register hooks * fix instrumentation * re-enable tests * file extension fix for openai * Update packages/datadog-plugin-openai/test/integration-test/client.spec.js * trigger ci * change openai instrumentation * fix for new openai version * try fix for --loader * name arguments * name arguments
1 parent 23abc09 commit 96bb84a

File tree

8 files changed

+136
-100
lines changed

8 files changed

+136
-100
lines changed

initialize.mjs

+8-4
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,12 @@ ${result.source}`
3535
const [NODE_MAJOR, NODE_MINOR] = process.versions.node.split('.').map(x => +x)
3636

3737
const brokenLoaders = NODE_MAJOR === 18 && NODE_MINOR === 0
38+
const iitmExclusions = [/langsmith/, /openai\/_shims/, /openai\/resources\/chat\/completions\/messages/]
3839

39-
export async function load (...args) {
40-
const loadHook = brokenLoaders ? args[args.length - 1] : origLoad
41-
return insertInit(await loadHook(...args))
40+
export async function load (url, context, nextLoad) {
41+
const iitmExclusionsMatch = iitmExclusions.some((exclusion) => exclusion.test(url))
42+
const loadHook = (brokenLoaders || iitmExclusionsMatch) ? nextLoad : origLoad
43+
return insertInit(await loadHook(url, context, nextLoad))
4244
}
4345

4446
export const resolve = brokenLoaders ? undefined : origResolve
@@ -53,6 +55,8 @@ if (isMainThread) {
5355
const require = Module.createRequire(import.meta.url)
5456
require('./init.js')
5557
if (Module.register) {
56-
Module.register('./loader-hook.mjs', import.meta.url)
58+
Module.register('./loader-hook.mjs', import.meta.url, {
59+
data: { exclude: iitmExclusions }
60+
})
5761
}
5862
}

packages/datadog-instrumentations/src/helpers/hooks.js

+3-3
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ module.exports = {
1919
'@jest/test-sequencer': () => require('../jest'),
2020
'@jest/transform': () => require('../jest'),
2121
'@koa/router': () => require('../koa'),
22-
'@langchain/core': () => require('../langchain'),
23-
'@langchain/openai': () => require('../langchain'),
22+
'@langchain/core': { esmFirst: true, fn: () => require('../langchain') },
23+
'@langchain/openai': { esmFirst: true, fn: () => require('../langchain') },
2424
'@node-redis/client': () => require('../redis'),
2525
'@opensearch-project/opensearch': () => require('../opensearch'),
2626
'@opentelemetry/sdk-trace-node': () => require('../otel-sdk-trace'),
@@ -100,7 +100,7 @@ module.exports = {
100100
'node:vm': () => require('../vm'),
101101
nyc: () => require('../nyc'),
102102
oracledb: () => require('../oracledb'),
103-
openai: () => require('../openai'),
103+
openai: { esmFirst: true, fn: () => require('../openai') },
104104
paperplane: () => require('../paperplane'),
105105
passport: () => require('../passport'),
106106
'passport-http': () => require('../passport-http'),

packages/datadog-instrumentations/src/openai.js

+78-74
Original file line numberDiff line numberDiff line change
@@ -8,98 +8,98 @@ const ch = dc.tracingChannel('apm:openai:request')
88

99
const V4_PACKAGE_SHIMS = [
1010
{
11-
file: 'resources/chat/completions.js',
11+
file: 'resources/chat/completions',
1212
targetClass: 'Completions',
1313
baseResource: 'chat.completions',
1414
methods: ['create'],
1515
streamedResponse: true
1616
},
1717
{
18-
file: 'resources/completions.js',
18+
file: 'resources/completions',
1919
targetClass: 'Completions',
2020
baseResource: 'completions',
2121
methods: ['create'],
2222
streamedResponse: true
2323
},
2424
{
25-
file: 'resources/embeddings.js',
25+
file: 'resources/embeddings',
2626
targetClass: 'Embeddings',
2727
baseResource: 'embeddings',
2828
methods: ['create']
2929
},
3030
{
31-
file: 'resources/files.js',
31+
file: 'resources/files',
3232
targetClass: 'Files',
3333
baseResource: 'files',
3434
methods: ['create', 'del', 'list', 'retrieve']
3535
},
3636
{
37-
file: 'resources/files.js',
37+
file: 'resources/files',
3838
targetClass: 'Files',
3939
baseResource: 'files',
4040
methods: ['retrieveContent'],
4141
versions: ['>=4.0.0 <4.17.1']
4242
},
4343
{
44-
file: 'resources/files.js',
44+
file: 'resources/files',
4545
targetClass: 'Files',
4646
baseResource: 'files',
4747
methods: ['content'], // replaced `retrieveContent` in v4.17.1
4848
versions: ['>=4.17.1']
4949
},
5050
{
51-
file: 'resources/images.js',
51+
file: 'resources/images',
5252
targetClass: 'Images',
5353
baseResource: 'images',
5454
methods: ['createVariation', 'edit', 'generate']
5555
},
5656
{
57-
file: 'resources/fine-tuning/jobs/jobs.js',
57+
file: 'resources/fine-tuning/jobs/jobs',
5858
targetClass: 'Jobs',
5959
baseResource: 'fine_tuning.jobs',
6060
methods: ['cancel', 'create', 'list', 'listEvents', 'retrieve'],
6161
versions: ['>=4.34.0'] // file location changed in 4.34.0
6262
},
6363
{
64-
file: 'resources/fine-tuning/jobs.js',
64+
file: 'resources/fine-tuning/jobs',
6565
targetClass: 'Jobs',
6666
baseResource: 'fine_tuning.jobs',
6767
methods: ['cancel', 'create', 'list', 'listEvents', 'retrieve'],
6868
versions: ['>=4.1.0 <4.34.0']
6969
},
7070
{
71-
file: 'resources/fine-tunes.js', // deprecated after 4.1.0
71+
file: 'resources/fine-tunes', // deprecated after 4.1.0
7272
targetClass: 'FineTunes',
7373
baseResource: 'fine-tune',
7474
methods: ['cancel', 'create', 'list', 'listEvents', 'retrieve'],
7575
versions: ['>=4.0.0 <4.1.0']
7676
},
7777
{
78-
file: 'resources/models.js',
78+
file: 'resources/models',
7979
targetClass: 'Models',
8080
baseResource: 'models',
8181
methods: ['del', 'list', 'retrieve']
8282
},
8383
{
84-
file: 'resources/moderations.js',
84+
file: 'resources/moderations',
8585
targetClass: 'Moderations',
8686
baseResource: 'moderations',
8787
methods: ['create']
8888
},
8989
{
90-
file: 'resources/audio/transcriptions.js',
90+
file: 'resources/audio/transcriptions',
9191
targetClass: 'Transcriptions',
9292
baseResource: 'audio.transcriptions',
9393
methods: ['create']
9494
},
9595
{
96-
file: 'resources/audio/translations.js',
96+
file: 'resources/audio/translations',
9797
targetClass: 'Translations',
9898
baseResource: 'audio.translations',
9999
methods: ['create']
100100
},
101101
{
102-
file: 'resources/chat/completions/completions.js',
102+
file: 'resources/chat/completions/completions',
103103
targetClass: 'Completions',
104104
baseResource: 'chat.completions',
105105
methods: ['create'],
@@ -267,82 +267,86 @@ function wrapStreamIterator (response, options, n, ctx) {
267267
}
268268
}
269269

270-
for (const shim of V4_PACKAGE_SHIMS) {
271-
const { file, targetClass, baseResource, methods, versions, streamedResponse } = shim
272-
addHook({ name: 'openai', file, versions: versions || ['>=4'] }, exports => {
273-
const targetPrototype = exports[targetClass].prototype
270+
const extensions = ['.js', '.mjs']
274271

275-
for (const methodName of methods) {
276-
shimmer.wrap(targetPrototype, methodName, methodFn => function () {
277-
if (!ch.start.hasSubscribers) {
278-
return methodFn.apply(this, arguments)
279-
}
272+
for (const extension of extensions) {
273+
for (const shim of V4_PACKAGE_SHIMS) {
274+
const { file, targetClass, baseResource, methods, versions, streamedResponse } = shim
275+
addHook({ name: 'openai', file: file + extension, versions: versions || ['>=4'] }, exports => {
276+
const targetPrototype = exports[targetClass].prototype
280277

281-
// The OpenAI library lets you set `stream: true` on the options arg to any method
282-
// However, we only want to handle streamed responses in specific cases
283-
// chat.completions and completions
284-
const stream = streamedResponse && getOption(arguments, 'stream', false)
285-
286-
// we need to compute how many prompts we are sending in streamed cases for completions
287-
// not applicable for chat completiond
288-
let n
289-
if (stream) {
290-
n = getOption(arguments, 'n', 1)
291-
const prompt = getOption(arguments, 'prompt')
292-
if (Array.isArray(prompt) && typeof prompt[0] !== 'number') {
293-
n *= prompt.length
278+
for (const methodName of methods) {
279+
shimmer.wrap(targetPrototype, methodName, methodFn => function () {
280+
if (!ch.start.hasSubscribers) {
281+
return methodFn.apply(this, arguments)
294282
}
295-
}
296283

297-
const client = this._client || this.client
284+
// The OpenAI library lets you set `stream: true` on the options arg to any method
285+
// However, we only want to handle streamed responses in specific cases
286+
// chat.completions and completions
287+
const stream = streamedResponse && getOption(arguments, 'stream', false)
288+
289+
// we need to compute how many prompts we are sending in streamed cases for completions
290+
// not applicable for chat completions
291+
let n
292+
if (stream) {
293+
n = getOption(arguments, 'n', 1)
294+
const prompt = getOption(arguments, 'prompt')
295+
if (Array.isArray(prompt) && typeof prompt[0] !== 'number') {
296+
n *= prompt.length
297+
}
298+
}
298299

299-
const ctx = {
300-
methodName: `${baseResource}.${methodName}`,
301-
args: arguments,
302-
basePath: client.baseURL,
303-
apiKey: client.apiKey
304-
}
300+
const client = this._client || this.client
305301

306-
return ch.start.runStores(ctx, () => {
307-
const apiProm = methodFn.apply(this, arguments)
302+
const ctx = {
303+
methodName: `${baseResource}.${methodName}`,
304+
args: arguments,
305+
basePath: client.baseURL,
306+
apiKey: client.apiKey
307+
}
308+
309+
return ch.start.runStores(ctx, () => {
310+
const apiProm = methodFn.apply(this, arguments)
308311

309-
if (baseResource === 'chat.completions' && typeof apiProm._thenUnwrap === 'function') {
310-
// this should only ever be invoked from a client.beta.chat.completions.parse call
311-
shimmer.wrap(apiProm, '_thenUnwrap', origApiPromThenUnwrap => function () {
312-
// TODO(sam.brenner): I wonder if we can patch the APIPromise prototype instead, although
313-
// we might not have access to everything we need...
312+
if (baseResource === 'chat.completions' && typeof apiProm._thenUnwrap === 'function') {
313+
// this should only ever be invoked from a client.beta.chat.completions.parse call
314+
shimmer.wrap(apiProm, '_thenUnwrap', origApiPromThenUnwrap => function () {
315+
// TODO(sam.brenner): I wonder if we can patch the APIPromise prototype instead, although
316+
// we might not have access to everything we need...
314317

315-
// this is a new apipromise instance
316-
const unwrappedPromise = origApiPromThenUnwrap.apply(this, arguments)
318+
// this is a new apipromise instance
319+
const unwrappedPromise = origApiPromThenUnwrap.apply(this, arguments)
317320

318-
shimmer.wrap(unwrappedPromise, 'parse', origApiPromParse => function () {
319-
const parsedPromise = origApiPromParse.apply(this, arguments)
320-
.then(body => Promise.all([this.responsePromise, body]))
321+
shimmer.wrap(unwrappedPromise, 'parse', origApiPromParse => function () {
322+
const parsedPromise = origApiPromParse.apply(this, arguments)
323+
.then(body => Promise.all([this.responsePromise, body]))
321324

322-
return handleUnwrappedAPIPromise(parsedPromise, ctx, stream, n)
325+
return handleUnwrappedAPIPromise(parsedPromise, ctx, stream, n)
326+
})
327+
328+
return unwrappedPromise
323329
})
330+
}
324331

325-
return unwrappedPromise
332+
// wrapping `parse` avoids problematic wrapping of `then` when trying to call
333+
// `withResponse` in userland code after. This way, we can return the whole `APIPromise`
334+
shimmer.wrap(apiProm, 'parse', origApiPromParse => function () {
335+
const parsedPromise = origApiPromParse.apply(this, arguments)
336+
.then(body => Promise.all([this.responsePromise, body]))
337+
338+
return handleUnwrappedAPIPromise(parsedPromise, ctx, stream, n)
326339
})
327-
}
328340

329-
// wrapping `parse` avoids problematic wrapping of `then` when trying to call
330-
// `withResponse` in userland code after. This way, we can return the whole `APIPromise`
331-
shimmer.wrap(apiProm, 'parse', origApiPromParse => function () {
332-
const parsedPromise = origApiPromParse.apply(this, arguments)
333-
.then(body => Promise.all([this.responsePromise, body]))
341+
ch.end.publish(ctx)
334342

335-
return handleUnwrappedAPIPromise(parsedPromise, ctx, stream, n)
343+
return apiProm
336344
})
337-
338-
ch.end.publish(ctx)
339-
340-
return apiProm
341345
})
342-
})
343-
}
344-
return exports
345-
})
346+
}
347+
return exports
348+
})
349+
}
346350
}
347351

348352
function handleUnwrappedAPIPromise (apiProm, ctx, stream, n) {

packages/datadog-plugin-langchain/test/integration-test/client.spec.js

+4-4
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,7 @@ const {
88
} = require('../../../../integration-tests/helpers')
99
const { assert } = require('chai')
1010

11-
// there is currently an issue with langchain + esm loader hooks from IITM
12-
// https://github.com/nodejs/import-in-the-middle/issues/163
13-
describe.skip('esm', () => {
11+
describe('esm', () => {
1412
let agent
1513
let proc
1614
let sandbox
@@ -47,7 +45,9 @@ describe.skip('esm', () => {
4745
assert.strictEqual(checkSpansForServiceName(payload, 'langchain.request'), true)
4846
})
4947

50-
proc = await spawnPluginIntegrationTestProc(sandbox.folder, 'server.mjs', agent.port)
48+
proc = await spawnPluginIntegrationTestProc(sandbox.folder, 'server.mjs', agent.port, null, {
49+
NODE_OPTIONS: '--import dd-trace/register.js'
50+
})
5151

5252
await res
5353
}).timeout(20000)
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,21 @@
11
import 'dd-trace/init.js'
2+
23
import { OpenAI } from '@langchain/openai'
34
import { StringOutputParser } from '@langchain/core/output_parsers'
45
import nock from 'nock'
56

67
nock('https://api.openai.com:443')
78
.post('/v1/completions')
8-
.reply(200, {})
9+
.reply(200, {
10+
model: 'gpt-3.5-turbo-instruct',
11+
choices: [{
12+
text: 'The answer is 4',
13+
index: 0,
14+
logprobs: null,
15+
finish_reason: 'length'
16+
}],
17+
usage: { prompt_tokens: 8, completion_tokens: 12, total_tokens: 20 }
18+
})
919

1020
const llm = new OpenAI({
1121
apiKey: '<not-a-real-key>'
@@ -15,4 +25,4 @@ const parser = new StringOutputParser()
1525

1626
const chain = llm.pipe(parser)
1727

18-
await chain.invoke('a test')
28+
await chain.invoke('what is 2 + 2?')

packages/datadog-plugin-openai/test/integration-test/client.spec.js

+5-2
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@ describe('esm', () => {
1515
let sandbox
1616

1717
// limit v4 tests while the IITM issue is resolved or a workaround is introduced
18+
// this is only relevant for `openai` >=4.0 <=4.1
1819
// issue link: https://github.com/DataDog/import-in-the-middle/issues/60
19-
withVersions('openai', 'openai', '>=3 <4', version => {
20+
withVersions('openai', 'openai', '>=3 <4.0.0 || >4.1.0', version => {
2021
before(async function () {
2122
this.timeout(20000)
2223
sandbox = await createSandbox([`'openai@${version}'`, 'nock'], false, [
@@ -43,7 +44,9 @@ describe('esm', () => {
4344
assert.strictEqual(checkSpansForServiceName(payload, 'openai.request'), true)
4445
})
4546

46-
proc = await spawnPluginIntegrationTestProc(sandbox.folder, 'server.mjs', agent.port)
47+
proc = await spawnPluginIntegrationTestProc(sandbox.folder, 'server.mjs', agent.port, null, {
48+
NODE_OPTIONS: '--import dd-trace/register.js'
49+
})
4750

4851
await res
4952
}).timeout(20000)

0 commit comments

Comments
 (0)