docs[minor],langchain[minor],google-common[minor]: Add Gemini tools agent docs (#4930)

bracesproul · web-flow · commit 6b3210eee695 · 2024-04-01T09:33:17.000-07:00
* docs[minor],langchain[minor],google-common[minor]: Add Gemini tools agent docs

* chore: lint files

* chore: lint files

* bruh

* chore: lint files

* cr

* fix int tests

* drop test

* fix rest of tests

* nit
diff --git a/docs/core_docs/docs/integrations/chat/google_vertex_ai.mdx b/docs/core_docs/docs/integrations/chat/google_vertex_ai.mdx
@@ -119,3 +119,16 @@ import ChatVertexAIWSA from "@examples/models/chat/integration_googlevertexai-ws
 :::tip
 See the LangSmith trace for the example above [here](https://smith.langchain.com/public/41bbbddb-f357-4bfa-a111-def8294a4514/r).
 :::
+
+### VertexAI tools agent
+
+The Gemini family of models not only supports tool calling, but can also be used in the OpenAI Tools agent.
+Here's an example:
+
+import AgentsExample from "@examples/models/chat/chat_vertexai_agents.ts";
+
+<CodeBlock language="typescript">{AgentsExample}</CodeBlock>
+
+:::tip
+See the LangSmith trace for the agent example above [here](https://smith.langchain.com/public/3294d553-c961-4088-acfe-62252ab17d9a/r).
+:::
diff --git a/docs/core_docs/docs/integrations/chat/mistral.mdx b/docs/core_docs/docs/integrations/chat/mistral.mdx
@@ -96,7 +96,7 @@ import WSAJSONExample from "@examples/models/chat/chat_mistralai_wsa_json.ts";
 
 <CodeBlock language="typescript">{WSAJSONExample}</CodeBlock>
 
-### OpenAI tools agent
+### OpenAI-style tools agent
 
 The larger Mistral models not only support tool calling, but can also be used in the OpenAI Tools agent.
 Here's an example:
diff --git a/examples/src/models/chat/chat_vertexai_agents.ts b/examples/src/models/chat/chat_vertexai_agents.ts
@@ -0,0 +1,49 @@
+import { z } from "zod";
+
+import { pull } from "langchain/hub";
+import { DynamicStructuredTool } from "@langchain/core/tools";
+import { AgentExecutor, createOpenAIToolsAgent } from "langchain/agents";
+
+import type { ChatPromptTemplate } from "@langchain/core/prompts";
+import { ChatVertexAI } from "@langchain/google-vertexai";
+// Uncomment this if you're running inside a web/edge environment.
+// import { ChatVertexAI } from "@langchain/google-vertexai-web";
+
+const llm: any = new ChatVertexAI({
+  temperature: 0,
+  modelName: "gemini-1.0-pro",
+});
+
+// Get the prompt to use - you can modify this!
+// If you want to see the prompt in full, you can view it at:
+// https://smith.langchain.com/hub/hwchase17/openai-tools-agent
+const prompt = await pull<ChatPromptTemplate>("hwchase17/openai-tools-agent");
+
+// Stub tool: ignores its input and always reports the same temperature.
+const currentWeatherTool = new DynamicStructuredTool({
+  name: "get_current_weather",
+  description: "Get the current weather in a given location",
+  schema: z.object({
+    location: z.string().describe("The city and state, e.g. San Francisco, CA"),
+  }),
+  func: async () => "28 °C",
+});
+
+const agent = await createOpenAIToolsAgent({
+  llm,
+  tools: [currentWeatherTool],
+  prompt,
+});
+
+const agentExecutor = new AgentExecutor({
+  agent,
+  tools: [currentWeatherTool],
+});
+
+const input = "What's the weather like in Paris?";
+const { output } = await agentExecutor.invoke({ input });
+console.log(output);
+
+/*
+It's 28 degrees Celsius in Paris.
+*/
diff --git a/libs/langchain-google-common/src/tests/chat_models.test.ts b/libs/langchain-google-common/src/tests/chat_models.test.ts
@@ -6,14 +6,9 @@ import {
   HumanMessage,
   HumanMessageChunk,
   MessageContentComplex,
-  MessageContentText,
   SystemMessage,
   ToolMessage,
 } from "@langchain/core/messages";
-import { StructuredToolInterface } from "@langchain/core/tools";
-import { FakeTool } from "@langchain/core/utils/testing";
-// eslint-disable-next-line import/no-extraneous-dependencies
-import { z } from "zod";
 
 import { ChatGoogleBase, ChatGoogleBaseInput } from "../chat_models.js";
 import { authOptions, MockClient, MockClientAuthInfo, mockId } from "./mock.js";
@@ -213,13 +208,7 @@ describe("Mock ChatGoogle", () => {
     expect(result._getType()).toEqual("ai");
     const aiMessage = result as AIMessage;
     expect(aiMessage.content).toBeDefined();
-    expect(aiMessage.content.length).toBeGreaterThanOrEqual(1);
-    expect(aiMessage.content[0]).toHaveProperty("type");
-
-    const complexContent = aiMessage.content[0] as MessageContentComplex;
-    expect(complexContent.type).toEqual("text");
-    const content = complexContent as MessageContentText;
-    expect(content.text).toEqual("T");
+    expect(aiMessage.content).toBe("T");
   });
 
   test("1. Invoke response format", async () => {
@@ -244,13 +233,7 @@ describe("Mock ChatGoogle", () => {
     expect(result._getType()).toEqual("ai");
     const aiMessage = result as AIMessage;
     expect(aiMessage.content).toBeDefined();
-    expect(aiMessage.content.length).toBeGreaterThanOrEqual(1);
-    expect(aiMessage.content[0]).toHaveProperty("type");
-
-    const complexContent = aiMessage.content[0] as MessageContentComplex;
-    expect(complexContent.type).toEqual("text");
-    const content = complexContent as MessageContentText;
-    expect(content.text).toEqual("T");
+    expect(aiMessage.content).toBe("T");
   });
 
   // SystemMessages will be turned into the human request with the prompt
@@ -327,13 +310,7 @@ describe("Mock ChatGoogle", () => {
       expect(result._getType()).toEqual("ai");
       const aiMessage = result as AIMessage;
       expect(aiMessage.content).toBeDefined();
-      expect(aiMessage.content.length).toBeGreaterThanOrEqual(1);
-      expect(aiMessage.content[0]).toHaveProperty("type");
-
-      const complexContent = aiMessage.content[0] as MessageContentComplex;
-      expect(complexContent.type).toEqual("text");
-      const content = complexContent as MessageContentText;
-      expect(content.text).toEqual("T");
+      expect(aiMessage.content).toBe("T");
     }
 
     expect(caught).toEqual(true);
@@ -386,10 +363,7 @@ describe("Mock ChatGoogle", () => {
     expect(parts[1].inlineData).toHaveProperty("mimeType");
     expect(parts[1].inlineData).toHaveProperty("data");
 
-    expect(result.content[0]).toHaveProperty("text");
-    expect((result.content[0] as MessageContentText).text).toEqual(
-      "A blue square."
-    );
+    expect(result.content).toBe("A blue square.");
   });
 
   test("4. Functions Bind - Gemini format request", async () => {
@@ -546,78 +520,6 @@ describe("Mock ChatGoogle", () => {
     expect(parameters.required[0]).toBe("testName");
   });
 
-  test("4. Functions - zod format request", async () => {
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    const record: Record<string, any> = {};
-    const projectId = mockId();
-    const authOptions: MockClientAuthInfo = {
-      record,
-      projectId,
-      resultFile: "chat-4-mock.json",
-    };
-
-    const zodSchema = z.object({
-      testName: z.string().describe("The name of the test that should be run."),
-    });
-    const tools: StructuredToolInterface[] = [
-      new FakeTool({
-        name: "test",
-        description:
-          "Run a test with a specific name and get if it passed or failed",
-        schema: zodSchema,
-      }),
-    ];
-
-    const model = new ChatGoogle({
-      authOptions,
-    }).bind({
-      tools,
-    });
-
-    const result = await model.invoke("What?");
-
-    const toolsResult = record?.opts?.data?.tools;
-    console.log("toolsResult", JSON.stringify(toolsResult, null, 1));
-    expect(toolsResult).toBeDefined();
-    expect(Array.isArray(toolsResult)).toBeTruthy();
-    expect(toolsResult).toHaveLength(1);
-
-    const toolResult = toolsResult[0];
-    expect(toolResult).toBeDefined();
-    expect(toolResult).toHaveProperty("functionDeclarations");
-    expect(Array.isArray(toolResult.functionDeclarations)).toBeTruthy();
-    expect(toolResult.functionDeclarations).toHaveLength(1);
-
-    const functionDeclaration = toolResult.functionDeclarations[0];
-    expect(functionDeclaration.name).toBe("test");
-    expect(functionDeclaration.description).toBe(
-      "Run a test with a specific name and get if it passed or failed"
-    );
-    expect(functionDeclaration.parameters).toBeDefined();
-    expect(typeof functionDeclaration.parameters).toBe("object");
-
-    const parameters = functionDeclaration?.parameters;
-    expect(parameters.type).toBe("object");
-    expect(parameters).toHaveProperty("properties");
-    expect(parameters).not.toHaveProperty("additionalProperties");
-    expect(parameters).not.toHaveProperty("$schema");
-    expect(typeof parameters.properties).toBe("object");
-
-    expect(parameters.properties.testName).toBeDefined();
-    expect(typeof parameters.properties.testName).toBe("object");
-    expect(parameters.properties.testName.type).toBe("string");
-    expect(parameters.properties.testName.description).toBe(
-      "The name of the test that should be run."
-    );
-
-    expect(parameters.required).toBeDefined();
-    expect(Array.isArray(parameters.required)).toBeTruthy();
-    expect(parameters.required).toHaveLength(1);
-    expect(parameters.required[0]).toBe("testName");
-
-    console.log(result);
-  });
-
   test("4. Functions - results", async () => {
     // eslint-disable-next-line @typescript-eslint/no-explicit-any
     const record: Record<string, any> = {};
@@ -660,8 +562,7 @@ describe("Mock ChatGoogle", () => {
 
     console.log(JSON.stringify(result, null, 1));
     expect(result).toHaveProperty("content");
-    expect(Array.isArray(result.content)).toBeTruthy();
-    expect(result.content).toHaveLength(0);
+    expect(result.content).toBe("");
     const args = result?.lc_kwargs?.additional_kwargs;
     expect(args).toBeDefined();
     expect(args).toHaveProperty("tool_calls");
diff --git a/libs/langchain-google-common/src/utils/common.ts b/libs/langchain-google-common/src/utils/common.ts
@@ -1,4 +1,6 @@
+import { StructuredToolInterface } from "@langchain/core/tools";
 import type {
+  GeminiTool,
   GoogleAIBaseLanguageModelCallOptions,
   GoogleAIModelParams,
   GoogleAIModelRequestParams,
@@ -35,6 +37,64 @@ export function copyAIModelParamsInto(
     options?.safetySettings ?? params?.safetySettings ?? target.safetySettings;
 
   ret.tools = options?.tools;
+  // Ensure tools are formatted properly for Gemini: OpenAI-format tools are
+  // converted to function declarations; Gemini-format tools pass through.
+  const geminiTools = options?.tools
+    ?.map((tool) => {
+      if (
+        "function" in tool &&
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        "parameters" in (tool.function as Record<string, any>)
+      ) {
+        // Tool is in OpenAI format. Convert to Gemini then return.
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        const castTool = tool.function as Record<string, any>;
+        // Drop `$schema` and `additionalProperties` from a shallow copy so
+        // the caller's tool definition is not mutated.
+        const cleanedParameters = { ...castTool.parameters };
+        delete cleanedParameters.$schema;
+        delete cleanedParameters.additionalProperties;
+        const toolInGeminiFormat: GeminiTool = {
+          functionDeclarations: [
+            {
+              name: castTool.name,
+              description: castTool.description,
+              parameters: cleanedParameters,
+            },
+          ],
+        };
+        return toolInGeminiFormat;
+      } else if ("functionDeclarations" in tool) {
+        return tool;
+      } else {
+        return null;
+      }
+    })
+    .filter((tool): tool is GeminiTool => tool !== null);
+
+  const structuredOutputTools = options?.tools
+    ?.map((tool) => {
+      if ("lc_namespace" in tool) {
+        return tool;
+      } else {
+        return null;
+      }
+    })
+    .filter((tool): tool is StructuredToolInterface => tool !== null);
+
+  if (
+    structuredOutputTools &&
+    structuredOutputTools.length > 0 &&
+    geminiTools &&
+    geminiTools.length > 0
+  ) {
+    throw new Error(
+      `Cannot mix structured tools with Gemini tools.\nReceived ${structuredOutputTools.length} structured tools and ${geminiTools.length} Gemini tools.`
+    );
+  }
+  // When tools are supplied `geminiTools` is always an array (possibly empty),
+  // so `??` could never fall back. Pick by length to keep structured tools.
+  ret.tools = geminiTools?.length ? geminiTools : structuredOutputTools;
 
   return ret;
 }
diff --git a/libs/langchain-google-common/src/utils/gemini.ts b/libs/langchain-google-common/src/utils/gemini.ts
@@ -179,20 +179,39 @@ function toolMessageToContent(message: ToolMessage): GeminiContent[] {
           },
           ""
         );
-  const content = JSON.parse(contentStr);
-  return [
-    {
-      role: "function",
-      parts: [
-        {
-          functionResponse: {
-            name: message.tool_call_id,
-            response: content,
+
+  try {
+    const content = JSON.parse(contentStr);
+    return [
+      {
+        role: "function",
+        parts: [
+          {
+            functionResponse: {
+              name: message.tool_call_id,
+              response: content,
+            },
           },
-        },
-      ],
-    },
-  ];
+        ],
+      },
+    ];
+  } catch (_) {
+    return [
+      {
+        role: "function",
+        parts: [
+          {
+            functionResponse: {
+              name: message.tool_call_id,
+              response: {
+                response: contentStr,
+              },
+            },
+          },
+        ],
+      },
+    ];
+  }
 }
 
 export function baseMessageToContent(message: BaseMessage): GeminiContent[] {
@@ -445,6 +464,17 @@ export function chunkToString(chunk: BaseMessageChunk): string {
 
 export function partToMessage(part: GeminiPart): BaseMessageChunk {
   const fields = partsToBaseMessageFields([part]);
+  // Text-only content is flattened into one plain string; string content and
+  // content holding any non-text item fall through to the return below.
+  if (
+    typeof fields.content !== "string" &&
+    fields.content.every((item) => item.type === "text")
+  ) {
+    const text = fields.content
+      .map((item) => ("text" in item ? item.text : ""))
+      .join("");
+    return new AIMessageChunk({ ...fields, content: text });
+  }
   return new AIMessageChunk(fields);
 }
 
diff --git a/libs/langchain-google-gauth/src/tests/chat_models.int.test.ts b/libs/langchain-google-gauth/src/tests/chat_models.int.test.ts
diff --git a/libs/langchain-google-gauth/src/tests/llms.int.test.ts b/libs/langchain-google-gauth/src/tests/llms.int.test.ts
diff --git a/libs/langchain-google-webauth/src/tests/chat_models.int.test.ts b/libs/langchain-google-webauth/src/tests/chat_models.int.test.ts