From 7f90740be19e12d55f8a4b4606e83d04041a5c33 Mon Sep 17 00:00:00 2001 From: jemeza-codegen Date: Mon, 17 Mar 2025 15:24:15 -0700 Subject: [PATCH] feature: agent can now take in images in the form of urls --- src/codegen/agents/code_agent.py | 16 +++++++++------- src/codegen/agents/scratch.ipynb | 25 ++++++++++++++++--------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/src/codegen/agents/code_agent.py b/src/codegen/agents/code_agent.py index 99406ef40..693c0cd44 100644 --- a/src/codegen/agents/code_agent.py +++ b/src/codegen/agents/code_agent.py @@ -106,11 +106,12 @@ def __init__( **metadata, } - def run(self, prompt: str) -> str: - """Run the agent with a prompt. + def run(self, prompt: str, image_urls: Optional[list[str]] = None) -> str: + """Run the agent with a prompt and optional images. Args: prompt: The prompt to run + image_urls: Optional list of base64-encoded image strings. Example: ["data:image/png;base64,"] thread_id: Optional thread ID for message history Returns: @@ -124,14 +125,15 @@ def run(self, prompt: str) -> str: "recursion_limit": 100, } - # this message has a reducer which appends the current message to the existing history - # see more https://langchain-ai.github.io/langgraph/concepts/low_level/#reducers - input = {"query": prompt} + # Prepare content with prompt and images if provided + content = [{"type": "text", "text": prompt}] + if image_urls: + content += [{"type": "image_url", "image_url": {"url": image_url}} for image_url in image_urls] config = RunnableConfig(configurable={"thread_id": self.thread_id}, tags=self.tags, metadata=self.metadata, recursion_limit=200) # we stream the steps instead of invoke because it allows us to access intermediate nodes - stream = self.agent.stream(input, config=config, stream_mode="values") + stream = self.agent.stream({"messages": [HumanMessage(content=content)]}, config=config, stream_mode="values") _tracer = MessageStreamTracer(logger=self.logger) @@ -143,7 +145,7 @@ def run(self, prompt: str) -> str: for s in traced_stream: if len(s["messages"]) == 0 or isinstance(s["messages"][-1], HumanMessage): - message = HumanMessage(content=prompt) + message = HumanMessage(content=content) else: message = s["messages"][-1] diff --git a/src/codegen/agents/scratch.ipynb b/src/codegen/agents/scratch.ipynb index 1b50f8920..d39e4874f 100644 --- a/src/codegen/agents/scratch.ipynb +++ b/src/codegen/agents/scratch.ipynb @@ -6,10 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "from codegen.agents.code_agent import CodeAgent\n", - "\n", - "\n", - "CodeAgent" + "from codegen.agents.code_agent import CodeAgent" ] }, { @@ -46,8 +43,7 @@ "metadata": {}, "outputs": [], "source": [ - "agent = CodeAgent(codebase)\n", - "agent.run(\"What is the main character's name? also show the source code where you find the answer\", logger=ConsoleLogger())" + "image = \"\"" ] }, { @@ -56,7 +52,16 @@ "metadata": {}, "outputs": [], "source": [ - "agent.run(\"What is the main character's name?\")" + "agent = CodeAgent(codebase)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent.run(\"Tell me about the images you see.\", image_urls=[f\"data:image/png;base64,{image}\", f\"data:image/png;base64,{image}\"])" ] }, { @@ -64,7 +69,9 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "agent.run(\"What is the main character's name?\")" + ] } ], "metadata": { @@ -83,7 +90,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.0" + "version": "3.13.1" } }, "nbformat": 4,