Skip to content

Commit e77b1aa

Browse files
feature: agent can now accept images in the form of URLs (#884)
# Motivation

Allowing the agent to take in images will allow it to be more helpful.

# Content

Added `image_urls` to the parameters of `CodeAgent.run` and modified the way we construct initial messages so that they can include images.

# Testing

<!-- How was the change tested? -->

# Please check the following before marking your PR as ready for review

- [ ] I have added tests for my changes
- [ ] I have updated the documentation or added new documentation as needed
1 parent eac44c1 commit e77b1aa

File tree

2 files changed

+25
-16
lines changed

2 files changed

+25
-16
lines changed

src/codegen/agents/code_agent.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -106,11 +106,12 @@ def __init__(
106106
**metadata,
107107
}
108108

109-
def run(self, prompt: str) -> str:
110-
"""Run the agent with a prompt.
109+
def run(self, prompt: str, image_urls: Optional[list[str]] = None) -> str:
110+
"""Run the agent with a prompt and optional images.
111111
112112
Args:
113113
prompt: The prompt to run
114+
image_urls: Optional list of image URLs, such as base64-encoded data URLs. Example: ["data:image/png;base64,<base64_str>"]
114115
thread_id: Optional thread ID for message history
115116
116117
Returns:
@@ -124,14 +125,15 @@ def run(self, prompt: str) -> str:
124125
"recursion_limit": 100,
125126
}
126127

127-
# this message has a reducer which appends the current message to the existing history
128-
# see more https://langchain-ai.github.io/langgraph/concepts/low_level/#reducers
129-
input = {"query": prompt}
128+
# Prepare content with prompt and images if provided
129+
content = [{"type": "text", "text": prompt}]
130+
if image_urls:
131+
content += [{"type": "image_url", "image_url": {"url": image_url}} for image_url in image_urls]
130132

131133
config = RunnableConfig(configurable={"thread_id": self.thread_id}, tags=self.tags, metadata=self.metadata, recursion_limit=200)
132134
# we stream the steps instead of invoke because it allows us to access intermediate nodes
133135

134-
stream = self.agent.stream(input, config=config, stream_mode="values")
136+
stream = self.agent.stream({"messages": [HumanMessage(content=content)]}, config=config, stream_mode="values")
135137

136138
_tracer = MessageStreamTracer(logger=self.logger)
137139

@@ -143,7 +145,7 @@ def run(self, prompt: str) -> str:
143145

144146
for s in traced_stream:
145147
if len(s["messages"]) == 0 or isinstance(s["messages"][-1], HumanMessage):
146-
message = HumanMessage(content=prompt)
148+
message = HumanMessage(content=content)
147149
else:
148150
message = s["messages"][-1]
149151

src/codegen/agents/scratch.ipynb

+16-9
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,7 @@
66
"metadata": {},
77
"outputs": [],
88
"source": [
9-
"from codegen.agents.code_agent import CodeAgent\n",
10-
"\n",
11-
"\n",
12-
"CodeAgent"
9+
"from codegen.agents.code_agent import CodeAgent"
1310
]
1411
},
1512
{
@@ -46,8 +43,7 @@
4643
"metadata": {},
4744
"outputs": [],
4845
"source": [
49-
"agent = CodeAgent(codebase)\n",
50-
"agent.run(\"What is the main character's name? also show the source code where you find the answer\", logger=ConsoleLogger())"
46+
"image = \"\""
5147
]
5248
},
5349
{
@@ -56,15 +52,26 @@
5652
"metadata": {},
5753
"outputs": [],
5854
"source": [
59-
"agent.run(\"What is the main character's name?\")"
55+
"agent = CodeAgent(codebase)"
56+
]
57+
},
58+
{
59+
"cell_type": "code",
60+
"execution_count": null,
61+
"metadata": {},
62+
"outputs": [],
63+
"source": [
64+
"agent.run(\"Tell me about the images you see.\", image_urls=[f\"data:image/png;base64,{image}\", f\"data:image/png;base64,{image}\"])"
6065
]
6166
},
6267
{
6368
"cell_type": "code",
6469
"execution_count": null,
6570
"metadata": {},
6671
"outputs": [],
67-
"source": []
72+
"source": [
73+
"agent.run(\"What is the main character's name?\")"
74+
]
6875
}
6976
],
7077
"metadata": {
@@ -83,7 +90,7 @@
8390
"name": "python",
8491
"nbconvert_exporter": "python",
8592
"pygments_lexer": "ipython3",
86-
"version": "3.13.0"
93+
"version": "3.13.1"
8794
}
8895
},
8996
"nbformat": 4,

0 commit comments

Comments
 (0)