Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: agent can now take in images in the form of URLs #884

Merged
merged 3 commits into from
Mar 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions src/codegen/agents/code_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,12 @@ def __init__(
**metadata,
}

def run(self, prompt: str) -> str:
"""Run the agent with a prompt.
def run(self, prompt: str, image_urls: Optional[list[str]] = None) -> str:
"""Run the agent with a prompt and optional images.

Args:
prompt: The prompt to run
image_urls: Optional list of base64-encoded image strings. Example: ["data:image/png;base64,<base64_str>"]
thread_id: Optional thread ID for message history

Returns:
Expand All @@ -124,14 +125,15 @@ def run(self, prompt: str) -> str:
"recursion_limit": 100,
}

# this message has a reducer which appends the current message to the existing history
# see more https://langchain-ai.github.io/langgraph/concepts/low_level/#reducers
input = {"query": prompt}
# Prepare content with prompt and images if provided
content = [{"type": "text", "text": prompt}]
if image_urls:
content += [{"type": "image_url", "image_url": {"url": image_url}} for image_url in image_urls]

config = RunnableConfig(configurable={"thread_id": self.thread_id}, tags=self.tags, metadata=self.metadata, recursion_limit=200)
# we stream the steps instead of invoke because it allows us to access intermediate nodes

stream = self.agent.stream(input, config=config, stream_mode="values")
stream = self.agent.stream({"messages": [HumanMessage(content=content)]}, config=config, stream_mode="values")

_tracer = MessageStreamTracer(logger=self.logger)

Expand All @@ -143,7 +145,7 @@ def run(self, prompt: str) -> str:

for s in traced_stream:
if len(s["messages"]) == 0 or isinstance(s["messages"][-1], HumanMessage):
message = HumanMessage(content=prompt)
message = HumanMessage(content=content)
else:
message = s["messages"][-1]

Expand Down
25 changes: 16 additions & 9 deletions src/codegen/agents/scratch.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@
"metadata": {},
"outputs": [],
"source": [
"from codegen.agents.code_agent import CodeAgent\n",
"\n",
"\n",
"CodeAgent"
"from codegen.agents.code_agent import CodeAgent"
]
},
{
Expand Down Expand Up @@ -46,8 +43,7 @@
"metadata": {},
"outputs": [],
"source": [
"agent = CodeAgent(codebase)\n",
"agent.run(\"What is the main character's name? also show the source code where you find the answer\", logger=ConsoleLogger())"
"image = \"\""
]
},
{
Expand All @@ -56,15 +52,26 @@
"metadata": {},
"outputs": [],
"source": [
"agent.run(\"What is the main character's name?\")"
"agent = CodeAgent(codebase)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"agent.run(\"Tell me about the images you see.\", image_urls=[f\"data:image/png;base64,{image}\", f\"data:image/png;base64,{image}\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [
"agent.run(\"What is the main character's name?\")"
]
}
],
"metadata": {
Expand All @@ -83,7 +90,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.0"
"version": "3.13.1"
}
},
"nbformat": 4,
Expand Down
Loading