Skip to content

Commit 53e774d

Browse files
authored Mar 20, 2025
Deal with summarization Error for images (#910)
1 parent 812cf15 commit 53e774d

File tree

1 file changed

+28
-7
lines changed
  • src/codegen/extensions/langchain

1 file changed

+28
-7
lines changed
 

src/codegen/extensions/langchain/graph.py

+28-7
Original file line numberDiff line numberDiff line change
@@ -155,15 +155,27 @@ def format_header(header_type: str) -> str:
155155

156156
# Format messages with appropriate headers
157157
formatted_messages = []
158-
for msg in to_summarize: # No need for slice when iterating full list
158+
image_urls = [] # Track image URLs for the summary prompt
159+
160+
for msg in to_summarize:
159161
if isinstance(msg, HumanMessage):
160-
formatted_messages.append(format_header("human") + msg.content)
162+
# Now we know content is always a list
163+
for item in msg.content:
164+
if item.get("type") == "text":
165+
text_content = item.get("text", "")
166+
if text_content:
167+
formatted_messages.append(format_header("human") + text_content)
168+
elif item.get("type") == "image_url":
169+
image_url = item.get("image_url", {}).get("url")
170+
if image_url:
171+
# We are not including any string data in the summary for image. The image will be present itself!
172+
image_urls.append({"type": "image_url", "image_url": {"url": image_url}})
161173
elif isinstance(msg, AIMessage):
162174
# Check for summary message using additional_kwargs
163175
if msg.additional_kwargs.get("is_summary"):
164176
formatted_messages.append(format_header("summary") + msg.content)
165177
elif isinstance(msg.content, list) and len(msg.content) > 0 and isinstance(msg.content[0], dict):
166-
for item in msg.content: # No need for slice when iterating full list
178+
for item in msg.content:
167179
if item.get("type") == "text":
168180
formatted_messages.append(format_header("ai") + item["text"])
169181
elif item.get("type") == "tool_use":
@@ -173,16 +185,25 @@ def format_header(header_type: str) -> str:
173185
elif isinstance(msg, ToolMessage):
174186
formatted_messages.append(format_header("tool_response") + msg.content)
175187

176-
conversation = "\n".join(formatted_messages) # No need for slice when joining full list
188+
conversation = "\n".join(formatted_messages)
177189

178190
summary_llm = LLM(
179191
model_provider="anthropic",
180192
model_name="claude-3-5-sonnet-latest",
181193
temperature=0.3,
182194
)
183195

184-
chain = ChatPromptTemplate.from_template(SUMMARIZE_CONVERSATION_PROMPT) | summary_llm
185-
new_summary = chain.invoke({"conversation": conversation}).content
196+
# Choose template based on whether we have images
197+
summarizer_content = [{"type": "text", "text": SUMMARIZE_CONVERSATION_PROMPT}]
198+
for image_url in image_urls:
199+
summarizer_content.append(image_url)
200+
201+
chain = ChatPromptTemplate([("human", summarizer_content)]) | summary_llm
202+
new_summary = chain.invoke(
203+
{
204+
"conversation": conversation,
205+
}
206+
).content
186207

187208
return {"messages": {"type": "summarize", "summary": new_summary, "tail": tail, "head": head}}
188209

@@ -199,7 +220,7 @@ def should_continue(self, state: GraphState) -> Literal["tools", "summarize_conv
199220
return "summarize_conversation"
200221

201222
# Summarize if the last message exceeds the max input tokens of the model - 10000 tokens
202-
elif isinstance(last_message, AIMessage) and not just_summarized and curr_input_tokens > (max_input_tokens - 10000):
223+
elif isinstance(last_message, AIMessage) and not just_summarized and curr_input_tokens > (max_input_tokens - 30000):
203224
return "summarize_conversation"
204225

205226
elif hasattr(last_message, "tool_calls") and last_message.tool_calls:

0 commit comments

Comments
 (0)
Please sign in to comment.