Skip to content

Commit adef40e

Browse files
fix(agent): prevent duplicate tool_use IDs in API requests
When tool_use events are streamed with interleaved content (text/thinking) between them, the deduplication logic checked only the last content item, so a repeated tool_use ID could slip through. This caused Anthropic API error 400: 'tool_use ids must be unique'.

Changes:
- thread.rs: Search all content items for an existing tool_use with the same ID instead of checking only the last item.
- anthropic.rs: Filter out duplicate tool_use IDs when merging consecutive assistant messages.

Fixes #44211
1 parent a350438 commit adef40e

File tree

3 files changed

+278
-9
lines changed

3 files changed

+278
-9
lines changed

crates/agent/src/tests/mod.rs

Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2596,3 +2596,244 @@ fn setup_context_server(
25962596
cx.run_until_parked();
25972597
mcp_tool_calls_rx
25982598
}
2599+
2600+
// Tests for duplicate tool_use IDs (Issue #44211)
2601+
2602+
#[gpui::test]
2603+
async fn test_no_duplicate_tool_use_with_intercalated_content(cx: &mut TestAppContext) {
2604+
let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2605+
let fake_model = model.as_fake();
2606+
2607+
thread.update(cx, |thread, _| thread.add_tool(EchoTool));
2608+
thread
2609+
.update(cx, |thread, cx| {
2610+
thread.send(UserMessageId::new(), ["Test tool"], cx)
2611+
})
2612+
.unwrap();
2613+
cx.run_until_parked();
2614+
2615+
// Send first tool_use event (partial)
2616+
let tool_use_partial = LanguageModelToolUse {
2617+
id: "tool_1".into(),
2618+
name: EchoTool::name().into(),
2619+
raw_input: "{}".into(),
2620+
input: json!({}),
2621+
is_input_complete: false,
2622+
thought_signature: None,
2623+
};
2624+
fake_model
2625+
.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use_partial));
2626+
cx.run_until_parked();
2627+
2628+
// Send intercalated text content
2629+
fake_model.send_last_completion_stream_text_chunk("Thinking about this...");
2630+
cx.run_until_parked();
2631+
2632+
// Send the same tool_use ID again (complete)
2633+
let tool_use_complete = LanguageModelToolUse {
2634+
id: "tool_1".into(),
2635+
name: EchoTool::name().into(),
2636+
raw_input: json!({"text": "hello"}).to_string(),
2637+
input: json!({"text": "hello"}),
2638+
is_input_complete: true,
2639+
thought_signature: None,
2640+
};
2641+
fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
2642+
tool_use_complete,
2643+
));
2644+
fake_model
2645+
.send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::ToolUse));
2646+
fake_model.end_last_completion_stream();
2647+
cx.run_until_parked();
2648+
2649+
// Verify: should have only ONE tool_use with id "tool_1"
2650+
thread.read_with(cx, |thread, _| {
2651+
let last_message = thread.last_message().expect("should have a message");
2652+
let agent_message = last_message
2653+
.as_agent_message()
2654+
.expect("should be agent message");
2655+
2656+
let tool_use_count = agent_message
2657+
.content
2658+
.iter()
2659+
.filter(
2660+
|c| matches!(c, AgentMessageContent::ToolUse(t) if t.id.to_string() == "tool_1"),
2661+
)
2662+
.count();
2663+
2664+
assert_eq!(
2665+
tool_use_count, 1,
2666+
"Should have exactly one tool_use with id 'tool_1', found {}. Content: {:?}",
2667+
tool_use_count, agent_message.content
2668+
);
2669+
});
2670+
}
2671+
2672+
#[gpui::test]
2673+
async fn test_no_duplicate_tool_use_ids_in_request(cx: &mut TestAppContext) {
2674+
let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2675+
let fake_model = model.as_fake();
2676+
2677+
thread.update(cx, |thread, _| thread.add_tool(EchoTool));
2678+
thread
2679+
.update(cx, |thread, cx| {
2680+
thread.send(UserMessageId::new(), ["Test"], cx)
2681+
})
2682+
.unwrap();
2683+
cx.run_until_parked();
2684+
2685+
let tool_use = LanguageModelToolUse {
2686+
id: "tool_1".into(),
2687+
name: EchoTool::name().into(),
2688+
raw_input: json!({"text": "test"}).to_string(),
2689+
input: json!({"text": "test"}),
2690+
is_input_complete: true,
2691+
thought_signature: None,
2692+
};
2693+
fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use));
2694+
fake_model
2695+
.send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::ToolUse));
2696+
fake_model.end_last_completion_stream();
2697+
cx.run_until_parked();
2698+
2699+
let completion = fake_model
2700+
.pending_completions()
2701+
.pop()
2702+
.expect("should have pending completion");
2703+
2704+
let mut seen_tool_use_ids = std::collections::HashSet::new();
2705+
for message in &completion.messages {
2706+
for content in &message.content {
2707+
if let MessageContent::ToolUse(tool_use) = content {
2708+
assert!(
2709+
seen_tool_use_ids.insert(tool_use.id.clone()),
2710+
"Duplicate tool_use ID found in request: {:?}",
2711+
tool_use.id
2712+
);
2713+
}
2714+
}
2715+
}
2716+
}
2717+
2718+
#[gpui::test]
2719+
async fn test_no_duplicate_tool_use_after_cancel(cx: &mut TestAppContext) {
2720+
let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2721+
let fake_model = model.as_fake();
2722+
2723+
thread.update(cx, |thread, _| thread.add_tool(InfiniteTool));
2724+
2725+
thread
2726+
.update(cx, |thread, cx| {
2727+
thread.send(UserMessageId::new(), ["Test tool"], cx)
2728+
})
2729+
.unwrap();
2730+
cx.run_until_parked();
2731+
2732+
let tool_use = LanguageModelToolUse {
2733+
id: "tool_1".into(),
2734+
name: InfiniteTool::name().into(),
2735+
raw_input: json!({}).to_string(),
2736+
input: json!({}),
2737+
is_input_complete: true,
2738+
thought_signature: None,
2739+
};
2740+
fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use));
2741+
fake_model
2742+
.send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::ToolUse));
2743+
fake_model.end_last_completion_stream();
2744+
cx.run_until_parked();
2745+
2746+
// Cancel (calls flush_pending_message)
2747+
thread.update(cx, |thread, cx| thread.cancel(cx));
2748+
cx.run_until_parked();
2749+
2750+
// Resume - this will create a new request
2751+
thread
2752+
.update(cx, |thread, cx| thread.resume(cx))
2753+
.expect("resume should succeed");
2754+
cx.run_until_parked();
2755+
2756+
let completion = fake_model
2757+
.pending_completions()
2758+
.pop()
2759+
.expect("should have pending completion after resume");
2760+
2761+
let mut seen_tool_use_ids = std::collections::HashSet::new();
2762+
for message in &completion.messages {
2763+
for content in &message.content {
2764+
if let MessageContent::ToolUse(tool_use) = content {
2765+
assert!(
2766+
seen_tool_use_ids.insert(tool_use.id.clone()),
2767+
"Duplicate tool_use ID found after cancel/resume: {:?}",
2768+
tool_use.id
2769+
);
2770+
}
2771+
}
2772+
}
2773+
}
2774+
2775+
#[gpui::test]
2776+
async fn test_multiple_distinct_tool_uses_work(cx: &mut TestAppContext) {
2777+
let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2778+
let fake_model = model.as_fake();
2779+
2780+
thread.update(cx, |thread, _| thread.add_tool(EchoTool));
2781+
thread
2782+
.update(cx, |thread, cx| {
2783+
thread.send(UserMessageId::new(), ["Test multiple tools"], cx)
2784+
})
2785+
.unwrap();
2786+
cx.run_until_parked();
2787+
2788+
// Send two distinct tool_uses
2789+
for i in 1..=2 {
2790+
let tool_use = LanguageModelToolUse {
2791+
id: format!("tool_{}", i).into(),
2792+
name: EchoTool::name().into(),
2793+
raw_input: json!({"text": format!("test{}", i)}).to_string(),
2794+
input: json!({"text": format!("test{}", i)}),
2795+
is_input_complete: true,
2796+
thought_signature: None,
2797+
};
2798+
fake_model
2799+
.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use));
2800+
}
2801+
fake_model
2802+
.send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::ToolUse));
2803+
fake_model.end_last_completion_stream();
2804+
cx.run_until_parked();
2805+
2806+
thread.read_with(cx, |thread, _| {
2807+
let last_message = thread.last_message().expect("should have a message");
2808+
let agent_message = last_message
2809+
.as_agent_message()
2810+
.expect("should be agent message");
2811+
2812+
let tool_use_ids: Vec<_> = agent_message
2813+
.content
2814+
.iter()
2815+
.filter_map(|c| {
2816+
if let AgentMessageContent::ToolUse(t) = c {
2817+
Some(t.id.to_string())
2818+
} else {
2819+
None
2820+
}
2821+
})
2822+
.collect();
2823+
2824+
assert_eq!(
2825+
tool_use_ids.len(),
2826+
2,
2827+
"Should have exactly 2 tool_uses, found: {:?}",
2828+
tool_use_ids
2829+
);
2830+
assert!(
2831+
tool_use_ids.contains(&"tool_1".to_string()),
2832+
"Should contain tool_1"
2833+
);
2834+
assert!(
2835+
tool_use_ids.contains(&"tool_2".to_string()),
2836+
"Should contain tool_2"
2837+
);
2838+
});
2839+
}

crates/agent/src/thread.rs

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1515,21 +1515,27 @@ impl Thread {
15151515
kind = tool.kind();
15161516
}
15171517

1518-
// Ensure the last message ends in the current tool use
1518+
// Find and update existing tool_use with same ID, or determine we need to push new
15191519
let last_message = self.pending_message();
1520-
let push_new_tool_use = last_message.content.last_mut().is_none_or(|content| {
1521-
if let AgentMessageContent::ToolUse(last_tool_use) = content {
1522-
if last_tool_use.id == tool_use.id {
1523-
*last_tool_use = tool_use.clone();
1524-
false
1520+
let existing_tool_use = last_message.content.iter_mut().find_map(|content| {
1521+
if let AgentMessageContent::ToolUse(existing) = content {
1522+
if existing.id == tool_use.id {
1523+
Some(existing)
15251524
} else {
1526-
true
1525+
None
15271526
}
15281527
} else {
1529-
true
1528+
None
15301529
}
15311530
});
15321531

1532+
let push_new_tool_use = if let Some(existing) = existing_tool_use {
1533+
*existing = tool_use.clone();
1534+
false
1535+
} else {
1536+
true
1537+
};
1538+
15331539
if push_new_tool_use {
15341540
event_stream.send_tool_call(
15351541
&tool_use.id,

crates/language_models/src/provider/anthropic.rs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -538,7 +538,29 @@ pub fn into_anthropic(
538538
if let Some(last_message) = new_messages.last_mut()
539539
&& last_message.role == anthropic_role
540540
{
541-
last_message.content.extend(anthropic_message_content);
541+
// Collect existing tool_use IDs to prevent duplicates
542+
let existing_tool_use_ids: std::collections::HashSet<_> = last_message
543+
.content
544+
.iter()
545+
.filter_map(|c| {
546+
if let anthropic::RequestContent::ToolUse { id, .. } = c {
547+
Some(id.clone())
548+
} else {
549+
None
550+
}
551+
})
552+
.collect();
553+
554+
// Only extend with content that doesn't have duplicate tool_use IDs
555+
let filtered_content = anthropic_message_content.into_iter().filter(|c| {
556+
if let anthropic::RequestContent::ToolUse { id, .. } = c {
557+
!existing_tool_use_ids.contains(id)
558+
} else {
559+
true
560+
}
561+
});
562+
563+
last_message.content.extend(filtered_content);
542564
continue;
543565
}
544566

0 commit comments

Comments
 (0)