fix(agent): prevent duplicate tool_use IDs in API requests

florian-trehaut-hillcode · florian-trehaut-hillcode · commit adef40e94161 · 2025-12-06T08:23:11.000+07:00
When tool_use events are streamed with intercalated content (text or thinking) between them, the deduplication logic checked only the last content item, so a repeated tool_use ID could slip through as a duplicate. This caused the Anthropic API to reject the request with error 400: 'tool_use ids must be unique'. Changes: (1) thread.rs: search all content items for an existing tool_use with the same ID instead of checking only the last item; (2) anthropic.rs: filter out duplicate tool_use IDs when merging consecutive assistant messages. Fixes #44211.
diff --git a/crates/agent/src/tests/mod.rs b/crates/agent/src/tests/mod.rs
@@ -2596,3 +2596,244 @@ fn setup_context_server(
     cx.run_until_parked();
     mcp_tool_calls_rx
 }
+
+// Regression tests: tool_use IDs must stay unique within a message and a request (issue #44211).
+
+#[gpui::test]
+async fn test_no_duplicate_tool_use_with_intercalated_content(cx: &mut TestAppContext) {
+    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
+    let fake_model = model.as_fake();
+
+    thread.update(cx, |thread, _| thread.add_tool(EchoTool));
+    thread
+        .update(cx, |thread, cx| {
+            thread.send(UserMessageId::new(), ["Test tool"], cx)
+        })
+        .unwrap();
+    cx.run_until_parked();
+
+    // Stream a first ToolUse event for "tool_1" with is_input_complete = false (partial input).
+    let tool_use_partial = LanguageModelToolUse {
+        id: "tool_1".into(),
+        name: EchoTool::name().into(),
+        raw_input: "{}".into(),
+        input: json!({}),
+        is_input_complete: false,
+        thought_signature: None,
+    };
+    fake_model
+        .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use_partial));
+    cx.run_until_parked();
+
+    // Stream a text chunk; presumably this leaves the tool_use no longer last in the content.
+    fake_model.send_last_completion_stream_text_chunk("Thinking about this...");
+    cx.run_until_parked();
+
+    // Stream "tool_1" again with complete input; it should replace the entry, not add a second.
+    let tool_use_complete = LanguageModelToolUse {
+        id: "tool_1".into(),
+        name: EchoTool::name().into(),
+        raw_input: json!({"text": "hello"}).to_string(),
+        input: json!({"text": "hello"}),
+        is_input_complete: true,
+        thought_signature: None,
+    };
+    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
+        tool_use_complete,
+    ));
+    fake_model
+        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::ToolUse));
+    fake_model.end_last_completion_stream();
+    cx.run_until_parked();
+
+    // The last agent message must contain exactly one ToolUse item whose id is "tool_1".
+    thread.read_with(cx, |thread, _| {
+        let last_message = thread.last_message().expect("should have a message");
+        let agent_message = last_message
+            .as_agent_message()
+            .expect("should be agent message");
+
+        let tool_use_count = agent_message
+            .content
+            .iter()
+            .filter(
+                |c| matches!(c, AgentMessageContent::ToolUse(t) if t.id.to_string() == "tool_1"),
+            )
+            .count();
+
+        assert_eq!(
+            tool_use_count, 1,
+            "Should have exactly one tool_use with id 'tool_1', found {}. Content: {:?}",
+            tool_use_count, agent_message.content
+        );
+    });
+}
+
+#[gpui::test]
+async fn test_no_duplicate_tool_use_ids_in_request(cx: &mut TestAppContext) {
+    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
+    let fake_model = model.as_fake();
+
+    thread.update(cx, |thread, _| thread.add_tool(EchoTool));
+    thread
+        .update(cx, |thread, cx| {
+            thread.send(UserMessageId::new(), ["Test"], cx)
+        })
+        .unwrap();
+    cx.run_until_parked();
+
+    let tool_use = LanguageModelToolUse {
+        id: "tool_1".into(),
+        name: EchoTool::name().into(),
+        raw_input: json!({"text": "test"}).to_string(),
+        input: json!({"text": "test"}),
+        is_input_complete: true,
+        thought_signature: None,
+    };
+    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use));
+    fake_model
+        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::ToolUse));
+    fake_model.end_last_completion_stream();
+    cx.run_until_parked();
+
+    let completion = fake_model // take the last pending completion request
+        .pending_completions()
+        .pop()
+        .expect("should have pending completion");
+
+    let mut seen_tool_use_ids = std::collections::HashSet::new();
+    for message in &completion.messages { // scan every message of that request
+        for content in &message.content {
+            if let MessageContent::ToolUse(tool_use) = content {
+                assert!(
+                    seen_tool_use_ids.insert(tool_use.id.clone()), // false if ID already seen
+                    "Duplicate tool_use ID found in request: {:?}",
+                    tool_use.id
+                );
+            }
+        }
+    }
+}
+
+#[gpui::test]
+async fn test_no_duplicate_tool_use_after_cancel(cx: &mut TestAppContext) {
+    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
+    let fake_model = model.as_fake();
+
+    thread.update(cx, |thread, _| thread.add_tool(InfiniteTool));
+
+    thread
+        .update(cx, |thread, cx| {
+            thread.send(UserMessageId::new(), ["Test tool"], cx)
+        })
+        .unwrap();
+    cx.run_until_parked();
+
+    let tool_use = LanguageModelToolUse {
+        id: "tool_1".into(),
+        name: InfiniteTool::name().into(),
+        raw_input: json!({}).to_string(),
+        input: json!({}),
+        is_input_complete: true,
+        thought_signature: None,
+    };
+    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use));
+    fake_model
+        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::ToolUse));
+    fake_model.end_last_completion_stream();
+    cx.run_until_parked();
+
+    // Cancel the turn (per the original author this calls flush_pending_message; not shown here).
+    thread.update(cx, |thread, cx| thread.cancel(cx));
+    cx.run_until_parked();
+
+    // Resume the thread; the test expects this to queue a new completion request.
+    thread
+        .update(cx, |thread, cx| thread.resume(cx))
+        .expect("resume should succeed");
+    cx.run_until_parked();
+
+    let completion = fake_model // take the last pending completion request
+        .pending_completions()
+        .pop()
+        .expect("should have pending completion after resume");
+
+    let mut seen_tool_use_ids = std::collections::HashSet::new();
+    for message in &completion.messages { // scan every message of that request
+        for content in &message.content {
+            if let MessageContent::ToolUse(tool_use) = content {
+                assert!(
+                    seen_tool_use_ids.insert(tool_use.id.clone()), // false if ID already seen
+                    "Duplicate tool_use ID found after cancel/resume: {:?}",
+                    tool_use.id
+                );
+            }
+        }
+    }
+}
+
+#[gpui::test]
+async fn test_multiple_distinct_tool_uses_work(cx: &mut TestAppContext) {
+    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
+    let fake_model = model.as_fake();
+
+    thread.update(cx, |thread, _| thread.add_tool(EchoTool));
+    thread
+        .update(cx, |thread, cx| {
+            thread.send(UserMessageId::new(), ["Test multiple tools"], cx)
+        })
+        .unwrap();
+    cx.run_until_parked();
+
+    // Stream two complete ToolUse events with distinct IDs, "tool_1" and "tool_2".
+    for i in 1..=2 {
+        let tool_use = LanguageModelToolUse {
+            id: format!("tool_{}", i).into(),
+            name: EchoTool::name().into(),
+            raw_input: json!({"text": format!("test{}", i)}).to_string(),
+            input: json!({"text": format!("test{}", i)}),
+            is_input_complete: true,
+            thought_signature: None,
+        };
+        fake_model
+            .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use));
+    }
+    fake_model
+        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::ToolUse));
+    fake_model.end_last_completion_stream();
+    cx.run_until_parked();
+
+    thread.read_with(cx, |thread, _| {
+        let last_message = thread.last_message().expect("should have a message");
+        let agent_message = last_message
+            .as_agent_message()
+            .expect("should be agent message");
+
+        let tool_use_ids: Vec<_> = agent_message // IDs of all ToolUse items, in content order
+            .content
+            .iter()
+            .filter_map(|c| {
+                if let AgentMessageContent::ToolUse(t) = c {
+                    Some(t.id.to_string())
+                } else {
+                    None
+                }
+            })
+            .collect();
+
+        assert_eq!(
+            tool_use_ids.len(),
+            2,
+            "Should have exactly 2 tool_uses, found: {:?}",
+            tool_use_ids
+        );
+        assert!(
+            tool_use_ids.contains(&"tool_1".to_string()),
+            "Should contain tool_1"
+        );
+        assert!(
+            tool_use_ids.contains(&"tool_2".to_string()),
+            "Should contain tool_2"
+        );
+    });
+}
diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs
@@ -1515,21 +1515,27 @@ impl Thread {
             kind = tool.kind();
         }
 
-        // Ensure the last message ends in the current tool use
+        // Find and update existing tool_use with same ID, or determine we need to push new
         let last_message = self.pending_message();
-        let push_new_tool_use = last_message.content.last_mut().is_none_or(|content| {
-            if let AgentMessageContent::ToolUse(last_tool_use) = content {
-                if last_tool_use.id == tool_use.id {
-                    *last_tool_use = tool_use.clone();
-                    false
+        let existing_tool_use = last_message.content.iter_mut().find_map(|content| {
+            if let AgentMessageContent::ToolUse(existing) = content {
+                if existing.id == tool_use.id {
+                    Some(existing)
                 } else {
-                    true
+                    None
                 }
             } else {
-                true
+                None
             }
         });
 
+        let push_new_tool_use = if let Some(existing) = existing_tool_use {
+            *existing = tool_use.clone();
+            false
+        } else {
+            true
+        };
+
         if push_new_tool_use {
             event_stream.send_tool_call(
                 &tool_use.id,
diff --git a/crates/language_models/src/provider/anthropic.rs b/crates/language_models/src/provider/anthropic.rs
@@ -538,7 +538,29 @@ pub fn into_anthropic(
                 if let Some(last_message) = new_messages.last_mut()
                     && last_message.role == anthropic_role
                 {
-                    last_message.content.extend(anthropic_message_content);
+                    // Collect existing tool_use IDs to prevent duplicates
+                    let existing_tool_use_ids: std::collections::HashSet<_> = last_message
+                        .content
+                        .iter()
+                        .filter_map(|c| {
+                            if let anthropic::RequestContent::ToolUse { id, .. } = c {
+                                Some(id.clone())
+                            } else {
+                                None
+                            }
+                        })
+                        .collect();
+
+                    // Only extend with content that doesn't have duplicate tool_use IDs
+                    let filtered_content = anthropic_message_content.into_iter().filter(|c| {
+                        if let anthropic::RequestContent::ToolUse { id, .. } = c {
+                            !existing_tool_use_ids.contains(id)
+                        } else {
+                            true
+                        }
+                    });
+
+                    last_message.content.extend(filtered_content);
                     continue;
                 }