Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions doc/configuration/adapters.md
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,47 @@ require("codecompanion").setup({
}),
```

## Setup: Gemini
> This is NOT the Gemini-CLI ACP adapter!

The Gemini adapter uses Google's official [openai-compatible](https://ai.google.dev/gemini-api/docs/openai) endpoint, with one difference: the `reasoning_effort` key is replaced by `thinking_budget`, which controls the number of tokens used by the reasoning process.

```lua
require("codecompanion").setup({
adapters = {
http = {
gemini = function()
return require("codecompanion.adapters").extend("gemini", {
schema = {
thinking_budget = {
default = -1,
},
},
})
end,
},
},
strategies = {
chat = {
adapter = "gemini",
},
inline = {
adapter = "gemini",
},
},
}),
```

The value of `thinking_budget` can be the following:

- `-1`: dynamic thinking (default).
- `0`: disable reasoning.
- A valid positive integer (see [Google's documentation](https://ai.google.dev/gemini-api/docs/thinking#set-budget) for the supported thinking budget range of each model).

Note that, according to [Gemini documentation](https://ai.google.dev/gemini-api/docs/thinking#summaries), their API endpoints only return thinking _summaries_, not _raw thinking tokens_.
Only the raw tokens are bounded by the `thinking_budget` parameter.
The summaries that we see are not bounded by the budget.

## Setup: OpenRouter with Reasoning Output

```lua
Expand Down
127 changes: 110 additions & 17 deletions lua/codecompanion/adapters/http/gemini.lua
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
local openai = require("codecompanion.adapters.http.openai")

local CONSTANTS = { thinking_start = "<thought>", thinking_end = "</thought>" }

---Strip a leading thinking tag from a content chunk.
---Removes a `<thought>` prefix first, then a `</thought>` prefix, so a chunk
---beginning with either tag (or both, in that order) comes back clean.
---@param message string? raw delta content; may be nil
---@return string? the cleaned content, or nil when `message` is nil
local function strip_thinking_tags(message)
  if not message then
    return
  end
  local no_open = message:gsub("^" .. CONSTANTS.thinking_start, "")
  local no_close = no_open:gsub("^" .. CONSTANTS.thinking_end, "")
  return no_close
end

---@class CodeCompanion.HTTPAdapter.Gemini : CodeCompanion.HTTPAdapter
return {
name = "gemini",
Expand Down Expand Up @@ -51,7 +62,15 @@ return {
return openai.handlers.tokens(self, data)
end,
---Form the request parameters, injecting Gemini's thinking configuration.
---The schema maps `thinking_budget`/`thinking_level` into
---`parameters.extra_body.google.thinking_config`; this handler ensures that
---table exists and derives `include_thoughts` from its contents.
---@param self CodeCompanion.HTTPAdapter
---@param params table
---@param messages table
---@return table
form_parameters = function(self, params, messages)
  local processed_params = openai.handlers.form_parameters(self, params, messages)
  -- https://ai.google.dev/gemini-api/docs/openai#thinking
  -- Ensure extra_body.google.thinking_config exists without clobbering any
  -- values already mapped in: deep-extending with an empty table keeps them.
  processed_params.extra_body =
    vim.tbl_deep_extend("force", processed_params.extra_body or {}, { google = { thinking_config = {} } })
  local thinking_config = processed_params.extra_body.google.thinking_config
  -- Request thought summaries unless reasoning is explicitly disabled
  -- (thinking_budget == 0). A nil budget means dynamic thinking, and a
  -- string thinking_level (Gemini 3+) likewise implies reasoning is on.
  thinking_config.include_thoughts = thinking_config.thinking_budget ~= 0
    or type(thinking_config.thinking_level) == "string"

  return processed_params
end,
form_tools = function(self, tools)
return openai.handlers.form_tools(self, tools)
Expand Down Expand Up @@ -95,8 +114,49 @@ return {
return result
end,
---Process a chat completion chunk via the OpenAI handler, then scrub any
---leading `</thought>` marker from the content delta.
---@param self CodeCompanion.HTTPAdapter
---@param data table
---@param tools table?
---@return table?
chat_output = function(self, data, tools)
  local result = openai.handlers.chat_output(self, data, tools)
  if not result then
    return result
  end
  -- The first non-thinking delta in a streamed response following the
  -- reasoning delta carries the closing thinking tag; remove it.
  local output = result.output
  if output and output.content and output.content:find("^" .. CONSTANTS.thinking_end) then
    output.content = strip_thinking_tags(output.content)
  end
  return result
end,

---Split Gemini "thought" deltas into reasoning vs. answer content.
---Gemini's OpenAI-compatible endpoint flags reasoning chunks with
---`extra_content.google.thought = true` and marks summaries with
---`<thought>`/`</thought>` tags.
---@param self CodeCompanion.HTTPAdapter
---@param data table result of `chat_output`, including the `extra` field
---@return table
parse_message_meta = function(self, data)
  -- https://ai.google.dev/gemini-api/docs/openai#thinking
  local extra_content = data.extra.extra_content
  local has_thinking = extra_content and extra_content.google and extra_content.google.thought

  if not has_thinking then
    -- this delta is either the actual answer after a reasoning sequence, or with reasoning off.
    -- in the former case, the sequence might start with a `</thought>` tag. strip it.
    return {
      status = data.status,
      output = { content = strip_thinking_tags(data.output.content), role = data.output.role },
    }
  end

  if self.opts.stream then
    -- the `content` field contains the reasoning summary.
    -- put it in the `reasoning` field and erase `content` so that it's not mistaken as the response
    local reasoning = strip_thinking_tags(data.output.content)
    data.output.reasoning = { content = reasoning }
    data.output.content = nil
  else
    -- when not streaming, the reasoning summary and final answer are sent in one big chunk,
    -- with the reasoning wrapped in the `<thought></thought>` tags.
    -- NOTE(review): `(.*)` and the gsub below are greedy, so they extend to
    -- the LAST `</thought>` tag — this assumes at most one thought block per
    -- message; confirm against the API's actual output.
    local reasoning =
      data.output.content:match(string.format("^%s(.*)%s", CONSTANTS.thinking_start, CONSTANTS.thinking_end))
    data.output.reasoning = { content = reasoning }
    data.output.content = data.output.content:gsub(".*" .. CONSTANTS.thinking_end, "")
  end

  return data
end,

tools = {
format_tool_calls = function(self, tools)
return openai.handlers.tools.format_tool_calls(self, tools)
Expand All @@ -106,7 +166,11 @@ return {
end,
},
---Parse inline output, removing Gemini's thinking markup from the result.
---Non-streaming responses wrap the reasoning summary in
---`<thought>...</thought>` ahead of the answer; streamed responses can emit a
---bare `</thought>` at the start of the first non-thinking delta.
---@param self CodeCompanion.HTTPAdapter
---@param data table
---@param context table?
---@return table|nil
inline_output = function(self, data, context)
  local inline_output = openai.handlers.inline_output(self, data, context)
  if not inline_output then
    return nil
  end
  -- Use the shared CONSTANTS tags (previously hard-coded literals) and also
  -- strip a dangling closing tag, which the wrapped-prefix pattern alone
  -- misses for streamed chunks. gsub's count return is dropped by the local.
  local content = inline_output.output
    :gsub("^" .. CONSTANTS.thinking_start .. ".*" .. CONSTANTS.thinking_end, "")
    :gsub("^" .. CONSTANTS.thinking_end, "")
  return { status = inline_output.status, output = content }
end,
on_exit = function(self, data)
return openai.handlers.on_exit(self, data)
Expand All @@ -121,12 +185,24 @@ return {
desc = "The model that will complete your prompt. See https://ai.google.dev/gemini-api/docs/models/gemini#model-variations for additional details and options.",
default = "gemini-2.5-flash",
choices = {
["gemini-3-pro-preview"] = { formatted_name = "Gemini 3 Pro", opts = { can_reason = true, has_vision = true } },
["gemini-2.5-pro"] = { formatted_name = "Gemini 2.5 Pro", opts = { can_reason = true, has_vision = true } },
["gemini-2.5-flash"] = { formatted_name = "Gemini 2.5 Flash", opts = { can_reason = true, has_vision = true } },
["gemini-3-pro-preview"] = {
formatted_name = "Gemini 3 Pro",
opts = { can_reason = true, has_vision = true },
},
["gemini-2.5-pro"] = {
formatted_name = "Gemini 2.5 Pro",
opts = { can_reason = true, has_vision = true },
thinking_budget = { low = 128, high = 32768 },
},
["gemini-2.5-flash"] = {
formatted_name = "Gemini 2.5 Flash",
opts = { can_reason = true, has_vision = true },
thinking_budget = { low = 0, high = 24576 },
},
["gemini-2.5-flash-preview-05-20"] = {
formatted_name = "Gemini 2.5 Flash Preview",
opts = { can_reason = true, has_vision = true },
thinking_budget = { low = 0, high = 24576 },
},
["gemini-2.0-flash"] = { formatted_name = "Gemini 2.0 Flash", opts = { has_vision = true } },
["gemini-2.0-flash-lite"] = { formatted_name = "Gemini 2.0 Flash Lite", opts = { has_vision = true } },
Expand Down Expand Up @@ -171,10 +247,11 @@ return {
end,
},
---@type CodeCompanion.Schema
reasoning_effort = {
thinking_budget = {
-- https://ai.google.dev/gemini-api/docs/thinking#set-budget
order = 5,
mapping = "parameters",
type = "string",
mapping = "parameters.extra_body.google.thinking_config",
type = "integer",
optional = true,
---@param self CodeCompanion.HTTPAdapter
condition = function(self)
Expand All @@ -187,14 +264,30 @@ return {
end
return false
end,
default = "medium",
desc = "Constrains effort on reasoning for reasoning models. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.",
choices = {
"high",
"medium",
"low",
"none",
},
-- for models that supports reasoning, this'll be 'dynamic thinking'
default = nil,
-- TODO: validate requires having `self` in the params.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since different models have different thinking budget, we might need to change validate so that it also accepts self in the parameters (something like validate(self, n) instead of the current validate(n). @olimorris would that be alright?

desc = "The thinkingBudget parameter guides the model on the number of thinking tokens to use when generating a response.",
},
---@type CodeCompanion.Schema
thinking_level = {
-- https://ai.google.dev/gemini-api/docs/thinking#thinking-levels
order = 5,
mapping = "parameters.extra_body.google.thinking_config",
type = "enum",
choices = { "low", "high" },
optional = true,
---@param self CodeCompanion.HTTPAdapter
condition = function(self)
local model = self.schema.model.default
if type(model) == "function" then
model = model()
end
return vim.list_contains({ "gemini-3-pro-preview" }, model)
end,
-- for models that supports reasoning, this'll be 'dynamic thinking'
default = "low",
desc = "The thinkingLevel parameter, recommended for Gemini 3 models and onwards, lets you control reasoning behavior.",
},
},
}
21 changes: 16 additions & 5 deletions lua/codecompanion/strategies/chat/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -1216,12 +1216,23 @@ function Chat:done(output, reasoning, tools, meta, opts)

local reasoning_content = nil
if reasoning and not vim.tbl_isempty(reasoning) then
if vim.iter(reasoning):any(function(item)
return item and type(item) ~= "string"
end) then
reasoning_content = adapters.call_handler(self.adapter, "build_reasoning", reasoning)
local build_reasoning_handler = adapters.get_handler(self.adapter, "build_reasoning")
if type(build_reasoning_handler) == "function" then
reasoning_content = build_reasoning_handler(self.adapter, reasoning)
else
reasoning_content = table.concat(reasoning, "")
-- Assume trivial `reasoning` structure: string or `{content: string}`
reasoning_content = {
content = vim
.iter(reasoning)
:map(function(item)
if type(item) == "string" then
return item
elseif type(item) == "table" and item.content then
return item.content
end
end)
:join(""),
}
end
end

Expand Down
25 changes: 25 additions & 0 deletions tests/adapters/http/stubs/gemini_reasoning_no_streaming.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "<thought>This is a dummy thinking summary</thought>Elegant, dynamic.\\n\\nNext question?\\n",
"role": "assistant",
"extra_content": {
"google": {
"thought": true
}
}
}
}
],
"created": 1743460357,
"model": "gemini-2.5-flash",
"object": "chat.completion",
"usage": {
"completion_tokens": 9,
"prompt_tokens": 419,
"total_tokens": 428
}
}
6 changes: 6 additions & 0 deletions tests/adapters/http/stubs/gemini_reasoning_streaming.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
data: {"choices":[{"delta":{"content":"<thought>This is a dummy reasoning segment","role":"assistant", "extra_content":{"google":{"thought":true}}},"index":0}],"created":1743460108,"model":"gemini-2.5-flash","object":"chat.completion.chunk","usage":{"completion_tokens":0,"prompt_tokens":425,"total_tokens":425}}
data: {"choices":[{"delta":{"content":"</thought>Elegant","role":"assistant"},"index":0}],"created":1743460109,"model":"gemini-2.5-flash","object":"chat.completion.chunk","usage":{"completion_tokens":0,"prompt_tokens":425,"total_tokens":425}}
Tokens: 425
data: {"choices":[{"delta":{"content":", dynamic.\n\nNext question?\n","role":"assistant"},"finish_reason":"stop","index":0}],"created":1743460109,"model":"gemini-2.5-flash","object":"chat.completion.chunk","usage":{"completion_tokens":9,"prompt_tokens":419,"total_tokens":428}}
Tokens: 428
data: [DONE]
39 changes: 39 additions & 0 deletions tests/adapters/http/test_gemini.lua
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,29 @@ T["Gemini adapter"]["Streaming"]["can output streamed data into the chat buffer"
h.expect_starts_with("Elegant, dynamic", output)
end

-- Streams the reasoning stub through chat_output/parse_message_meta and
-- checks answer text and reasoning summary land in their respective fields.
T["Gemini adapter"]["Streaming"]["can output streamed data with reasoning into the chat buffer"] = function()
  local content_parts = {}
  local reasoning_parts = {}
  for _, raw_line in ipairs(vim.fn.readfile("tests/adapters/http/stubs/gemini_reasoning_streaming.txt")) do
    local parsed = adapter.handlers.chat_output(adapter, raw_line)
    if parsed and parsed.extra then
      parsed = adapter.handlers.parse_message_meta(adapter, parsed)
    end
    if parsed then
      local out = parsed.output
      if out.content then
        content_parts[#content_parts + 1] = out.content
      end
      if out.reasoning then
        reasoning_parts[#reasoning_parts + 1] = out.reasoning.content
      end
    end
  end

  h.expect_starts_with("Elegant, dynamic", table.concat(content_parts))
  h.expect_starts_with("This is a dummy reasoning segment", table.concat(reasoning_parts))
end

T["Gemini adapter"]["Streaming"]["can process tools"] = function()
local tools = {}
local lines = vim.fn.readfile("tests/adapters/http/stubs/gemini_tools_streaming.txt")
Expand Down Expand Up @@ -198,6 +221,22 @@ T["Gemini adapter"]["No Streaming"]["can output for the chat buffer"] = function
h.expect_starts_with("Elegant, dynamic.", adapter.handlers.chat_output(adapter, json).output.content)
end

-- Feeds the non-streaming reasoning stub through the handlers and checks the
-- <thought> summary is split out from the final answer.
T["Gemini adapter"]["No Streaming"]["can output for the chat buffer with reasoning"] = function()
  adapter.opts.stream = false
  local body = table.concat(vim.fn.readfile("tests/adapters/http/stubs/gemini_reasoning_no_streaming.txt"), "\n")

  -- Match the format of the actual request
  local result = adapter.handlers.chat_output(adapter, { body = body })
  if result and result.extra then
    result = adapter.handlers.parse_message_meta(adapter, result)
  end
  h.expect_starts_with("Elegant, dynamic.", result.output.content)
  h.expect_starts_with("This is a dummy thinking summary", result.output.reasoning.content)
end

T["Gemini adapter"]["No Streaming"]["can process tools"] = function()
local data = vim.fn.readfile("tests/adapters/http/stubs/gemini_tools_no_streaming.txt")
data = table.concat(data, "\n")
Expand Down