Revert "fix max_tokens for reasoning models (#1819)" (#1839)

* Revert "fix max_tokens for reasoning models (#1819)"
  This reverts commit 1e2e233ff5.
* Revert "fix: revert max_completion_tokens to max_tokens (#1741)"
  This reverts commit cd13eeb7d9.
* fix: nvim_version
2025-04-09 16:58:54 +08:00
parent 1fc57ab1ae
commit 04336913b3
6 changed files with 8 additions and 12 deletions
--- a/README.md
+++ b/README.md
@@ -69,7 +69,7 @@ For building binary if you wish to build from source, then `cargo` is required.
      model = "gpt-4o", -- your desired model (or use gpt-4o, etc.)
      timeout = 30000, -- Timeout in milliseconds, increase this for reasoning models
      temperature = 0,
-      max_tokens = 8192, -- Increase this to include reasoning tokens (for reasoning models)
+      max_completion_tokens = 8192, -- Increase this to include reasoning tokens (for reasoning models)
      --reasoning_effort = "medium", -- low|medium|high, only used for reasoning models
    },
  },
--- a/cursor-planning-mode.md
+++ b/cursor-planning-mode.md
@@ -35,7 +35,7 @@ Then enable it in avante.nvim:
            api_key_name = 'GROQ_API_KEY',
            endpoint = 'https://api.groq.com/openai/v1/',
            model = 'llama-3.3-70b-versatile',
-            max_tokens = 32768, -- remember to increase this value, otherwise it will stop generating halfway
+            max_completion_tokens = 32768, -- remember to increase this value, otherwise it will stop generating halfway
        },
    },
    --- ... existing configurations
--- a/lua/avante/config.lua
+++ b/lua/avante/config.lua
@@ -219,7 +219,7 @@ M._defaults = {
    model = "gpt-4o",
    timeout = 30000, -- Timeout in milliseconds, increase this for reasoning models
    temperature = 0,
-    max_tokens = 16384, -- Increase this to include reasoning tokens (for reasoning models)
+    max_completion_tokens = 16384, -- Increase this to include reasoning tokens (for reasoning models)
    reasoning_effort = "medium", -- low|medium|high, only used for reasoning models
  },
  ---@type AvanteSupportedProvider
@@ -239,7 +239,7 @@ M._defaults = {
    api_version = "2024-12-01-preview",
    timeout = 30000, -- Timeout in milliseconds, increase this for reasoning models
    temperature = 0,
-    max_tokens = 20480, -- Increase this to include reasoning tokens (for reasoning models)
+    max_completion_tokens = 20480, -- Increase this to include reasoning tokens (for reasoning models)
    reasoning_effort = "medium", -- low|medium|high, only used for reasoning models
  },
  ---@type AvanteSupportedProvider
--- a/lua/avante/providers/azure.lua
+++ b/lua/avante/providers/azure.lua
@@ -2,7 +2,7 @@
 ---@field deployment string
 ---@field api_version string
 ---@field temperature number
----@field max_tokens number
+---@field max_completion_tokens number
 ---@field reasoning_effort? string
 local Utils = require("avante.utils")
--- a/lua/avante/providers/openai.lua
+++ b/lua/avante/providers/openai.lua
@@ -67,13 +67,8 @@ function M.set_allowed_params(provider_conf, request_body)
  else
    request_body.reasoning_effort = nil
  end
-
-  if M.is_reasoning_model(provider_conf.model) then
-    if request_body.max_tokens then
-      request_body.max_completion_tokens = request_body.max_tokens
-      request_body.max_tokens = nil
-    end
-  end
+  -- If max_tokens is set in config, unset max_completion_tokens
+  if request_body.max_tokens then request_body.max_completion_tokens = nil end
 end
 function M:parse_messages(opts)
--- a/lua/avante/types.lua
+++ b/lua/avante/types.lua
@@ -217,6 +217,7 @@ vim.g.avante_login = vim.g.avante_login
 ---@field __inherited_from? string
 ---@field temperature? number
 ---@field max_tokens? number
 ---@field max_completion_tokens? number
 ---@field reasoning_effort? string
 ---@field display_name? string
 ---