diff --git a/README.md b/README.md
index 8a50cd4..5f40e28 100644
--- a/README.md
+++ b/README.md
@@ -69,7 +69,7 @@ For building binary if you wish to build from source, then `cargo` is required.
       model = "gpt-4o", -- your desired model (or use gpt-4o, etc.)
       timeout = 30000, -- Timeout in milliseconds, increase this for reasoning models
       temperature = 0,
-      max_tokens = 8192, -- Increase this to include reasoning tokens (for reasoning models)
+      max_completion_tokens = 8192, -- Increase this to include reasoning tokens (for reasoning models)
       --reasoning_effort = "medium", -- low|medium|high, only used for reasoning models
     },
   },
diff --git a/cursor-planning-mode.md b/cursor-planning-mode.md
index 4a7d99f..a78d3d9 100644
--- a/cursor-planning-mode.md
+++ b/cursor-planning-mode.md
@@ -35,7 +35,7 @@ Then enable it in avante.nvim:
             api_key_name = 'GROQ_API_KEY',
             endpoint = 'https://api.groq.com/openai/v1/',
             model = 'llama-3.3-70b-versatile',
-            max_tokens = 32768, -- remember to increase this value, otherwise it will stop generating halfway
+            max_completion_tokens = 32768, -- remember to increase this value, otherwise it will stop generating halfway
         },
     },
     --- ... existing configurations
diff --git a/lua/avante/config.lua b/lua/avante/config.lua
index 0123ac0..7719188 100644
--- a/lua/avante/config.lua
+++ b/lua/avante/config.lua
@@ -219,7 +219,7 @@ M._defaults = {
     model = "gpt-4o",
     timeout = 30000, -- Timeout in milliseconds, increase this for reasoning models
     temperature = 0,
-    max_tokens = 16384, -- Increase this to include reasoning tokens (for reasoning models)
+    max_completion_tokens = 16384, -- Increase this to include reasoning tokens (for reasoning models)
     reasoning_effort = "medium", -- low|medium|high, only used for reasoning models
   },
   ---@type AvanteSupportedProvider
@@ -239,7 +239,7 @@ M._defaults = {
     api_version = "2024-12-01-preview",
     timeout = 30000, -- Timeout in milliseconds, increase this for reasoning models
     temperature = 0,
-    max_tokens = 20480, -- Increase this to include reasoning tokens (for reasoning models)
+    max_completion_tokens = 20480, -- Increase this to include reasoning tokens (for reasoning models)
     reasoning_effort = "medium", -- low|medium|high, only used for reasoning models
   },
   ---@type AvanteSupportedProvider
diff --git a/lua/avante/providers/azure.lua b/lua/avante/providers/azure.lua
index bf0de9c..b621f75 100644
--- a/lua/avante/providers/azure.lua
+++ b/lua/avante/providers/azure.lua
@@ -2,7 +2,7 @@
 ---@field deployment string
 ---@field api_version string
 ---@field temperature number
----@field max_tokens number
+---@field max_completion_tokens number
 ---@field reasoning_effort? string
 
 local Utils = require("avante.utils")
diff --git a/lua/avante/providers/openai.lua b/lua/avante/providers/openai.lua
index 55571f9..9b9bbd5 100644
--- a/lua/avante/providers/openai.lua
+++ b/lua/avante/providers/openai.lua
@@ -67,13 +67,8 @@ function M.set_allowed_params(provider_conf, request_body)
   else
     request_body.reasoning_effort = nil
   end
-
-  if M.is_reasoning_model(provider_conf.model) then
-    if request_body.max_tokens then
-      request_body.max_completion_tokens = request_body.max_tokens
-      request_body.max_tokens = nil
-    end
-  end
+  -- When request_body has max_tokens, drop max_completion_tokens so only one token limit is sent
+  if request_body.max_tokens then request_body.max_completion_tokens = nil end
 end
 
 function M:parse_messages(opts)
diff --git a/lua/avante/types.lua b/lua/avante/types.lua
index 911e262..49f74ac 100644
--- a/lua/avante/types.lua
+++ b/lua/avante/types.lua
@@ -217,6 +217,7 @@ vim.g.avante_login = vim.g.avante_login
 ---@field __inherited_from? string
 ---@field temperature? number
 ---@field max_tokens? number
+---@field max_completion_tokens? number
 ---@field reasoning_effort? string
 ---@field display_name? string
 ---