fix: use tree-sitter-markdown to extract code snippets (#1315)
Co-authored-by: yetone <yetoneful@gmail.com>
This commit is contained in:
@@ -73,6 +73,7 @@ For building binary if you wish to build from source, then `cargo` is required.
|
|||||||
build = "make",
|
build = "make",
|
||||||
-- build = "powershell -ExecutionPolicy Bypass -File Build.ps1 -BuildFromSource false" -- for windows
|
-- build = "powershell -ExecutionPolicy Bypass -File Build.ps1 -BuildFromSource false" -- for windows
|
||||||
dependencies = {
|
dependencies = {
|
||||||
|
"nvim-treesitter/nvim-treesitter",
|
||||||
"stevearc/dressing.nvim",
|
"stevearc/dressing.nvim",
|
||||||
"nvim-lua/plenary.nvim",
|
"nvim-lua/plenary.nvim",
|
||||||
"MunifTanjim/nui.nvim",
|
"MunifTanjim/nui.nvim",
|
||||||
@@ -121,6 +122,7 @@ For building binary if you wish to build from source, then `cargo` is required.
|
|||||||
```vim
|
```vim
|
||||||
|
|
||||||
" Deps
|
" Deps
|
||||||
|
Plug 'nvim-treesitter/nvim-treesitter'
|
||||||
Plug 'stevearc/dressing.nvim'
|
Plug 'stevearc/dressing.nvim'
|
||||||
Plug 'nvim-lua/plenary.nvim'
|
Plug 'nvim-lua/plenary.nvim'
|
||||||
Plug 'MunifTanjim/nui.nvim'
|
Plug 'MunifTanjim/nui.nvim'
|
||||||
@@ -153,6 +155,7 @@ add({
|
|||||||
source = 'yetone/avante.nvim',
|
source = 'yetone/avante.nvim',
|
||||||
monitor = 'main',
|
monitor = 'main',
|
||||||
depends = {
|
depends = {
|
||||||
|
'nvim-treesitter/nvim-treesitter',
|
||||||
'stevearc/dressing.nvim',
|
'stevearc/dressing.nvim',
|
||||||
'nvim-lua/plenary.nvim',
|
'nvim-lua/plenary.nvim',
|
||||||
'MunifTanjim/nui.nvim',
|
'MunifTanjim/nui.nvim',
|
||||||
@@ -184,6 +187,7 @@ end)
|
|||||||
```vim
|
```vim
|
||||||
|
|
||||||
-- Required plugins
|
-- Required plugins
|
||||||
|
use 'nvim-treesitter/nvim-treesitter'
|
||||||
use 'stevearc/dressing.nvim'
|
use 'stevearc/dressing.nvim'
|
||||||
use 'nvim-lua/plenary.nvim'
|
use 'nvim-lua/plenary.nvim'
|
||||||
use 'MunifTanjim/nui.nvim'
|
use 'MunifTanjim/nui.nvim'
|
||||||
|
|||||||
@@ -548,63 +548,78 @@ end
|
|||||||
---@field start_line_in_response_buf integer
|
---@field start_line_in_response_buf integer
|
||||||
---@field end_line_in_response_buf integer
|
---@field end_line_in_response_buf integer
|
||||||
---@field filepath string
|
---@field filepath string
|
||||||
---
|
|
||||||
|
---@param source string|integer
|
||||||
|
---@return TSNode[]
|
||||||
|
local function tree_sitter_markdown_parse_code_blocks(source)
|
||||||
|
local query = require("vim.treesitter.query")
|
||||||
|
local parser
|
||||||
|
if type(source) == "string" then
|
||||||
|
parser = vim.treesitter.get_string_parser(source, "markdown")
|
||||||
|
else
|
||||||
|
parser = vim.treesitter.get_parser(source, "markdown")
|
||||||
|
end
|
||||||
|
local tree = parser:parse()[1]
|
||||||
|
local root = tree:root()
|
||||||
|
local code_block_query = query.parse(
|
||||||
|
"markdown",
|
||||||
|
[[ (fenced_code_block
|
||||||
|
(info_string
|
||||||
|
(language) @language)?
|
||||||
|
(code_fence_content) @code) ]]
|
||||||
|
)
|
||||||
|
local nodes = {}
|
||||||
|
for _, node in code_block_query:iter_captures(root, source) do
|
||||||
|
table.insert(nodes, node)
|
||||||
|
end
|
||||||
|
return nodes
|
||||||
|
end
|
||||||
|
|
||||||
---@param response_content string
|
---@param response_content string
|
||||||
---@return table<string, AvanteCodeSnippet[]>
|
---@return table<string, AvanteCodeSnippet[]>
|
||||||
local function extract_cursor_planning_code_snippets_map(response_content, current_filepath, current_filetype)
|
local function extract_cursor_planning_code_snippets_map(response_content, current_filepath, current_filetype)
|
||||||
local snippets = {}
|
local snippets = {}
|
||||||
local current_snippet = {}
|
|
||||||
local in_code_block = false
|
|
||||||
local lang, filepath, start_line_in_response_buf
|
|
||||||
|
|
||||||
local lines = vim.split(response_content, "\n")
|
local lines = vim.split(response_content, "\n")
|
||||||
|
local cumulated_content = ""
|
||||||
|
|
||||||
local idx = 1
|
-- use tree-sitter-markdown to parse all code blocks in response_content
|
||||||
local line_count = #lines
|
local lang = "unknown"
|
||||||
|
for _, node in ipairs(tree_sitter_markdown_parse_code_blocks(response_content)) do
|
||||||
while idx <= line_count do
|
if node:type() == "language" then
|
||||||
local line = lines[idx]
|
lang = vim.treesitter.get_node_text(node, response_content)
|
||||||
if line:match("^%s*```") then
|
lang = vim.split(lang, ":")[1]
|
||||||
if in_code_block then
|
elseif node:type() == "code_fence_content" then
|
||||||
in_code_block = false
|
local start_line, _ = node:start()
|
||||||
if filepath == nil or filepath == "" then
|
local end_line, _ = node:end_()
|
||||||
if lang == current_filetype then
|
local filepath, skip_next_line = obtain_filepath_from_codeblock(lines, start_line)
|
||||||
filepath = current_filepath
|
if filepath == nil or filepath == "" then
|
||||||
else
|
if lang == current_filetype then
|
||||||
Utils.warn(
|
filepath = current_filepath
|
||||||
string.format(
|
else
|
||||||
"Failed to parse filepath from code block, and current_filetype `%s` is not the same as the filetype `%s` of the current code block, so ignore this code block",
|
Utils.warn(
|
||||||
current_filetype,
|
string.format(
|
||||||
lang
|
"Failed to parse filepath from code block, and current_filetype `%s` is not the same as the filetype `%s` of the current code block, so ignore this code block",
|
||||||
)
|
current_filetype,
|
||||||
|
lang
|
||||||
)
|
)
|
||||||
goto continue
|
)
|
||||||
end
|
lang = "unknown"
|
||||||
end
|
goto continue
|
||||||
table.insert(snippets, {
|
|
||||||
range = { 0, 0 },
|
|
||||||
content = table.concat(current_snippet, "\n"),
|
|
||||||
lang = lang,
|
|
||||||
filepath = filepath,
|
|
||||||
start_line_in_response_buf = start_line_in_response_buf,
|
|
||||||
end_line_in_response_buf = idx,
|
|
||||||
})
|
|
||||||
else
|
|
||||||
in_code_block = true
|
|
||||||
start_line_in_response_buf = idx
|
|
||||||
local lang_ = line:match("^%s*```(%w+)")
|
|
||||||
lang = lang_ or "unknown"
|
|
||||||
local filepath_, skip_next_line = obtain_filepath_from_codeblock(lines, idx)
|
|
||||||
if filepath_ then
|
|
||||||
filepath = filepath_
|
|
||||||
if skip_next_line then idx = idx + 1 end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
elseif in_code_block then
|
if skip_next_line then start_line = start_line + 1 end
|
||||||
table.insert(current_snippet, line)
|
local this_content = table.concat(vim.list_slice(lines, start_line + 1, end_line), "\n")
|
||||||
|
cumulated_content = cumulated_content .. "\n" .. this_content
|
||||||
|
table.insert(snippets, {
|
||||||
|
range = { 0, 0 },
|
||||||
|
content = cumulated_content,
|
||||||
|
lang = lang,
|
||||||
|
filepath = filepath,
|
||||||
|
start_line_in_response_buf = start_line,
|
||||||
|
end_line_in_response_buf = end_line + 1,
|
||||||
|
})
|
||||||
end
|
end
|
||||||
::continue::
|
::continue::
|
||||||
idx = idx + 1
|
|
||||||
end
|
end
|
||||||
|
|
||||||
local snippets_map = {}
|
local snippets_map = {}
|
||||||
@@ -620,62 +635,61 @@ end
|
|||||||
---@return table<string, AvanteCodeSnippet[]>
|
---@return table<string, AvanteCodeSnippet[]>
|
||||||
local function extract_code_snippets_map(response_content)
|
local function extract_code_snippets_map(response_content)
|
||||||
local snippets = {}
|
local snippets = {}
|
||||||
local current_snippet = {}
|
|
||||||
local in_code_block = false
|
|
||||||
local lang, start_line, end_line, start_line_in_response_buf
|
|
||||||
local explanation = ""
|
|
||||||
|
|
||||||
local lines = vim.split(response_content, "\n")
|
local lines = vim.split(response_content, "\n")
|
||||||
|
|
||||||
for idx, line in ipairs(lines) do
|
-- use tree-sitter-markdown to parse all code blocks in response_content
|
||||||
local _, start_line_str, end_line_str =
|
local lang = "text"
|
||||||
line:match("^%s*(%d*)[%.%)%s]*[Aa]?n?d?%s*[Rr]eplace%s+[Ll]ines:?%s*(%d+)%-(%d+)")
|
local explanation_start_line = 0
|
||||||
if start_line_str ~= nil and end_line_str ~= nil then
|
for _, node in ipairs(tree_sitter_markdown_parse_code_blocks(response_content)) do
|
||||||
start_line = tonumber(start_line_str)
|
local start_line_in_response_buf, _ = node:start()
|
||||||
end_line = tonumber(end_line_str)
|
local end_line_in_response_buf, _ = node:end_()
|
||||||
else
|
if node:type() == "language" then
|
||||||
_, start_line_str = line:match("^%s*(%d*)[%.%)%s]*[Aa]?n?d?%s*[Rr]eplace%s+[Ll]ine:?%s*(%d+)")
|
lang = vim.treesitter.get_node_text(node, response_content)
|
||||||
if start_line_str ~= nil then
|
elseif node:type() == "code_fence_content" and start_line_in_response_buf > 1 then
|
||||||
|
local number_line = lines[start_line_in_response_buf - 1]
|
||||||
|
local start_line, end_line
|
||||||
|
|
||||||
|
local _, start_line_str, end_line_str =
|
||||||
|
number_line:match("^%s*(%d*)[%.%)%s]*[Aa]?n?d?%s*[Rr]eplace%s+[Ll]ines:?%s*(%d+)%-(%d+)")
|
||||||
|
if start_line_str ~= nil and end_line_str ~= nil then
|
||||||
start_line = tonumber(start_line_str)
|
start_line = tonumber(start_line_str)
|
||||||
end_line = tonumber(start_line_str)
|
end_line = tonumber(end_line_str)
|
||||||
else
|
else
|
||||||
start_line_str = line:match("[Aa]fter%s+[Ll]ine:?%s*(%d+)")
|
_, start_line_str = number_line:match("^%s*(%d*)[%.%)%s]*[Aa]?n?d?%s*[Rr]eplace%s+[Ll]ine:?%s*(%d+)")
|
||||||
if start_line_str ~= nil then
|
if start_line_str ~= nil then
|
||||||
start_line = tonumber(start_line_str) + 1
|
start_line = tonumber(start_line_str)
|
||||||
end_line = tonumber(start_line_str) + 1
|
end_line = tonumber(start_line_str)
|
||||||
|
else
|
||||||
|
start_line_str = number_line:match("[Aa]fter%s+[Ll]ine:?%s*(%d+)")
|
||||||
|
if start_line_str ~= nil then
|
||||||
|
start_line = tonumber(start_line_str) + 1
|
||||||
|
end_line = tonumber(start_line_str) + 1
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
if line:match("^%s*```") then
|
if start_line ~= nil and end_line ~= nil then
|
||||||
if in_code_block then
|
local filepath = lines[start_line_in_response_buf - 2]
|
||||||
if start_line ~= nil and end_line ~= nil then
|
if filepath:match("^[Ff]ilepath:") then filepath = filepath:match("^[Ff]ilepath:%s*(.+)") end
|
||||||
local filepath = lines[start_line_in_response_buf - 2]
|
local content = vim.treesitter.get_node_text(node, response_content)
|
||||||
if filepath:match("^[Ff]ilepath:") then filepath = filepath:match("^[Ff]ilepath:%s*(.+)") end
|
local explanation = ""
|
||||||
local snippet = {
|
if start_line_in_response_buf > explanation_start_line + 2 then
|
||||||
range = { start_line, end_line },
|
explanation =
|
||||||
content = table.concat(current_snippet, "\n"),
|
table.concat(vim.list_slice(lines, explanation_start_line, start_line_in_response_buf - 3), "\n")
|
||||||
lang = lang,
|
|
||||||
explanation = explanation,
|
|
||||||
start_line_in_response_buf = start_line_in_response_buf,
|
|
||||||
end_line_in_response_buf = idx,
|
|
||||||
filepath = filepath,
|
|
||||||
}
|
|
||||||
table.insert(snippets, snippet)
|
|
||||||
end
|
end
|
||||||
current_snippet = {}
|
local snippet = {
|
||||||
start_line, end_line = nil, nil
|
range = { start_line, end_line },
|
||||||
explanation = ""
|
content = content,
|
||||||
in_code_block = false
|
lang = lang,
|
||||||
else
|
explanation = explanation,
|
||||||
lang = line:match("^%s*```(%w+)")
|
start_line_in_response_buf = start_line_in_response_buf,
|
||||||
if not lang or lang == "" then lang = "text" end
|
end_line_in_response_buf = end_line_in_response_buf + 1,
|
||||||
in_code_block = true
|
filepath = filepath,
|
||||||
start_line_in_response_buf = idx
|
}
|
||||||
|
table.insert(snippets, snippet)
|
||||||
end
|
end
|
||||||
elseif in_code_block then
|
lang = "text"
|
||||||
table.insert(current_snippet, line)
|
explanation_start_line = end_line_in_response_buf + 2
|
||||||
else
|
|
||||||
explanation = explanation .. line .. "\n"
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -843,35 +857,24 @@ end
|
|||||||
---@return AvanteCodeblock[]
|
---@return AvanteCodeblock[]
|
||||||
local function parse_codeblocks(buf, current_filepath, current_filetype)
|
local function parse_codeblocks(buf, current_filepath, current_filetype)
|
||||||
local codeblocks = {}
|
local codeblocks = {}
|
||||||
local in_codeblock = false
|
|
||||||
local start_line = nil
|
|
||||||
local lang = nil
|
|
||||||
|
|
||||||
local lines = Utils.get_buf_lines(0, -1, buf)
|
local lines = Utils.get_buf_lines(0, -1, buf)
|
||||||
for i, line in ipairs(lines) do
|
local lang, valid
|
||||||
if line:match("^%s*```") then
|
for _, node in ipairs(tree_sitter_markdown_parse_code_blocks(buf)) do
|
||||||
-- parse language
|
if node:type() == "language" then
|
||||||
local lang_ = line:match("^%s*```(%w+)")
|
lang = vim.treesitter.get_node_text(node, buf)
|
||||||
if in_codeblock and not lang_ then
|
elseif node:type() == "code_fence_content" then
|
||||||
table.insert(codeblocks, { start_line = start_line, end_line = i, lang = lang })
|
local start_line, _ = node:start()
|
||||||
in_codeblock = false
|
local end_line, _ = node:end_()
|
||||||
elseif lang_ then
|
if Config.behaviour.enable_cursor_planning_mode then
|
||||||
if Config.behaviour.enable_cursor_planning_mode then
|
local filepath = obtain_filepath_from_codeblock(lines, start_line)
|
||||||
local filepath = obtain_filepath_from_codeblock(lines, i)
|
if not filepath and lang == current_filetype then filepath = current_filepath end
|
||||||
if not filepath and lang_ == current_filetype then filepath = current_filepath end
|
if filepath then valid = true end
|
||||||
if filepath then
|
else
|
||||||
lang = lang_
|
if lines[start_line - 1]:match("^%s*(%d*)[%.%)%s]*[Aa]?n?d?%s*[Rr]eplace%s+[Ll]ines:?%s*(%d+)%-(%d+)") then
|
||||||
start_line = i
|
valid = true
|
||||||
in_codeblock = true
|
|
||||||
end
|
|
||||||
else
|
|
||||||
if lines[i - 1]:match("^%s*(%d*)[%.%)%s]*[Aa]?n?d?%s*[Rr]eplace%s+[Ll]ines:?%s*(%d+)%-(%d+)") then
|
|
||||||
lang = lang_
|
|
||||||
start_line = i
|
|
||||||
in_codeblock = true
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
if valid then table.insert(codeblocks, { start_line = start_line, end_line = end_line + 1, lang = lang }) end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user