feat: fetch tool (#1196)

This commit is contained in:
yetone
2025-02-06 19:13:47 +08:00
committed by GitHub
parent 77e20fd088
commit 1ec12907a2
10 changed files with 1037 additions and 11 deletions

864
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -12,6 +12,7 @@ version = "0.1.0"
avante-tokenizers = { path = "crates/avante-tokenizers" }
avante-templates = { path = "crates/avante-templates" }
avante-repo-map = { path = "crates/avante-repo-map" }
avante-html2md = { path = "crates/avante-html2md" }
minijinja = { version = "2.4.0", features = [
"loader",
"json",

View File

@@ -22,15 +22,17 @@ all: luajit
define make_definitions
ifeq ($(BUILD_FROM_SOURCE),true)
ifeq ($(TARGET_LIBRARY), all)
$1: $(BUILD_DIR)/libAvanteTokenizers-$1.$(EXT) $(BUILD_DIR)/libAvanteTemplates-$1.$(EXT) $(BUILD_DIR)/libAvanteRepoMap-$1.$(EXT)
$1: $(BUILD_DIR)/libAvanteTokenizers-$1.$(EXT) $(BUILD_DIR)/libAvanteTemplates-$1.$(EXT) $(BUILD_DIR)/libAvanteRepoMap-$1.$(EXT) $(BUILD_DIR)/libAvanteHtml2md-$1.$(EXT)
else ifeq ($(TARGET_LIBRARY), tokenizers)
$1: $(BUILD_DIR)/libAvanteTokenizers-$1.$(EXT)
else ifeq ($(TARGET_LIBRARY), templates)
$1: $(BUILD_DIR)/libAvanteTemplates-$1.$(EXT)
else ifeq ($(TARGET_LIBRARY), repo-map)
$1: $(BUILD_DIR)/libAvanteRepoMap-$1.$(EXT)
else ifeq ($(TARGET_LIBRARY), html2md)
$1: $(BUILD_DIR)/libAvanteHtml2md-$1.$(EXT)
else
$$(error TARGET_LIBRARY must be one of all, tokenizers, templates, repo-map)
$$(error TARGET_LIBRARY must be one of all, tokenizers, templates, repo-map, html2md)
endif
else
$1:
@@ -50,11 +52,13 @@ define build_targets
$(BUILD_DIR)/libAvanteTokenizers-$1.$(EXT): $(BUILD_DIR) $1-tokenizers
$(BUILD_DIR)/libAvanteTemplates-$1.$(EXT): $(BUILD_DIR) $1-templates
$(BUILD_DIR)/libAvanteRepoMap-$1.$(EXT): $(BUILD_DIR) $1-repo-map
$(BUILD_DIR)/libAvanteHtml2md-$1.$(EXT): $(BUILD_DIR) $1-html2md
endef
$(foreach lua_version,$(LUA_VERSIONS),$(eval $(call build_package,$(lua_version),tokenizers)))
$(foreach lua_version,$(LUA_VERSIONS),$(eval $(call build_package,$(lua_version),templates)))
$(foreach lua_version,$(LUA_VERSIONS),$(eval $(call build_package,$(lua_version),repo-map)))
$(foreach lua_version,$(LUA_VERSIONS),$(eval $(call build_package,$(lua_version),html2md)))
$(foreach lua_version,$(LUA_VERSIONS),$(eval $(call build_targets,$(lua_version))))
$(BUILD_DIR):

View File

@@ -0,0 +1,25 @@
# Built as a C dynamic library.
[lib]
crate-type = ["cdylib"]
[package]
name = "avante-html2md"
edition.workspace = true
rust-version.workspace = true
license.workspace = true
version.workspace = true
[dependencies]
htmd = "0.1.6"
# NOTE(review): the crate source visible in this change only imports `htmd`;
# `html2md` looks unused — confirm and drop it if so.
html2md = "0.2.15"
mlua.workspace = true
# Only the blocking client API of reqwest is enabled here.
reqwest = { version = "0.12.12", features = ["blocking"] }
[lints]
workspace = true
# Each feature forwards to the mlua feature of the same name.
[features]
lua51 = ["mlua/lua51"]
lua52 = ["mlua/lua52"]
lua53 = ["mlua/lua53"]
lua54 = ["mlua/lua54"]
luajit = ["mlua/luajit"]

View File

@@ -0,0 +1,81 @@
use htmd::HtmlToMarkdown;
use mlua::prelude::*;
use std::error::Error;
/// Errors produced by this crate; each variant carries the message of the
/// underlying failure as plain text.
#[derive(Debug)]
enum MyError {
    /// Converting HTML to Markdown failed.
    HtmlToMd(String),
    /// A step of the HTTP request failed.
    Request(String),
}

impl std::fmt::Display for MyError {
    /// Renders the error as `<category>: <message>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (label, detail) = match self {
            Self::HtmlToMd(detail) => ("HTML to Markdown error", detail),
            Self::Request(detail) => ("Request error", detail),
        };
        write!(f, "{label}: {detail}")
    }
}

impl Error for MyError {}
/// Convert an HTML document to Markdown.
///
/// The converter is configured to skip `script`, `style`, `header` and
/// `footer` tags. A converter failure is returned as `MyError::HtmlToMd`
/// carrying the failure's message.
fn do_html2md(html: &str) -> Result<String, MyError> {
    HtmlToMarkdown::builder()
        .skip_tags(vec!["script", "style", "header", "footer"])
        .build()
        .convert(html)
        .map_err(|err| MyError::HtmlToMd(err.to_string()))
}
fn do_fetch_md(url: &str) -> Result<String, MyError> {
let mut headers = reqwest::header::HeaderMap::new();
headers.insert(
reqwest::header::USER_AGENT,
reqwest::header::HeaderValue::from_static("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"),
);
let client = reqwest::blocking::Client::builder()
.default_headers(headers)
.build()
.map_err(|e| MyError::Request(e.to_string()))?;
let response = client
.get(url)
.send()
.map_err(|e| MyError::Request(e.to_string()))?;
let body = response
.text()
.map_err(|e| MyError::Request(e.to_string()))?;
let html = body.trim().to_string();
let md = do_html2md(&html)?;
Ok(md)
}
#[mlua::lua_module]
fn avante_html2md(lua: &Lua) -> LuaResult<LuaTable> {
let exports = lua.create_table()?;
exports.set(
"fetch_md",
lua.create_function(move |_, url: String| -> LuaResult<String> {
do_fetch_md(&url).map_err(|e| mlua::Error::RuntimeError(e.to_string()))
})?,
)?;
exports.set(
"html2md",
lua.create_function(move |_, html: String| -> LuaResult<String> {
do_html2md(&html).map_err(|e| mlua::Error::RuntimeError(e.to_string()))
})?,
)?;
Ok(exports)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Offline check: visible text is kept and `script` content is skipped.
    #[test]
    fn test_html2md_skips_script() {
        let md = do_html2md("<h1>Title</h1><script>alert(1)</script><p>Body</p>").unwrap();
        assert!(md.contains("Title"));
        assert!(md.contains("Body"));
        assert!(!md.contains("alert"));
    }

    /// Performs a real HTTP request, so it is skipped by default.
    /// Run it explicitly with `cargo test -- --ignored`.
    #[test]
    #[ignore = "requires network access"]
    fn test_fetch_md() {
        let md = do_fetch_md("https://github.com/yetone/avante.nvim").unwrap();
        assert!(!md.trim().is_empty());
        println!("{md}");
    }
}

27
lua/avante/html2md.lua Normal file
View File

@@ -0,0 +1,27 @@
---Functions exported by the native `avante_html2md` module; the local below
---caches the module once it has been loaded.
---@class AvanteHtml2Md
---@field fetch_md fun(url: string): string
---@field html2md fun(html: string): string
local _html2md_lib = nil
local M = {}
---Return the native library, loading and caching it on first success.
---A failed `require` is not cached, so a later call tries again.
---@return AvanteHtml2Md|nil
function M._init_html2md_lib()
  if _html2md_lib == nil then
    local loaded, lib = pcall(require, "avante_html2md")
    if loaded then _html2md_lib = lib end
  end
  return _html2md_lib
end
---Schedule a deferred load of the native library instead of loading it now.
function M.setup()
  local delay_ms = 1000
  vim.defer_fn(M._init_html2md_lib, delay_ms)
end
---Fetch `url` through the native library and return its content as Markdown.
---@param url string
---@return string|nil markdown nil when the fetch failed
---@return string|nil error message describing the failure, nil on success
function M.fetch_md(url)
  local html2md_lib = M._init_html2md_lib()
  -- Return nil (not "") so callers that test `res == nil` see the failure.
  if not html2md_lib then return nil, "Failed to load avante_html2md" end
  -- The native function raises a Lua error on failure; translate that into
  -- the `nil, err` return convention.
  local ok, res = pcall(html2md_lib.fetch_md, url)
  if not ok then return nil, tostring(res) end
  return res, nil
end
return M

View File

@@ -360,6 +360,7 @@ function M.setup(opts)
H.load_path()
require("avante.html2md").setup()
require("avante.repo_map").setup()
require("avante.path").setup()
require("avante.highlights").setup()

View File

@@ -308,6 +308,18 @@ function M.web_search(opts, on_log)
end
end
---Fetch the page at `opts.url` and return it converted to Markdown.
---@param opts { url: string }
---@param on_log? fun(log: string): nil
---@return string|nil result
---@return string|nil error
function M.fetch(opts, on_log)
  if on_log then on_log("url: " .. opts.url) end
  local Html2Md = require("avante.html2md")
  -- pcall keeps a raised error from escaping the documented `nil, error`
  -- contract; on success `res, err` are fetch_md's own two return values.
  local ok, res, err = pcall(Html2Md.fetch_md, opts.url)
  if not ok then return nil, tostring(res) end
  -- Previously the second return value was dropped, so failures reported by
  -- fetch_md were handed back as a successful result.
  if err then return nil, err end
  if res == nil then return nil, "Failed to fetch markdown" end
  return res, nil
end
---@class AvanteLLMTool
---@field name string
---@field description string
@@ -715,6 +727,33 @@ M.tools = {
},
},
},
{
name = "fetch",
description = "Fetch markdown from a url",
param = {
type = "table",
fields = {
{
name = "url",
description = "Url to fetch markdown from",
type = "string",
},
},
},
returns = {
{
name = "result",
description = "Result of the fetch",
type = "string",
},
{
name = "error",
description = "Error message if the fetch was not successful",
type = "string",
optional = true,
},
},
},
}
---@param tools AvanteLLMTool[]

View File

@@ -13,7 +13,6 @@ local filetype_map = {
---@field stringify_definitions fun(lang: string, source: string): string
local repo_map_lib = nil
---@class avante.utils.repo_map
local RepoMap = {}
---@return AvanteRepoMap|nil

View File

@@ -7,7 +7,6 @@ local lsp = vim.lsp
---@class avante.utils: LazyUtilCore
---@field tokens avante.utils.tokens
---@field root avante.utils.root
---@field repo_map avante.utils.repo_map
---@field file avante.utils.file
local M = {}