feat(tokenizers): support parsing from public URL (#765)
This commit is contained in:
97
Cargo.lock
generated
97
Cargo.lock
generated
@@ -64,9 +64,13 @@ dependencies = [
|
||||
name = "avante-tokenizers"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"dirs",
|
||||
"hf-hub",
|
||||
"mlua",
|
||||
"regex",
|
||||
"tiktoken-rs",
|
||||
"tokenizers",
|
||||
"ureq",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -343,16 +347,6 @@ dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fancy-regex"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7493d4c459da9f84325ad297371a6b2b8a162800873a22e3b6b6512e61d18c05"
|
||||
dependencies = [
|
||||
"bit-set",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fancy-regex"
|
||||
version = "0.13.0"
|
||||
@@ -585,9 +579,9 @@ checksum = "38d1115007560874e373613744c6fba374c17688327a71c1476d1a5954cc857b"
|
||||
|
||||
[[package]]
|
||||
name = "minijinja"
|
||||
version = "2.2.0"
|
||||
version = "2.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d7d3e3a3eece1fa4618237ad41e1de855ced47eab705cec1c9a920e1d1c5aad"
|
||||
checksum = "c9ca8daf4b0b4029777f1bc6e1aedd1aec7b74c276a43bc6f620a8e1a1c0a90e"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memo-map",
|
||||
@@ -616,10 +610,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "mlua"
|
||||
version = "0.10.0-beta.1"
|
||||
source = "git+https://github.com/mlua-rs/mlua.git?branch=main#1634c43f0afaf7a71dc555cb6b3624250e5ff209"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0f6ddbd668297c46be4bdea6c599dcc1f001a129586272d53170b7ac0a62961e"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"either",
|
||||
"erased-serde",
|
||||
"mlua-sys",
|
||||
"mlua_derive",
|
||||
@@ -632,8 +628,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "mlua-sys"
|
||||
version = "0.6.2"
|
||||
source = "git+https://github.com/mlua-rs/mlua.git?branch=main#1634c43f0afaf7a71dc555cb6b3624250e5ff209"
|
||||
version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e9eebac25c35a13285456c88ee2fde93d9aee8bcfdaf03f9d6d12be3391351ec"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"cfg-if",
|
||||
@@ -642,8 +639,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "mlua_derive"
|
||||
version = "0.9.3"
|
||||
source = "git+https://github.com/mlua-rs/mlua.git?branch=main#1634c43f0afaf7a71dc555cb6b3624250e5ff209"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2cfc5faa2e0d044b3f5f0879be2920e0a711c97744c42cf1c295cb183668933e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -957,9 +955,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.10.6"
|
||||
version = "1.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
|
||||
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
@@ -969,9 +967,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.4.7"
|
||||
version = "0.4.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
|
||||
checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
@@ -980,9 +978,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.4"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
|
||||
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
||||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
@@ -1160,6 +1158,17 @@ version = "1.13.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
|
||||
|
||||
[[package]]
|
||||
name = "socks"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spin"
|
||||
version = "0.9.8"
|
||||
@@ -1216,18 +1225,18 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.63"
|
||||
version = "1.0.65"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
|
||||
checksum = "5d11abd9594d9b38965ef50805c5e469ca9cc6f197f883f717e0269a3057b3d5"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.63"
|
||||
version = "1.0.65"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
|
||||
checksum = "ae71770322cbd277e69d762a16c444af02aa0575ac0d174f0b9562d3b37f8602"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -1236,16 +1245,17 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tiktoken-rs"
|
||||
version = "0.5.9"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c314e7ce51440f9e8f5a497394682a57b7c323d0f4d0a6b1b13c429056e0e234"
|
||||
checksum = "44075987ee2486402f0808505dd65692163d243a337fc54363d49afac41087f6"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"base64 0.21.7",
|
||||
"bstr",
|
||||
"fancy-regex 0.12.0",
|
||||
"fancy-regex",
|
||||
"lazy_static",
|
||||
"parking_lot",
|
||||
"regex",
|
||||
"rustc-hash 1.1.0",
|
||||
]
|
||||
|
||||
@@ -1273,7 +1283,7 @@ dependencies = [
|
||||
"aho-corasick",
|
||||
"derive_builder",
|
||||
"esaxx-rs",
|
||||
"fancy-regex 0.13.0",
|
||||
"fancy-regex",
|
||||
"getrandom",
|
||||
"hf-hub",
|
||||
"itertools 0.12.1",
|
||||
@@ -1499,6 +1509,7 @@ dependencies = [
|
||||
"rustls-pki-types",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"socks",
|
||||
"url",
|
||||
"webpki-roots",
|
||||
]
|
||||
@@ -1602,6 +1613,28 @@ dependencies = [
|
||||
"rustls-pki-types",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.48.0"
|
||||
|
||||
Reference in New Issue
Block a user