Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
50d15e321f | ||
|
|
a3e7fd8f0a | ||
|
|
645172747a | ||
|
|
7c4ac1eebc | ||
|
|
4b4301ad49 | ||
|
|
b60e03eb70 | ||
|
|
2c7bda3ff1 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,2 +1,3 @@
|
||||
/target
|
||||
.env
|
||||
/tantivy_indexes
|
||||
|
||||
432
Cargo.lock
generated
432
Cargo.lock
generated
@@ -303,6 +303,15 @@ dependencies = [
|
||||
"typenum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitpacking"
|
||||
version = "0.9.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4c1d3e2bfd8d06048a179f7b17afc3188effa10385e7b00dc65af6aae732ea92"
|
||||
dependencies = [
|
||||
"crunchy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "block-buffer"
|
||||
version = "0.10.4"
|
||||
@@ -322,6 +331,31 @@ dependencies = [
|
||||
"cipher",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bon"
|
||||
version = "3.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ced38439e7a86a4761f7f7d5ded5ff009135939ecb464a24452eaa4c1696af7d"
|
||||
dependencies = [
|
||||
"bon-macros",
|
||||
"rustversion",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bon-macros"
|
||||
version = "3.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ce61d2d3844c6b8d31b2353d9f66cf5e632b3e9549583fe3cac2f4f6136725e"
|
||||
dependencies = [
|
||||
"darling",
|
||||
"ident_case",
|
||||
"prettyplease",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bumpalo"
|
||||
version = "3.17.0"
|
||||
@@ -361,9 +395,17 @@ version = "1.2.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e3a13707ac958681c13b39b458c073d0d9bc8a22cb1b2f4c8e55eb72c13f362"
|
||||
dependencies = [
|
||||
"jobserver",
|
||||
"libc",
|
||||
"shlex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "census"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
@@ -541,6 +583,15 @@ version = "2.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
|
||||
|
||||
[[package]]
|
||||
name = "crc32fast"
|
||||
version = "1.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam"
|
||||
version = "0.8.4"
|
||||
@@ -622,6 +673,12 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crunchy"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929"
|
||||
|
||||
[[package]]
|
||||
name = "crypto-common"
|
||||
version = "0.1.6"
|
||||
@@ -699,6 +756,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e"
|
||||
dependencies = [
|
||||
"powerfmt",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -772,6 +830,12 @@ version = "0.15.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
|
||||
|
||||
[[package]]
|
||||
name = "downcast-rs"
|
||||
version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ea8a8b81cacc08888170eef4d13b775126db426d0b348bee9d18c2c1eaf123cf"
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.15.0"
|
||||
@@ -825,6 +889,12 @@ dependencies = [
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fastdivide"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471"
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "2.3.0"
|
||||
@@ -884,6 +954,16 @@ dependencies = [
|
||||
"percent-encoding",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fs4"
|
||||
version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f7e180ac76c23b45e767bd7ae9579bc0bb458618c4bc71835926e098e61d15f8"
|
||||
dependencies = [
|
||||
"rustix 0.38.44",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-channel"
|
||||
version = "0.3.31"
|
||||
@@ -1142,6 +1222,12 @@ dependencies = [
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "htmlescape"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163"
|
||||
|
||||
[[package]]
|
||||
name = "http"
|
||||
version = "1.3.1"
|
||||
@@ -1242,6 +1328,15 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyperloglogplus"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "621debdf94dcac33e50475fdd76d34d5ea9c0362a834b9db08c3024696c1fbe3"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iana-time-zone"
|
||||
version = "0.1.63"
|
||||
@@ -1520,6 +1615,16 @@ version = "1.0.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
|
||||
|
||||
[[package]]
|
||||
name = "jobserver"
|
||||
version = "0.1.33"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a"
|
||||
dependencies = [
|
||||
"getrandom 0.3.2",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.77"
|
||||
@@ -1566,6 +1671,12 @@ dependencies = [
|
||||
"spin",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "levenshtein_automata"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.172"
|
||||
@@ -1651,6 +1762,12 @@ dependencies = [
|
||||
"hashbrown 0.15.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lz4_flex"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5"
|
||||
|
||||
[[package]]
|
||||
name = "matchit"
|
||||
version = "0.8.4"
|
||||
@@ -1667,18 +1784,42 @@ dependencies = [
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "measure_time"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51c55d61e72fc3ab704396c5fa16f4c184db37978ae4e94ca8959693a235fc0e"
|
||||
dependencies = [
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
|
||||
|
||||
[[package]]
|
||||
name = "memmap2"
|
||||
version = "0.9.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mime"
|
||||
version = "0.3.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
|
||||
|
||||
[[package]]
|
||||
name = "minimal-lexical"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.8.8"
|
||||
@@ -1706,6 +1847,12 @@ version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03"
|
||||
|
||||
[[package]]
|
||||
name = "murmurhash32"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b"
|
||||
|
||||
[[package]]
|
||||
name = "native-tls"
|
||||
version = "0.2.14"
|
||||
@@ -1723,6 +1870,16 @@ dependencies = [
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "7.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"minimal-lexical",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nu-ansi-term"
|
||||
version = "0.46.0"
|
||||
@@ -1857,6 +2014,12 @@ version = "1.21.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
||||
|
||||
[[package]]
|
||||
name = "oneshot"
|
||||
version = "0.1.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4ce411919553d3f9fa53a0880544cda985a112117a0444d5ff1e870a893d6ea"
|
||||
|
||||
[[package]]
|
||||
name = "openssl"
|
||||
version = "0.10.72"
|
||||
@@ -1913,6 +2076,15 @@ version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
|
||||
|
||||
[[package]]
|
||||
name = "ownedbytes"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2fbd56f7631767e61784dc43f8580f403f4475bd4aaa4da003e6295e1bab4a7e"
|
||||
dependencies = [
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parking"
|
||||
version = "2.2.1"
|
||||
@@ -2275,6 +2447,16 @@ dependencies = [
|
||||
"getrandom 0.3.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_distr"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
"rand 0.8.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_xoshiro"
|
||||
version = "0.6.0"
|
||||
@@ -2305,6 +2487,26 @@ dependencies = [
|
||||
"unicode-width 0.2.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
|
||||
dependencies = [
|
||||
"either",
|
||||
"rayon-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon-core"
|
||||
version = "1.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
|
||||
dependencies = [
|
||||
"crossbeam-deque",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.5.11"
|
||||
@@ -2444,12 +2646,28 @@ dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rust-stemmers"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-hash"
|
||||
version = "2.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
|
||||
|
||||
[[package]]
|
||||
name = "rustc_version"
|
||||
version = "0.4.1"
|
||||
@@ -2512,6 +2730,22 @@ version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "search"
|
||||
version = "0.3.13"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"common",
|
||||
"prost",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tantivy",
|
||||
"tokio",
|
||||
"tonic",
|
||||
"tonic-reflection",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "security-framework"
|
||||
version = "2.11.1"
|
||||
@@ -2598,6 +2832,7 @@ dependencies = [
|
||||
name = "server"
|
||||
version = "0.3.13"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bcrypt",
|
||||
"chrono",
|
||||
"common",
|
||||
@@ -2608,11 +2843,13 @@ dependencies = [
|
||||
"prost",
|
||||
"regex",
|
||||
"rstest",
|
||||
"search",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sqlx",
|
||||
"steel-core",
|
||||
"steel-derive 0.5.0 (git+https://github.com/mattwparas/steel.git?branch=master)",
|
||||
"tantivy",
|
||||
"thiserror 2.0.12",
|
||||
"time",
|
||||
"tokio",
|
||||
@@ -2722,6 +2959,15 @@ dependencies = [
|
||||
"typenum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sketches-ddsketch"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c1e9a774a6c28142ac54bb25d25562e6bcf957493a184f15ad4eebccb23e410a"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "slab"
|
||||
version = "0.4.9"
|
||||
@@ -3162,6 +3408,152 @@ dependencies = [
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ca2374a21157427c5faff2d90930f035b6c22a5d7b0e5b0b7f522e988ef33c06"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"arc-swap",
|
||||
"base64",
|
||||
"bitpacking",
|
||||
"bon",
|
||||
"byteorder",
|
||||
"census",
|
||||
"crc32fast",
|
||||
"crossbeam-channel",
|
||||
"downcast-rs",
|
||||
"fastdivide",
|
||||
"fnv",
|
||||
"fs4",
|
||||
"htmlescape",
|
||||
"hyperloglogplus",
|
||||
"itertools 0.14.0",
|
||||
"levenshtein_automata",
|
||||
"log",
|
||||
"lru",
|
||||
"lz4_flex",
|
||||
"measure_time",
|
||||
"memmap2",
|
||||
"once_cell",
|
||||
"oneshot",
|
||||
"rayon",
|
||||
"regex",
|
||||
"rust-stemmers",
|
||||
"rustc-hash",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sketches-ddsketch",
|
||||
"smallvec",
|
||||
"tantivy-bitpacker",
|
||||
"tantivy-columnar",
|
||||
"tantivy-common",
|
||||
"tantivy-fst",
|
||||
"tantivy-query-grammar",
|
||||
"tantivy-stacker",
|
||||
"tantivy-tokenizer-api",
|
||||
"tempfile",
|
||||
"thiserror 2.0.12",
|
||||
"time",
|
||||
"uuid",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-bitpacker"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1adc286a39e089ae9938935cd488d7d34f14502544a36607effd2239ff0e2494"
|
||||
dependencies = [
|
||||
"bitpacking",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-columnar"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6300428e0c104c4f7db6f95b466a6f5c1b9aece094ec57cdd365337908dc7344"
|
||||
dependencies = [
|
||||
"downcast-rs",
|
||||
"fastdivide",
|
||||
"itertools 0.14.0",
|
||||
"serde",
|
||||
"tantivy-bitpacker",
|
||||
"tantivy-common",
|
||||
"tantivy-sstable",
|
||||
"tantivy-stacker",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-common"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e91b6ea6090ce03dc72c27d0619e77185d26cc3b20775966c346c6d4f7e99d7f"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
"ownedbytes",
|
||||
"serde",
|
||||
"time",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-fst"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"regex-syntax",
|
||||
"utf8-ranges",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-query-grammar"
|
||||
version = "0.24.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e810cdeeebca57fc3f7bfec5f85fdbea9031b2ac9b990eb5ff49b371d52bbe6a"
|
||||
dependencies = [
|
||||
"nom",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-sstable"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "709f22c08a4c90e1b36711c1c6cad5ae21b20b093e535b69b18783dd2cb99416"
|
||||
dependencies = [
|
||||
"futures-util",
|
||||
"itertools 0.14.0",
|
||||
"tantivy-bitpacker",
|
||||
"tantivy-common",
|
||||
"tantivy-fst",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-stacker"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2bcdebb267671311d1e8891fd9d1301803fdb8ad21ba22e0a30d0cab49ba59c1"
|
||||
dependencies = [
|
||||
"murmurhash32",
|
||||
"rand_distr",
|
||||
"tantivy-common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-tokenizer-api"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dfa942fcee81e213e09715bbce8734ae2180070b97b33839a795ba1de201547d"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tempfile"
|
||||
version = "3.19.1"
|
||||
@@ -3424,9 +3816,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tonic-reflection"
|
||||
version = "0.13.0"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "88fa815be858816dad226a49439ee90b7bcf81ab55bee72fdb217f1e6778c3ca"
|
||||
checksum = "f9687bd5bfeafebdded2356950f278bba8226f0b32109537c4253406e09aafe1"
|
||||
dependencies = [
|
||||
"prost",
|
||||
"prost-types",
|
||||
@@ -3648,6 +4040,12 @@ version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
|
||||
|
||||
[[package]]
|
||||
name = "utf8-ranges"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba"
|
||||
|
||||
[[package]]
|
||||
name = "utf8_iter"
|
||||
version = "1.0.4"
|
||||
@@ -3850,7 +4248,7 @@ version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
|
||||
dependencies = [
|
||||
"windows-sys 0.48.0",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4214,3 +4612,31 @@ dependencies = [
|
||||
"quote",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd"
|
||||
version = "0.13.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
|
||||
dependencies = [
|
||||
"zstd-safe",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd-safe"
|
||||
version = "7.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
|
||||
dependencies = [
|
||||
"zstd-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd-sys"
|
||||
version = "2.0.15+zstd.1.5.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"pkg-config",
|
||||
]
|
||||
|
||||
27
Cargo.toml
27
Cargo.toml
@@ -1,5 +1,5 @@
|
||||
[workspace]
|
||||
members = ["client", "server", "common"]
|
||||
members = ["client", "server", "common", "search"]
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
@@ -16,4 +16,27 @@ categories = ["command-line-interface"]
|
||||
|
||||
# [workspace.metadata]
|
||||
# TODO:
|
||||
# documentation = "https://docs.rs/accounting-client"`
|
||||
# documentation = "https://docs.rs/accounting-client"
|
||||
|
||||
[workspace.dependencies]
|
||||
# Async and gRPC
|
||||
tokio = { version = "1.44.2", features = ["full"] }
|
||||
tonic = "0.13.0"
|
||||
prost = "0.13.5"
|
||||
async-trait = "0.1.88"
|
||||
|
||||
# Data Handling & Serialization
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = "1.0.140"
|
||||
time = "0.3.41"
|
||||
|
||||
# Utilities & Error Handling
|
||||
anyhow = "1.0.98"
|
||||
dotenvy = "0.15.7"
|
||||
lazy_static = "1.5.0"
|
||||
tracing = "0.1.41"
|
||||
|
||||
# Search crate
|
||||
tantivy = "0.24.1"
|
||||
|
||||
common = { path = "./common" }
|
||||
|
||||
@@ -14,6 +14,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
"proto/table_definition.proto",
|
||||
"proto/tables_data.proto",
|
||||
"proto/table_script.proto",
|
||||
"proto/search.proto",
|
||||
],
|
||||
&["proto"],
|
||||
)?;
|
||||
|
||||
20
common/proto/search.proto
Normal file
20
common/proto/search.proto
Normal file
@@ -0,0 +1,20 @@
|
||||
// In common/proto/search.proto
|
||||
syntax = "proto3";
|
||||
package multieko2.search;
|
||||
|
||||
service Searcher {
|
||||
rpc SearchTable(SearchRequest) returns (SearchResponse);
|
||||
}
|
||||
|
||||
message SearchRequest {
|
||||
string table_name = 1;
|
||||
string query = 2;
|
||||
}
|
||||
|
||||
message SearchResponse {
|
||||
message Hit {
|
||||
int64 id = 1; // The PostgreSQL row ID
|
||||
float score = 2;
|
||||
}
|
||||
repeated Hit hits = 1;
|
||||
}
|
||||
@@ -25,6 +25,9 @@ pub mod proto {
|
||||
pub mod table_script {
|
||||
include!("proto/multieko2.table_script.rs");
|
||||
}
|
||||
pub mod search {
|
||||
include!("proto/multieko2.search.rs");
|
||||
}
|
||||
pub const FILE_DESCRIPTOR_SET: &[u8] =
|
||||
include_bytes!("proto/descriptor.bin");
|
||||
}
|
||||
|
||||
Binary file not shown.
315
common/src/proto/multieko2.search.rs
Normal file
315
common/src/proto/multieko2.search.rs
Normal file
@@ -0,0 +1,315 @@
|
||||
// This file is @generated by prost-build.
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct SearchRequest {
|
||||
#[prost(string, tag = "1")]
|
||||
pub table_name: ::prost::alloc::string::String,
|
||||
#[prost(string, tag = "2")]
|
||||
pub query: ::prost::alloc::string::String,
|
||||
}
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct SearchResponse {
|
||||
#[prost(message, repeated, tag = "1")]
|
||||
pub hits: ::prost::alloc::vec::Vec<search_response::Hit>,
|
||||
}
|
||||
/// Nested message and enum types in `SearchResponse`.
|
||||
pub mod search_response {
|
||||
#[derive(Clone, Copy, PartialEq, ::prost::Message)]
|
||||
pub struct Hit {
|
||||
/// The PostgreSQL row ID
|
||||
#[prost(int64, tag = "1")]
|
||||
pub id: i64,
|
||||
#[prost(float, tag = "2")]
|
||||
pub score: f32,
|
||||
}
|
||||
}
|
||||
/// Generated client implementations.
|
||||
pub mod searcher_client {
|
||||
#![allow(
|
||||
unused_variables,
|
||||
dead_code,
|
||||
missing_docs,
|
||||
clippy::wildcard_imports,
|
||||
clippy::let_unit_value,
|
||||
)]
|
||||
use tonic::codegen::*;
|
||||
use tonic::codegen::http::Uri;
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SearcherClient<T> {
|
||||
inner: tonic::client::Grpc<T>,
|
||||
}
|
||||
impl SearcherClient<tonic::transport::Channel> {
|
||||
/// Attempt to create a new client by connecting to a given endpoint.
|
||||
pub async fn connect<D>(dst: D) -> Result<Self, tonic::transport::Error>
|
||||
where
|
||||
D: TryInto<tonic::transport::Endpoint>,
|
||||
D::Error: Into<StdError>,
|
||||
{
|
||||
let conn = tonic::transport::Endpoint::new(dst)?.connect().await?;
|
||||
Ok(Self::new(conn))
|
||||
}
|
||||
}
|
||||
impl<T> SearcherClient<T>
|
||||
where
|
||||
T: tonic::client::GrpcService<tonic::body::Body>,
|
||||
T::Error: Into<StdError>,
|
||||
T::ResponseBody: Body<Data = Bytes> + std::marker::Send + 'static,
|
||||
<T::ResponseBody as Body>::Error: Into<StdError> + std::marker::Send,
|
||||
{
|
||||
pub fn new(inner: T) -> Self {
|
||||
let inner = tonic::client::Grpc::new(inner);
|
||||
Self { inner }
|
||||
}
|
||||
pub fn with_origin(inner: T, origin: Uri) -> Self {
|
||||
let inner = tonic::client::Grpc::with_origin(inner, origin);
|
||||
Self { inner }
|
||||
}
|
||||
pub fn with_interceptor<F>(
|
||||
inner: T,
|
||||
interceptor: F,
|
||||
) -> SearcherClient<InterceptedService<T, F>>
|
||||
where
|
||||
F: tonic::service::Interceptor,
|
||||
T::ResponseBody: Default,
|
||||
T: tonic::codegen::Service<
|
||||
http::Request<tonic::body::Body>,
|
||||
Response = http::Response<
|
||||
<T as tonic::client::GrpcService<tonic::body::Body>>::ResponseBody,
|
||||
>,
|
||||
>,
|
||||
<T as tonic::codegen::Service<
|
||||
http::Request<tonic::body::Body>,
|
||||
>>::Error: Into<StdError> + std::marker::Send + std::marker::Sync,
|
||||
{
|
||||
SearcherClient::new(InterceptedService::new(inner, interceptor))
|
||||
}
|
||||
/// Compress requests with the given encoding.
|
||||
///
|
||||
/// This requires the server to support it otherwise it might respond with an
|
||||
/// error.
|
||||
#[must_use]
|
||||
pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self {
|
||||
self.inner = self.inner.send_compressed(encoding);
|
||||
self
|
||||
}
|
||||
/// Enable decompressing responses.
|
||||
#[must_use]
|
||||
pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self {
|
||||
self.inner = self.inner.accept_compressed(encoding);
|
||||
self
|
||||
}
|
||||
/// Limits the maximum size of a decoded message.
|
||||
///
|
||||
/// Default: `4MB`
|
||||
#[must_use]
|
||||
pub fn max_decoding_message_size(mut self, limit: usize) -> Self {
|
||||
self.inner = self.inner.max_decoding_message_size(limit);
|
||||
self
|
||||
}
|
||||
/// Limits the maximum size of an encoded message.
|
||||
///
|
||||
/// Default: `usize::MAX`
|
||||
#[must_use]
|
||||
pub fn max_encoding_message_size(mut self, limit: usize) -> Self {
|
||||
self.inner = self.inner.max_encoding_message_size(limit);
|
||||
self
|
||||
}
|
||||
pub async fn search_table(
|
||||
&mut self,
|
||||
request: impl tonic::IntoRequest<super::SearchRequest>,
|
||||
) -> std::result::Result<tonic::Response<super::SearchResponse>, tonic::Status> {
|
||||
self.inner
|
||||
.ready()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tonic::Status::unknown(
|
||||
format!("Service was not ready: {}", e.into()),
|
||||
)
|
||||
})?;
|
||||
let codec = tonic::codec::ProstCodec::default();
|
||||
let path = http::uri::PathAndQuery::from_static(
|
||||
"/multieko2.search.Searcher/SearchTable",
|
||||
);
|
||||
let mut req = request.into_request();
|
||||
req.extensions_mut()
|
||||
.insert(GrpcMethod::new("multieko2.search.Searcher", "SearchTable"));
|
||||
self.inner.unary(req, path, codec).await
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Generated server implementations.
|
||||
pub mod searcher_server {
|
||||
#![allow(
|
||||
unused_variables,
|
||||
dead_code,
|
||||
missing_docs,
|
||||
clippy::wildcard_imports,
|
||||
clippy::let_unit_value,
|
||||
)]
|
||||
use tonic::codegen::*;
|
||||
/// Generated trait containing gRPC methods that should be implemented for use with SearcherServer.
|
||||
#[async_trait]
|
||||
pub trait Searcher: std::marker::Send + std::marker::Sync + 'static {
|
||||
async fn search_table(
|
||||
&self,
|
||||
request: tonic::Request<super::SearchRequest>,
|
||||
) -> std::result::Result<tonic::Response<super::SearchResponse>, tonic::Status>;
|
||||
}
|
||||
#[derive(Debug)]
|
||||
pub struct SearcherServer<T> {
|
||||
inner: Arc<T>,
|
||||
accept_compression_encodings: EnabledCompressionEncodings,
|
||||
send_compression_encodings: EnabledCompressionEncodings,
|
||||
max_decoding_message_size: Option<usize>,
|
||||
max_encoding_message_size: Option<usize>,
|
||||
}
|
||||
impl<T> SearcherServer<T> {
|
||||
pub fn new(inner: T) -> Self {
|
||||
Self::from_arc(Arc::new(inner))
|
||||
}
|
||||
pub fn from_arc(inner: Arc<T>) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
accept_compression_encodings: Default::default(),
|
||||
send_compression_encodings: Default::default(),
|
||||
max_decoding_message_size: None,
|
||||
max_encoding_message_size: None,
|
||||
}
|
||||
}
|
||||
pub fn with_interceptor<F>(
|
||||
inner: T,
|
||||
interceptor: F,
|
||||
) -> InterceptedService<Self, F>
|
||||
where
|
||||
F: tonic::service::Interceptor,
|
||||
{
|
||||
InterceptedService::new(Self::new(inner), interceptor)
|
||||
}
|
||||
/// Enable decompressing requests with the given encoding.
|
||||
#[must_use]
|
||||
pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self {
|
||||
self.accept_compression_encodings.enable(encoding);
|
||||
self
|
||||
}
|
||||
/// Compress responses with the given encoding, if the client supports it.
|
||||
#[must_use]
|
||||
pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self {
|
||||
self.send_compression_encodings.enable(encoding);
|
||||
self
|
||||
}
|
||||
/// Limits the maximum size of a decoded message.
|
||||
///
|
||||
/// Default: `4MB`
|
||||
#[must_use]
|
||||
pub fn max_decoding_message_size(mut self, limit: usize) -> Self {
|
||||
self.max_decoding_message_size = Some(limit);
|
||||
self
|
||||
}
|
||||
/// Limits the maximum size of an encoded message.
|
||||
///
|
||||
/// Default: `usize::MAX`
|
||||
#[must_use]
|
||||
pub fn max_encoding_message_size(mut self, limit: usize) -> Self {
|
||||
self.max_encoding_message_size = Some(limit);
|
||||
self
|
||||
}
|
||||
}
|
||||
impl<T, B> tonic::codegen::Service<http::Request<B>> for SearcherServer<T>
|
||||
where
|
||||
T: Searcher,
|
||||
B: Body + std::marker::Send + 'static,
|
||||
B::Error: Into<StdError> + std::marker::Send + 'static,
|
||||
{
|
||||
type Response = http::Response<tonic::body::Body>;
|
||||
type Error = std::convert::Infallible;
|
||||
type Future = BoxFuture<Self::Response, Self::Error>;
|
||||
fn poll_ready(
|
||||
&mut self,
|
||||
_cx: &mut Context<'_>,
|
||||
) -> Poll<std::result::Result<(), Self::Error>> {
|
||||
Poll::Ready(Ok(()))
|
||||
}
|
||||
fn call(&mut self, req: http::Request<B>) -> Self::Future {
|
||||
match req.uri().path() {
|
||||
"/multieko2.search.Searcher/SearchTable" => {
|
||||
#[allow(non_camel_case_types)]
|
||||
struct SearchTableSvc<T: Searcher>(pub Arc<T>);
|
||||
impl<T: Searcher> tonic::server::UnaryService<super::SearchRequest>
|
||||
for SearchTableSvc<T> {
|
||||
type Response = super::SearchResponse;
|
||||
type Future = BoxFuture<
|
||||
tonic::Response<Self::Response>,
|
||||
tonic::Status,
|
||||
>;
|
||||
fn call(
|
||||
&mut self,
|
||||
request: tonic::Request<super::SearchRequest>,
|
||||
) -> Self::Future {
|
||||
let inner = Arc::clone(&self.0);
|
||||
let fut = async move {
|
||||
<T as Searcher>::search_table(&inner, request).await
|
||||
};
|
||||
Box::pin(fut)
|
||||
}
|
||||
}
|
||||
let accept_compression_encodings = self.accept_compression_encodings;
|
||||
let send_compression_encodings = self.send_compression_encodings;
|
||||
let max_decoding_message_size = self.max_decoding_message_size;
|
||||
let max_encoding_message_size = self.max_encoding_message_size;
|
||||
let inner = self.inner.clone();
|
||||
let fut = async move {
|
||||
let method = SearchTableSvc(inner);
|
||||
let codec = tonic::codec::ProstCodec::default();
|
||||
let mut grpc = tonic::server::Grpc::new(codec)
|
||||
.apply_compression_config(
|
||||
accept_compression_encodings,
|
||||
send_compression_encodings,
|
||||
)
|
||||
.apply_max_message_size_config(
|
||||
max_decoding_message_size,
|
||||
max_encoding_message_size,
|
||||
);
|
||||
let res = grpc.unary(method, req).await;
|
||||
Ok(res)
|
||||
};
|
||||
Box::pin(fut)
|
||||
}
|
||||
_ => {
|
||||
Box::pin(async move {
|
||||
let mut response = http::Response::new(
|
||||
tonic::body::Body::default(),
|
||||
);
|
||||
let headers = response.headers_mut();
|
||||
headers
|
||||
.insert(
|
||||
tonic::Status::GRPC_STATUS,
|
||||
(tonic::Code::Unimplemented as i32).into(),
|
||||
);
|
||||
headers
|
||||
.insert(
|
||||
http::header::CONTENT_TYPE,
|
||||
tonic::metadata::GRPC_CONTENT_TYPE,
|
||||
);
|
||||
Ok(response)
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<T> Clone for SearcherServer<T> {
|
||||
fn clone(&self) -> Self {
|
||||
let inner = self.inner.clone();
|
||||
Self {
|
||||
inner,
|
||||
accept_compression_encodings: self.accept_compression_encodings,
|
||||
send_compression_encodings: self.send_compression_encodings,
|
||||
max_decoding_message_size: self.max_decoding_message_size,
|
||||
max_encoding_message_size: self.max_encoding_message_size,
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Generated gRPC service name
|
||||
pub const SERVICE_NAME: &str = "multieko2.search.Searcher";
|
||||
impl<T> tonic::server::NamedService for SearcherServer<T> {
|
||||
const NAME: &'static str = SERVICE_NAME;
|
||||
}
|
||||
}
|
||||
18
search/Cargo.toml
Normal file
18
search/Cargo.toml
Normal file
@@ -0,0 +1,18 @@
|
||||
[package]
|
||||
name = "search"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license = "AGPL-3.0-or-later"
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
prost = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
tonic = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
tantivy = { workspace = true }
|
||||
|
||||
common = { path = "../common" }
|
||||
tonic-reflection = "0.13.1"
|
||||
109
search/src/lib.rs
Normal file
109
search/src/lib.rs
Normal file
@@ -0,0 +1,109 @@
|
||||
// src/lib.rs
|
||||
|
||||
use std::path::Path;
|
||||
use tantivy::{collector::TopDocs, query::QueryParser, Index, TantivyDocument};
|
||||
use tonic::{Request, Response, Status};
|
||||
|
||||
use common::proto::multieko2::search::{
|
||||
search_response::Hit, SearchRequest, SearchResponse,
|
||||
};
|
||||
use common::proto::multieko2::search::searcher_server::Searcher;
|
||||
pub use common::proto::multieko2::search::searcher_server::SearcherServer;
|
||||
use tantivy::schema::Value;
|
||||
|
||||
pub struct SearcherService;
|
||||
|
||||
#[tonic::async_trait]
|
||||
impl Searcher for SearcherService {
|
||||
async fn search_table(
|
||||
&self,
|
||||
request: Request<SearchRequest>,
|
||||
) -> Result<Response<SearchResponse>, Status> {
|
||||
let req = request.into_inner();
|
||||
let table_name = req.table_name;
|
||||
let query_str = req.query;
|
||||
|
||||
if query_str.trim().is_empty() {
|
||||
return Err(Status::invalid_argument("Query cannot be empty"));
|
||||
}
|
||||
|
||||
// Open the index for this table
|
||||
let index_path = Path::new("./tantivy_indexes").join(&table_name);
|
||||
|
||||
if !index_path.exists() {
|
||||
return Err(Status::not_found(format!(
|
||||
"No search index found for table '{}'",
|
||||
table_name
|
||||
)));
|
||||
}
|
||||
|
||||
// Open the index
|
||||
let index = Index::open_in_dir(&index_path).map_err(|e| {
|
||||
Status::internal(format!("Failed to open index: {}", e))
|
||||
})?;
|
||||
|
||||
// Create reader and searcher
|
||||
let reader = index.reader().map_err(|e| {
|
||||
Status::internal(format!("Failed to create index reader: {}", e))
|
||||
})?;
|
||||
|
||||
let searcher = reader.searcher();
|
||||
let schema = index.schema();
|
||||
|
||||
// Get the fields we need
|
||||
let all_text_field = match schema.get_field("all_text") {
|
||||
Ok(field) => field,
|
||||
Err(_) => {
|
||||
return Err(Status::internal(
|
||||
"Schema is missing the 'all_text' field.",
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
let pg_id_field = match schema.get_field("pg_id") {
|
||||
Ok(field) => field,
|
||||
Err(_) => {
|
||||
return Err(Status::internal(
|
||||
"Schema is missing the 'pg_id' field.",
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
// Parse the query
|
||||
let query_parser =
|
||||
QueryParser::for_index(&index, vec![all_text_field]);
|
||||
let query = query_parser.parse_query(&query_str).map_err(|e| {
|
||||
Status::invalid_argument(format!("Invalid query: {}", e))
|
||||
})?;
|
||||
|
||||
// Perform the search
|
||||
let top_docs = searcher
|
||||
.search(&query, &TopDocs::with_limit(100))
|
||||
.map_err(|e| Status::internal(format!("Search failed: {}", e)))?;
|
||||
|
||||
// Convert results to our response format
|
||||
let mut hits = Vec::new();
|
||||
for (score, doc_address) in top_docs {
|
||||
let doc: TantivyDocument = searcher.doc(doc_address).map_err(
|
||||
|e| {
|
||||
Status::internal(format!(
|
||||
"Failed to retrieve document: {}",
|
||||
e
|
||||
))
|
||||
},
|
||||
)?;
|
||||
|
||||
if let Some(pg_id_value) = doc.get_first(pg_id_field) {
|
||||
if let Some(pg_id) = pg_id_value.as_u64() {
|
||||
hits.push(Hit {
|
||||
id: pg_id as i64,
|
||||
score,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let response = SearchResponse { hits };
|
||||
Ok(Response::new(response))
|
||||
}
|
||||
}
|
||||
@@ -6,7 +6,10 @@ license = "AGPL-3.0-or-later"
|
||||
|
||||
[dependencies]
|
||||
common = { path = "../common" }
|
||||
search = { path = "../search" }
|
||||
|
||||
anyhow = { workspace = true }
|
||||
tantivy = { workspace = true }
|
||||
chrono = { version = "0.4.40", features = ["serde"] }
|
||||
dotenvy = "0.15.7"
|
||||
prost = "0.13.5"
|
||||
|
||||
83
server/src/bin/manual_indexer.rs
Normal file
83
server/src/bin/manual_indexer.rs
Normal file
@@ -0,0 +1,83 @@
|
||||
// In server/src/bin/manual_indexer.rs
|
||||
|
||||
use sqlx::{PgPool, Row};
|
||||
use tantivy::schema::*;
|
||||
use tantivy::{doc, Index};
|
||||
use std::path::Path;
|
||||
|
||||
// --- CONFIGURATION ---
|
||||
// IMPORTANT: Change this to a table name that actually exists and has data in your test DB.
|
||||
// From your grpcurl output, "2025_test_post" is a good candidate.
|
||||
const TABLE_TO_INDEX: &str = "2025_test_post2";
|
||||
const INDEX_DIR: &str = "./tantivy_indexes";
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
// --- Database Connection ---
|
||||
// This assumes you have a .env file with DATABASE_URL
|
||||
dotenvy::dotenv().ok();
|
||||
let database_url = std::env::var("DATABASE_URL")
|
||||
.expect("DATABASE_URL must be set in your .env file");
|
||||
let pool = PgPool::connect(&database_url).await?;
|
||||
println!("Connected to database.");
|
||||
|
||||
// --- Tantivy Schema Definition ---
|
||||
let mut schema_builder = Schema::builder();
|
||||
// This field will store the original Postgres row ID. It's crucial.
|
||||
schema_builder.add_u64_field("pg_id", INDEXED | STORED);
|
||||
// This field will contain ALL text data from the row, concatenated.
|
||||
schema_builder.add_text_field("all_text", TEXT | STORED);
|
||||
let schema = schema_builder.build();
|
||||
|
||||
// --- Index Creation ---
|
||||
let index_path = Path::new(INDEX_DIR).join(TABLE_TO_INDEX);
|
||||
if index_path.exists() {
|
||||
println!("Removing existing index at: {}", index_path.display());
|
||||
std::fs::remove_dir_all(&index_path)?;
|
||||
}
|
||||
std::fs::create_dir_all(&index_path)?;
|
||||
let index = Index::create_in_dir(&index_path, schema.clone())?;
|
||||
let mut index_writer = index.writer(100_000_000)?; // 100MB heap
|
||||
|
||||
println!("Indexing table: {}", TABLE_TO_INDEX);
|
||||
|
||||
// --- Data Fetching and Indexing ---
|
||||
let qualified_table = format!("gen.\"{}\"", TABLE_TO_INDEX);
|
||||
let query_str = format!("SELECT id, to_jsonb(t) AS data FROM {} t", qualified_table);
|
||||
let rows = sqlx::query(&query_str).fetch_all(&pool).await?;
|
||||
|
||||
if rows.is_empty() {
|
||||
println!("Warning: No rows found in table '{}'. Index will be empty.", TABLE_TO_INDEX);
|
||||
}
|
||||
|
||||
let pg_id_field = schema.get_field("pg_id").unwrap();
|
||||
let all_text_field = schema.get_field("all_text").unwrap();
|
||||
|
||||
for row in &rows {
|
||||
let id: i64 = row.try_get("id")?;
|
||||
let data: serde_json::Value = row.try_get("data")?;
|
||||
|
||||
// Concatenate all text values from the JSON into one big string.
|
||||
let mut full_text = String::new();
|
||||
if let Some(obj) = data.as_object() {
|
||||
for value in obj.values() {
|
||||
if let Some(s) = value.as_str() {
|
||||
full_text.push_str(s);
|
||||
full_text.push(' ');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add the document to Tantivy
|
||||
index_writer.add_document(doc!(
|
||||
pg_id_field => id as u64,
|
||||
all_text_field => full_text
|
||||
))?;
|
||||
}
|
||||
|
||||
// --- Finalize ---
|
||||
index_writer.commit()?;
|
||||
println!("Successfully indexed {} documents into '{}'", rows.len(), index_path.display());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
140
server/src/indexer.rs
Normal file
140
server/src/indexer.rs
Normal file
@@ -0,0 +1,140 @@
|
||||
// src/indexer.rs
|
||||
|
||||
use std::path::Path;
|
||||
use sqlx::{PgPool, Row};
|
||||
use tantivy::schema::{Schema, Term, TEXT, STORED, INDEXED};
|
||||
use tantivy::{doc, Index, IndexWriter};
|
||||
use tokio::sync::mpsc::Receiver;
|
||||
use tracing::{error, info, warn};
|
||||
|
||||
const INDEX_DIR: &str = "./tantivy_indexes";
|
||||
|
||||
/// Defines the commands that can be sent to the indexer task.
|
||||
#[derive(Debug)]
|
||||
pub enum IndexCommand {
|
||||
/// Add a new document or update an existing one.
|
||||
AddOrUpdate(IndexCommandData),
|
||||
/// Remove a document from the index.
|
||||
Delete(IndexCommandData),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct IndexCommandData {
|
||||
pub table_name: String,
|
||||
pub row_id: i64,
|
||||
}
|
||||
|
||||
/// The main loop for the background indexer task.
|
||||
/// It listens for commands on the receiver and updates the Tantivy index.
|
||||
pub async fn indexer_task(pool: PgPool, mut receiver: Receiver<IndexCommand>) {
|
||||
info!("Background indexer task started.");
|
||||
while let Some(command) = receiver.recv().await {
|
||||
info!("Indexer received command: {:?}", command);
|
||||
let result = match command {
|
||||
IndexCommand::AddOrUpdate(data) => {
|
||||
handle_add_or_update(&pool, data).await
|
||||
}
|
||||
IndexCommand::Delete(data) => handle_delete(&pool, data).await,
|
||||
};
|
||||
|
||||
if let Err(e) = result {
|
||||
error!("Failed to process index command: {}", e);
|
||||
}
|
||||
}
|
||||
warn!("Indexer channel closed. Task is shutting down.");
|
||||
}
|
||||
|
||||
/// Handles adding or updating a document in a Tantivy index.
|
||||
async fn handle_add_or_update(
|
||||
pool: &PgPool,
|
||||
data: IndexCommandData,
|
||||
) -> anyhow::Result<()> {
|
||||
// 1. Fetch the full row data from PostgreSQL
|
||||
let qualified_table = format!("gen.\"{}\"", data.table_name);
|
||||
let query_str = format!(
|
||||
"SELECT to_jsonb(t) AS data FROM {} t WHERE id = $1",
|
||||
qualified_table
|
||||
);
|
||||
|
||||
let row = sqlx::query(&query_str)
|
||||
.bind(data.row_id)
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
let json_data: serde_json::Value = row.try_get("data")?;
|
||||
|
||||
// 2. Prepare the Tantivy document
|
||||
let mut full_text = String::new();
|
||||
if let Some(obj) = json_data.as_object() {
|
||||
for value in obj.values() {
|
||||
if let Some(s) = value.as_str() {
|
||||
full_text.push_str(s);
|
||||
full_text.push(' ');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Open the index and write the document
|
||||
let (mut writer, schema) = get_index_writer(&data.table_name)?;
|
||||
let pg_id_field = schema.get_field("pg_id").unwrap();
|
||||
let all_text_field = schema.get_field("all_text").unwrap();
|
||||
|
||||
// First, delete any existing document with this ID to handle updates
|
||||
let id_term = Term::from_field_u64(pg_id_field, data.row_id as u64);
|
||||
writer.delete_term(id_term);
|
||||
|
||||
// Add the new document
|
||||
writer.add_document(doc!(
|
||||
pg_id_field => data.row_id as u64,
|
||||
all_text_field => full_text
|
||||
))?;
|
||||
|
||||
// 4. Commit changes
|
||||
writer.commit()?;
|
||||
info!(
|
||||
"Successfully indexed document id:{} for table:{}",
|
||||
data.row_id, data.table_name
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Handles deleting a document from a Tantivy index.
|
||||
async fn handle_delete(
|
||||
_pool: &PgPool,
|
||||
data: IndexCommandData,
|
||||
) -> anyhow::Result<()> {
|
||||
let (mut writer, schema) = get_index_writer(&data.table_name)?;
|
||||
let pg_id_field = schema.get_field("pg_id").unwrap();
|
||||
|
||||
let id_term = Term::from_field_u64(pg_id_field, data.row_id as u64);
|
||||
writer.delete_term(id_term);
|
||||
writer.commit()?;
|
||||
|
||||
info!(
|
||||
"Successfully deleted document id:{} from table:{}",
|
||||
data.row_id, data.table_name
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Helper to get or create an index and return its writer and schema.
|
||||
fn get_index_writer(
|
||||
table_name: &str,
|
||||
) -> anyhow::Result<(IndexWriter, Schema)> {
|
||||
let index_path = Path::new(INDEX_DIR).join(table_name);
|
||||
std::fs::create_dir_all(&index_path)?;
|
||||
|
||||
let index = Index::open_in_dir(&index_path).or_else(|_| {
|
||||
// If it doesn't exist, create it with the standard schema
|
||||
let mut schema_builder = Schema::builder();
|
||||
schema_builder.add_u64_field("pg_id", INDEXED | STORED);
|
||||
schema_builder.add_text_field("all_text", TEXT | STORED);
|
||||
let schema = schema_builder.build();
|
||||
Index::create_in_dir(&index_path, schema)
|
||||
})?;
|
||||
|
||||
let schema = index.schema();
|
||||
let writer = index.writer(100_000_000)?; // 100MB heap
|
||||
Ok((writer, schema))
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
// src/lib.rs
|
||||
pub mod db;
|
||||
pub mod auth;
|
||||
pub mod indexer;
|
||||
pub mod server;
|
||||
pub mod adresar;
|
||||
pub mod uctovnictvo;
|
||||
|
||||
@@ -2,6 +2,9 @@
|
||||
use tonic::transport::Server;
|
||||
use tonic_reflection::server::Builder as ReflectionBuilder;
|
||||
|
||||
use tokio::sync::mpsc;
|
||||
use crate::indexer::{indexer_task, IndexCommand};
|
||||
|
||||
use common::proto::multieko2::FILE_DESCRIPTOR_SET;
|
||||
use crate::server::services::{
|
||||
AdresarService,
|
||||
@@ -21,22 +24,35 @@ use common::proto::multieko2::{
|
||||
table_script::table_script_server::TableScriptServer,
|
||||
auth::auth_service_server::AuthServiceServer
|
||||
};
|
||||
use search::{SearcherService, SearcherServer};
|
||||
|
||||
pub async fn run_server(db_pool: sqlx::PgPool) -> Result<(), Box<dyn std::error::Error>> {
|
||||
// Initialize JWT for authentication
|
||||
crate::auth::logic::jwt::init_jwt()?;
|
||||
|
||||
|
||||
let addr = "[::1]:50051".parse()?;
|
||||
println!("Unified Server listening on {}", addr);
|
||||
|
||||
// 1. Create the MPSC channel for indexer commands
|
||||
let (indexer_tx, indexer_rx) = mpsc::channel::<IndexCommand>(100); // Buffer of 100 messages
|
||||
|
||||
// 2. Spawn the background indexer task
|
||||
let indexer_pool = db_pool.clone();
|
||||
tokio::spawn(indexer_task(indexer_pool, indexer_rx));
|
||||
|
||||
let reflection_service = ReflectionBuilder::configure()
|
||||
.register_encoded_file_descriptor_set(FILE_DESCRIPTOR_SET)
|
||||
.build_v1()?;
|
||||
|
||||
// Initialize services
|
||||
// Initialize services, passing the indexer sender to the relevant ones
|
||||
let table_definition_service = TableDefinitionService { db_pool: db_pool.clone() };
|
||||
let tables_data_service = TablesDataService { db_pool: db_pool.clone() };
|
||||
let tables_data_service = TablesDataService {
|
||||
db_pool: db_pool.clone(),
|
||||
indexer_tx: indexer_tx.clone(), // Pass the sender
|
||||
};
|
||||
let table_script_service = TableScriptService { db_pool: db_pool.clone() };
|
||||
let auth_service = AuthServiceImpl { db_pool: db_pool.clone() };
|
||||
let search_service = SearcherService;
|
||||
|
||||
Server::builder()
|
||||
.add_service(AdresarServer::new(AdresarService { db_pool: db_pool.clone() }))
|
||||
@@ -46,6 +62,7 @@ pub async fn run_server(db_pool: sqlx::PgPool) -> Result<(), Box<dyn std::error:
|
||||
.add_service(TablesDataServer::new(tables_data_service))
|
||||
.add_service(TableScriptServer::new(table_script_service))
|
||||
.add_service(AuthServiceServer::new(auth_service))
|
||||
.add_service(SearcherServer::new(search_service))
|
||||
.add_service(reflection_service)
|
||||
.serve(addr)
|
||||
.await?;
|
||||
|
||||
@@ -1,5 +1,10 @@
|
||||
// src/server/services/tables_data_service.rs
|
||||
|
||||
use tonic::{Request, Response, Status};
|
||||
// Add these imports
|
||||
use tokio::sync::mpsc;
|
||||
use crate::indexer::IndexCommand;
|
||||
|
||||
use common::proto::multieko2::tables_data::tables_data_server::TablesData;
|
||||
use common::proto::multieko2::common::CountResponse;
|
||||
use common::proto::multieko2::tables_data::{
|
||||
@@ -15,6 +20,8 @@ use sqlx::PgPool;
|
||||
#[derive(Debug)]
|
||||
pub struct TablesDataService {
|
||||
pub db_pool: PgPool,
|
||||
// MODIFIED: Add the sender field
|
||||
pub indexer_tx: mpsc::Sender<IndexCommand>,
|
||||
}
|
||||
|
||||
#[tonic::async_trait]
|
||||
@@ -24,25 +31,34 @@ impl TablesData for TablesDataService {
|
||||
request: Request<PostTableDataRequest>,
|
||||
) -> Result<Response<PostTableDataResponse>, Status> {
|
||||
let request = request.into_inner();
|
||||
let response = post_table_data(&self.db_pool, request).await?;
|
||||
// MODIFIED: Pass the indexer_tx to the handler
|
||||
let response = post_table_data(
|
||||
&self.db_pool,
|
||||
request,
|
||||
&self.indexer_tx,
|
||||
)
|
||||
.await?;
|
||||
Ok(Response::new(response))
|
||||
}
|
||||
|
||||
// Add the new method implementation
|
||||
// You will later apply the same pattern to put_table_data...
|
||||
async fn put_table_data(
|
||||
&self,
|
||||
request: Request<PutTableDataRequest>,
|
||||
) -> Result<Response<PutTableDataResponse>, Status> {
|
||||
let request = request.into_inner();
|
||||
// TODO: Update put_table_data handler to accept and use indexer_tx
|
||||
let response = put_table_data(&self.db_pool, request).await?;
|
||||
Ok(Response::new(response))
|
||||
}
|
||||
|
||||
// ...and delete_table_data
|
||||
async fn delete_table_data(
|
||||
&self,
|
||||
request: Request<DeleteTableDataRequest>,
|
||||
) -> Result<Response<DeleteTableDataResponse>, Status> {
|
||||
let request = request.into_inner();
|
||||
// TODO: Update delete_table_data handler to accept and use indexer_tx
|
||||
let response = delete_table_data(&self.db_pool, request).await?;
|
||||
Ok(Response::new(response))
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// src/tables_data/handlers/post_table_data.rs
|
||||
|
||||
use tonic::Status;
|
||||
use sqlx::{PgPool, Arguments};
|
||||
use sqlx::postgres::PgArguments;
|
||||
@@ -6,22 +7,26 @@ use chrono::{DateTime, Utc};
|
||||
use common::proto::multieko2::tables_data::{PostTableDataRequest, PostTableDataResponse};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use crate::shared::schema_qualifier::qualify_table_name_for_data; // Import schema qualifier
|
||||
use crate::shared::schema_qualifier::qualify_table_name_for_data;
|
||||
|
||||
use crate::steel::server::execution::{self, Value};
|
||||
use crate::steel::server::functions::SteelContext;
|
||||
|
||||
// Add these imports
|
||||
use crate::indexer::{IndexCommand, IndexCommandData};
|
||||
use tokio::sync::mpsc;
|
||||
use tracing::error;
|
||||
|
||||
// MODIFIED: Function signature now accepts the indexer sender
|
||||
pub async fn post_table_data(
|
||||
db_pool: &PgPool,
|
||||
request: PostTableDataRequest,
|
||||
indexer_tx: &mpsc::Sender<IndexCommand>,
|
||||
) -> Result<PostTableDataResponse, Status> {
|
||||
let profile_name = request.profile_name;
|
||||
let table_name = request.table_name;
|
||||
let mut data = HashMap::new();
|
||||
|
||||
// CORRECTED: Process and trim all incoming data values.
|
||||
// We remove the hardcoded validation. We will let the database's
|
||||
// NOT NULL constraints or Steel validation scripts handle required fields.
|
||||
for (key, value) in request.data {
|
||||
data.insert(key, value.trim().to_string());
|
||||
}
|
||||
@@ -243,6 +248,21 @@ pub async fn post_table_data(
|
||||
}
|
||||
};
|
||||
|
||||
// After a successful insert, send a command to the indexer.
|
||||
let command = IndexCommand::AddOrUpdate(IndexCommandData {
|
||||
table_name: table_name.clone(),
|
||||
row_id: inserted_id,
|
||||
});
|
||||
|
||||
if let Err(e) = indexer_tx.send(command).await {
|
||||
// If sending fails, the DB is updated but the index will be stale.
|
||||
// This is a critical situation to log and monitor.
|
||||
error!(
|
||||
"CRITICAL: DB insert for table '{}' (id: {}) succeeded but failed to queue for indexing: {}. Search index is now inconsistent.",
|
||||
table_name, inserted_id, e
|
||||
);
|
||||
}
|
||||
|
||||
Ok(PostTableDataResponse {
|
||||
success: true,
|
||||
message: "Data inserted successfully".into(),
|
||||
|
||||
Reference in New Issue
Block a user