diff --git a/Cargo.lock b/Cargo.lock index f8ebcd6..8100c97 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -303,6 +303,15 @@ dependencies = [ "typenum", ] +[[package]] +name = "bitpacking" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c1d3e2bfd8d06048a179f7b17afc3188effa10385e7b00dc65af6aae732ea92" +dependencies = [ + "crunchy", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -322,6 +331,31 @@ dependencies = [ "cipher", ] +[[package]] +name = "bon" +version = "3.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced38439e7a86a4761f7f7d5ded5ff009135939ecb464a24452eaa4c1696af7d" +dependencies = [ + "bon-macros", + "rustversion", +] + +[[package]] +name = "bon-macros" +version = "3.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce61d2d3844c6b8d31b2353d9f66cf5e632b3e9549583fe3cac2f4f6136725e" +dependencies = [ + "darling", + "ident_case", + "prettyplease", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.100", +] + [[package]] name = "bumpalo" version = "3.17.0" @@ -361,9 +395,17 @@ version = "1.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e3a13707ac958681c13b39b458c073d0d9bc8a22cb1b2f4c8e55eb72c13f362" dependencies = [ + "jobserver", + "libc", "shlex", ] +[[package]] +name = "census" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" + [[package]] name = "cfg-if" version = "1.0.0" @@ -541,6 +583,15 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + [[package]] name = "crossbeam" version = "0.8.4" @@ -622,6 +673,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "crunchy" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" + [[package]] name = "crypto-common" version = "0.1.6" @@ -699,6 +756,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" dependencies = [ "powerfmt", + "serde", ] [[package]] @@ -772,6 +830,12 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "downcast-rs" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea8a8b81cacc08888170eef4d13b775126db426d0b348bee9d18c2c1eaf123cf" + [[package]] name = "either" version = "1.15.0" @@ -825,6 +889,12 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "fastdivide" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471" + [[package]] name = "fastrand" version = "2.3.0" @@ -884,6 +954,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs4" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7e180ac76c23b45e767bd7ae9579bc0bb458618c4bc71835926e098e61d15f8" +dependencies = [ + "rustix 0.38.44", + "windows-sys 0.52.0", +] + [[package]] name = "futures-channel" version = "0.3.31" @@ -1142,6 +1222,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "htmlescape" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" + [[package]] name = "http" version = "1.3.1" @@ -1242,6 +1328,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "hyperloglogplus" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "621debdf94dcac33e50475fdd76d34d5ea9c0362a834b9db08c3024696c1fbe3" +dependencies = [ + "serde", +] + [[package]] name = "iana-time-zone" version = "0.1.63" @@ -1520,6 +1615,16 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jobserver" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" +dependencies = [ + "getrandom 0.3.2", + "libc", +] + [[package]] name = "js-sys" version = "0.3.77" @@ -1566,6 +1671,12 @@ dependencies = [ "spin", ] +[[package]] +name = "levenshtein_automata" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" + [[package]] name = "libc" version = "0.2.172" @@ -1651,6 +1762,12 @@ dependencies = [ "hashbrown 0.15.2", ] +[[package]] +name = "lz4_flex" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" + [[package]] name = "matchit" version = "0.8.4" @@ -1667,18 +1784,42 @@ dependencies = [ "digest", ] +[[package]] +name = "measure_time" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51c55d61e72fc3ab704396c5fa16f4c184db37978ae4e94ca8959693a235fc0e" +dependencies = [ + "log", +] + [[package]] name = "memchr" version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "memmap2" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" +dependencies = [ + "libc", +] + [[package]] name = "mime" version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.8.8" @@ -1706,6 +1847,12 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" +[[package]] +name = "murmurhash32" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" + [[package]] name = "native-tls" version = "0.2.14" @@ -1723,6 +1870,16 @@ dependencies = [ "tempfile", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -1857,6 +2014,12 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "oneshot" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ce411919553d3f9fa53a0880544cda985a112117a0444d5ff1e870a893d6ea" + [[package]] name = "openssl" version = "0.10.72" @@ -1913,6 +2076,15 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "ownedbytes" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fbd56f7631767e61784dc43f8580f403f4475bd4aaa4da003e6295e1bab4a7e" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "parking" version = "2.2.1" @@ -2275,6 +2447,16 @@ dependencies = [ "getrandom 0.3.2", ] +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand 0.8.5", +] + [[package]] name = "rand_xoshiro" version = "0.6.0" @@ -2305,6 +2487,26 @@ dependencies = [ "unicode-width 0.2.0", ] +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.11" @@ -2444,12 +2646,28 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "rust-stemmers" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" +dependencies = [ + "serde", + "serde_derive", +] + [[package]] name = "rustc-demangle" version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustc_version" version = "0.4.1" @@ -2521,6 +2739,7 @@ dependencies = [ "prost", "serde", "serde_json", + "tantivy", "tokio", "tonic", "tracing", @@ -2622,6 +2841,7 @@ dependencies = [ "prost", "regex", "rstest", + "search", "serde", "serde_json", "sqlx", @@ -2736,6 +2956,15 @@ dependencies = [ "typenum", ] +[[package]] +name = "sketches-ddsketch" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1e9a774a6c28142ac54bb25d25562e6bcf957493a184f15ad4eebccb23e410a" +dependencies = [ + "serde", +] + [[package]] name = "slab" version = "0.4.9" @@ -3176,6 +3405,152 @@ dependencies = [ "syn 2.0.100", ] +[[package]] +name = "tantivy" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2374a21157427c5faff2d90930f035b6c22a5d7b0e5b0b7f522e988ef33c06" +dependencies = [ + "aho-corasick", + "arc-swap", + "base64", + "bitpacking", + "bon", + "byteorder", + "census", + "crc32fast", + "crossbeam-channel", + "downcast-rs", + "fastdivide", + "fnv", + "fs4", + "htmlescape", + "hyperloglogplus", + "itertools 0.14.0", + "levenshtein_automata", + "log", + "lru", + "lz4_flex", + "measure_time", + "memmap2", + "once_cell", + "oneshot", + "rayon", + "regex", + "rust-stemmers", + "rustc-hash", + "serde", + "serde_json", + "sketches-ddsketch", + "smallvec", + "tantivy-bitpacker", + "tantivy-columnar", + "tantivy-common", + "tantivy-fst", + "tantivy-query-grammar", + "tantivy-stacker", + "tantivy-tokenizer-api", + "tempfile", + "thiserror 2.0.12", + "time", + "uuid", + "winapi", +] + +[[package]] +name = "tantivy-bitpacker" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1adc286a39e089ae9938935cd488d7d34f14502544a36607effd2239ff0e2494" +dependencies = [ + "bitpacking", +] + +[[package]] +name = "tantivy-columnar" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6300428e0c104c4f7db6f95b466a6f5c1b9aece094ec57cdd365337908dc7344" +dependencies = [ + "downcast-rs", + "fastdivide", + "itertools 0.14.0", + "serde", + "tantivy-bitpacker", + "tantivy-common", + "tantivy-sstable", + "tantivy-stacker", +] + +[[package]] +name = "tantivy-common" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b6ea6090ce03dc72c27d0619e77185d26cc3b20775966c346c6d4f7e99d7f" +dependencies = [ + "async-trait", + "byteorder", + "ownedbytes", + "serde", + "time", +] + +[[package]] +name = "tantivy-fst" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18" +dependencies = [ + "byteorder", + "regex-syntax", + "utf8-ranges", +] + +[[package]] +name = "tantivy-query-grammar" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e810cdeeebca57fc3f7bfec5f85fdbea9031b2ac9b990eb5ff49b371d52bbe6a" +dependencies = [ + "nom", + "serde", + "serde_json", +] + +[[package]] +name = "tantivy-sstable" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "709f22c08a4c90e1b36711c1c6cad5ae21b20b093e535b69b18783dd2cb99416" +dependencies = [ + "futures-util", + "itertools 0.14.0", + "tantivy-bitpacker", + "tantivy-common", + "tantivy-fst", + "zstd", +] + +[[package]] +name = "tantivy-stacker" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bcdebb267671311d1e8891fd9d1301803fdb8ad21ba22e0a30d0cab49ba59c1" +dependencies = [ + "murmurhash32", + "rand_distr", + "tantivy-common", +] + +[[package]] +name = "tantivy-tokenizer-api" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfa942fcee81e213e09715bbce8734ae2180070b97b33839a795ba1de201547d" +dependencies = [ + "serde", +] + [[package]] name = "tempfile" version = "3.19.1" @@ -3662,6 +4037,12 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" +[[package]] +name = "utf8-ranges" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -3864,7 +4245,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] @@ -4228,3 +4609,31 @@ dependencies = [ "quote", "syn 2.0.100", ] + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.15+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/search/Cargo.toml b/search/Cargo.toml index 60e212e..e3eeeb1 100644 --- a/search/Cargo.toml +++ b/search/Cargo.toml @@ -14,3 +14,4 @@ tonic = { workspace = true } tracing = { workspace = true } common = { path = "../common" } +tantivy = "0.24.1" diff --git a/search/src/lib.rs b/search/src/lib.rs index b93cf3f..fa710c6 100644 --- a/search/src/lib.rs +++ b/search/src/lib.rs @@ -1,14 +1,127 @@ -pub fn add(left: u64, right: u64) -> u64 { - left + right -} +// src/lib.rs -#[cfg(test)] -mod tests { - use super::*; +use std::path::Path; +use tantivy::{collector::TopDocs, query::QueryParser, Index, TantivyDocument}; +use tantivy::schema::Value; +use tonic::{transport::Server, Request, Response, Status}; - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); +use common::proto::multieko2::search::{ + search_response::Hit, + searcher_server::{Searcher, SearcherServer}, + SearchRequest, SearchResponse, +}; + +pub struct SearcherService; + +#[tonic::async_trait] +impl Searcher for SearcherService { + async fn search_table( + &self, + request: Request, + ) -> Result, Status> { + let req = request.into_inner(); + let table_name = req.table_name; + let query_str = req.query; + + if query_str.trim().is_empty() { + return Err(Status::invalid_argument("Query cannot be empty")); + } + + // Open the index for this table + let index_path = Path::new("./tantivy_indexes").join(&table_name); + + if !index_path.exists() { + return Err(Status::not_found(format!( + "No search index found for table '{}'", + table_name + ))); + } + + // Open the index + let index = Index::open_in_dir(&index_path).map_err(|e| { + Status::internal(format!("Failed to open index: {}", e)) + })?; + + // Create reader and searcher + let reader = index.reader().map_err(|e| { + Status::internal(format!("Failed to create index reader: {}", e)) + })?; + + let searcher = reader.searcher(); + let schema = index.schema(); + + // Get the fields we need + let all_text_field = match schema.get_field("all_text") { + Ok(field) => field, + Err(_) => { + return Err(Status::internal( + "Schema is missing the 'all_text' field.", + )) + } + }; + + let pg_id_field = match schema.get_field("pg_id") { + Ok(field) => field, + Err(_) => { + return Err(Status::internal( + "Schema is missing the 'pg_id' field.", + )) + } + }; + + // Parse the query + let query_parser = + QueryParser::for_index(&index, vec![all_text_field]); + let query = query_parser.parse_query(&query_str).map_err(|e| { + Status::invalid_argument(format!("Invalid query: {}", e)) + })?; + + // Perform the search + let top_docs = searcher + .search(&query, &TopDocs::with_limit(100)) + .map_err(|e| Status::internal(format!("Search failed: {}", e)))?; + + // Convert results to our response format + let mut hits = Vec::new(); + for (score, doc_address) in top_docs { + // FIX: Add explicit type TantivyDocument for the retrieved doc + let doc: TantivyDocument = searcher.doc(doc_address).map_err( + |e| { + Status::internal(format!( + "Failed to retrieve document: {}", + e + )) + }, + )?; + + // Extract the PostgreSQL ID from the document + if let Some(pg_id_value) = doc.get_first(pg_id_field) { + if let Some(pg_id) = pg_id_value.as_u64() { + hits.push(Hit { + id: pg_id as i64, + score, + }); + } + } + } + + let response = SearchResponse { hits }; + Ok(Response::new(response)) } } + +pub async fn run_search_service( + addr: &str, +) -> Result<(), Box> { + let addr = addr.parse()?; + let searcher_service = SearcherService; + + println!("Search service listening on {}", addr); + + Server::builder() + .add_service(SearcherServer::new(searcher_service)) + .serve(addr) + .await?; + + Ok(()) +} diff --git a/server/Cargo.toml b/server/Cargo.toml index b5d0f90..a9e426e 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -6,6 +6,7 @@ license = "AGPL-3.0-or-later" [dependencies] common = { path = "../common" } +search = { path = "../search" } chrono = { version = "0.4.40", features = ["serde"] } dotenvy = "0.15.7" diff --git a/server/src/bin/manual_indexer.rs b/server/src/bin/manual_indexer.rs new file mode 100644 index 0000000..25ea7ed --- /dev/null +++ b/server/src/bin/manual_indexer.rs @@ -0,0 +1,83 @@ +// In server/src/bin/manual_indexer.rs + +use sqlx::{PgPool, Row}; +use tantivy::schema::*; +use tantivy::{doc, Index}; +use std::path::Path; + +// --- CONFIGURATION --- +// IMPORTANT: Change this to a table name that actually exists and has data in your test DB. +// From your grpcurl output, "2025_test_post" is a good candidate. +const TABLE_TO_INDEX: &str = "2025_test_post2"; +const INDEX_DIR: &str = "./tantivy_indexes"; + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + // --- Database Connection --- + // This assumes you have a .env file with DATABASE_URL + dotenvy::dotenv().ok(); + let database_url = std::env::var("DATABASE_URL") + .expect("DATABASE_URL must be set in your .env file"); + let pool = PgPool::connect(&database_url).await?; + println!("Connected to database."); + + // --- Tantivy Schema Definition --- + let mut schema_builder = Schema::builder(); + // This field will store the original Postgres row ID. It's crucial. + schema_builder.add_u64_field("pg_id", INDEXED | STORED); + // This field will contain ALL text data from the row, concatenated. + schema_builder.add_text_field("all_text", TEXT | STORED); + let schema = schema_builder.build(); + + // --- Index Creation --- + let index_path = Path::new(INDEX_DIR).join(TABLE_TO_INDEX); + if index_path.exists() { + println!("Removing existing index at: {}", index_path.display()); + std::fs::remove_dir_all(&index_path)?; + } + std::fs::create_dir_all(&index_path)?; + let index = Index::create_in_dir(&index_path, schema.clone())?; + let mut index_writer = index.writer(100_000_000)?; // 100MB heap + + println!("Indexing table: {}", TABLE_TO_INDEX); + + // --- Data Fetching and Indexing --- + let qualified_table = format!("gen.\"{}\"", TABLE_TO_INDEX); + let query_str = format!("SELECT id, to_jsonb(t) AS data FROM {} t", qualified_table); + let rows = sqlx::query(&query_str).fetch_all(&pool).await?; + + if rows.is_empty() { + println!("Warning: No rows found in table '{}'. Index will be empty.", TABLE_TO_INDEX); + } + + let pg_id_field = schema.get_field("pg_id").unwrap(); + let all_text_field = schema.get_field("all_text").unwrap(); + + for row in &rows { + let id: i64 = row.try_get("id")?; + let data: serde_json::Value = row.try_get("data")?; + + // Concatenate all text values from the JSON into one big string. + let mut full_text = String::new(); + if let Some(obj) = data.as_object() { + for value in obj.values() { + if let Some(s) = value.as_str() { + full_text.push_str(s); + full_text.push(' '); + } + } + } + + // Add the document to Tantivy + index_writer.add_document(doc!( + pg_id_field => id as u64, + all_text_field => full_text + ))?; + } + + // --- Finalize --- + index_writer.commit()?; + println!("Successfully indexed {} documents into '{}'", rows.len(), index_path.display()); + + Ok(()) +} diff --git a/server/src/bin/mod.rs b/server/src/bin/mod.rs new file mode 100644 index 0000000..740a433 --- /dev/null +++ b/server/src/bin/mod.rs @@ -0,0 +1,3 @@ +// src/bin/mod.rs + +pub mod manual_indexer; diff --git a/server/src/server/run.rs b/server/src/server/run.rs index 38ddc62..7e42e1b 100644 --- a/server/src/server/run.rs +++ b/server/src/server/run.rs @@ -21,10 +21,22 @@ use common::proto::multieko2::{ table_script::table_script_server::TableScriptServer, auth::auth_service_server::AuthServiceServer }; +use search::run_search_service; pub async fn run_server(db_pool: sqlx::PgPool) -> Result<(), Box> { // Initialize JWT for authentication crate::auth::logic::jwt::init_jwt()?; + + // ==================== SEARCH SERVER SETUP ================== + let search_addr = "[::1]:50052".to_string(); + println!("Spawning Search Service on {}", search_addr); + + tokio::spawn(async move { + if let Err(e) = run_search_service(&search_addr).await { + eprintln!("[Error] Search service failed to start: {}", e); + } + }); + // ============================================================ let addr = "[::1]:50051".parse()?;