// src/lib.rs use std::collections::HashMap; use std::path::Path; use tantivy::collector::TopDocs; use tantivy::query::{ BooleanQuery, BoostQuery, FuzzyTermQuery, Occur, Query, QueryParser, TermQuery, }; use tantivy::schema::{IndexRecordOption, Value}; use tantivy::{Index, TantivyDocument, Term}; use tonic::{Request, Response, Status}; use common::proto::komp_ac::search::{ search_response::Hit, SearchRequest, SearchResponse, }; pub use common::proto::komp_ac::search::searcher_server::SearcherServer; use common::proto::komp_ac::search::searcher_server::Searcher; use common::search::register_slovak_tokenizers; use sqlx::{PgPool, Row}; use tracing::info; // We need to hold the database pool in our service struct. pub struct SearcherService { pub pool: PgPool, } // normalize_slovak_text function remains unchanged... fn normalize_slovak_text(text: &str) -> String { // ... function content is unchanged ... text.chars() .map(|c| match c { 'á' | 'à' | 'â' | 'ä' | 'ă' | 'ā' => 'a', 'Á' | 'À' | 'Â' | 'Ä' | 'Ă' | 'Ā' => 'A', 'é' | 'è' | 'ê' | 'ë' | 'ě' | 'ē' => 'e', 'É' | 'È' | 'Ê' | 'Ë' | 'Ě' | 'Ē' => 'E', 'í' | 'ì' | 'î' | 'ï' | 'ī' => 'i', 'Í' | 'Ì' | 'Î' | 'Ï' | 'Ī' => 'I', 'ó' | 'ò' | 'ô' | 'ö' | 'ō' | 'ő' => 'o', 'Ó' | 'Ò' | 'Ô' | 'Ö' | 'Ō' | 'Ő' => 'O', 'ú' | 'ù' | 'û' | 'ü' | 'ū' | 'ű' => 'u', 'Ú' | 'Ù' | 'Û' | 'Ü' | 'Ū' | 'Ű' => 'U', 'ý' | 'ỳ' | 'ŷ' | 'ÿ' => 'y', 'Ý' | 'Ỳ' | 'Ŷ' | 'Ÿ' => 'Y', 'č' => 'c', 'Č' => 'C', 'ď' => 'd', 'Ď' => 'D', 'ľ' => 'l', 'Ľ' => 'L', 'ň' => 'n', 'Ň' => 'N', 'ř' => 'r', 'Ř' => 'R', 'š' => 's', 'Š' => 'S', 'ť' => 't', 'Ť' => 'T', 'ž' => 'z', 'Ž' => 'Z', _ => c, }) .collect() } #[tonic::async_trait] impl Searcher for SearcherService { async fn search_table( &self, request: Request, ) -> Result, Status> { let req = request.into_inner(); let table_name = req.table_name; let query_str = req.query; // --- MODIFIED LOGIC --- // If the query is empty, fetch the 5 most recent records. if query_str.trim().is_empty() { info!( "Empty query for table '{}'. Fetching default results.", table_name ); let qualified_table = format!("gen.\"{}\"", table_name); let sql = format!( "SELECT id, to_jsonb(t) AS data FROM {} t ORDER BY id DESC LIMIT 5", qualified_table ); let rows = sqlx::query(&sql) .fetch_all(&self.pool) .await .map_err(|e| { Status::internal(format!( "DB query for default results failed: {}", e )) })?; let hits: Vec = rows .into_iter() .map(|row| { let id: i64 = row.try_get("id").unwrap_or_default(); let json_data: serde_json::Value = row.try_get("data").unwrap_or_default(); Hit { id, // Score is 0.0 as this is not a relevance-ranked search score: 0.0, content_json: json_data.to_string(), } }) .collect(); info!("--- SERVER: Successfully processed empty query. Returning {} default hits. ---", hits.len()); return Ok(Response::new(SearchResponse { hits })); } // --- END OF MODIFIED LOGIC --- let index_path = Path::new("./tantivy_indexes").join(&table_name); if !index_path.exists() { return Err(Status::not_found(format!( "No search index found for table '{}'", table_name ))); } let index = Index::open_in_dir(&index_path) .map_err(|e| Status::internal(format!("Failed to open index: {}", e)))?; register_slovak_tokenizers(&index).map_err(|e| { Status::internal(format!("Failed to register Slovak tokenizers: {}", e)) })?; let reader = index.reader().map_err(|e| { Status::internal(format!("Failed to create index reader: {}", e)) })?; let searcher = reader.searcher(); let schema = index.schema(); let pg_id_field = schema.get_field("pg_id").map_err(|_| { Status::internal("Schema is missing the 'pg_id' field.") })?; // --- Query Building Logic (no changes here) --- let prefix_edge_field = schema.get_field("prefix_edge").unwrap(); let prefix_full_field = schema.get_field("prefix_full").unwrap(); let text_ngram_field = schema.get_field("text_ngram").unwrap(); let normalized_query = normalize_slovak_text(&query_str); let words: Vec<&str> = normalized_query.split_whitespace().collect(); if words.is_empty() { return Ok(Response::new(SearchResponse { hits: vec![] })); } let mut query_layers: Vec<(Occur, Box)> = Vec::new(); // ... all your query building layers remain exactly the same ... // =============================== // LAYER 1: PREFIX MATCHING (HIGHEST PRIORITY, Boost: 4.0) // =============================== { let mut must_clauses: Vec<(Occur, Box)> = Vec::new(); for word in &words { let edge_term = Term::from_field_text(prefix_edge_field, word); let full_term = Term::from_field_text(prefix_full_field, word); let per_word_query = BooleanQuery::new(vec![ ( Occur::Should, Box::new(TermQuery::new( edge_term, IndexRecordOption::Basic, )), ), ( Occur::Should, Box::new(TermQuery::new( full_term, IndexRecordOption::Basic, )), ), ]); must_clauses.push((Occur::Must, Box::new(per_word_query) as Box)); } if !must_clauses.is_empty() { let prefix_query = BooleanQuery::new(must_clauses); let boosted_query = BoostQuery::new(Box::new(prefix_query), 4.0); query_layers.push((Occur::Should, Box::new(boosted_query))); } } // =============================== // LAYER 2: FUZZY MATCHING (HIGH PRIORITY, Boost: 3.0) // =============================== { let last_word = words.last().unwrap(); let fuzzy_term = Term::from_field_text(prefix_full_field, last_word); let fuzzy_query = FuzzyTermQuery::new(fuzzy_term, 2, true); let boosted_query = BoostQuery::new(Box::new(fuzzy_query), 3.0); query_layers.push((Occur::Should, Box::new(boosted_query))); } // =============================== // LAYER 3: PHRASE MATCHING WITH SLOP (MEDIUM PRIORITY, Boost: 2.0) // =============================== if words.len() > 1 { let slop_parser = QueryParser::for_index(&index, vec![prefix_full_field]); let slop_query_str = format!("\"{}\"~3", normalized_query); if let Ok(slop_query) = slop_parser.parse_query(&slop_query_str) { let boosted_query = BoostQuery::new(slop_query, 2.0); query_layers.push((Occur::Should, Box::new(boosted_query))); } } // =============================== // LAYER 4: NGRAM SUBSTRING MATCHING (LOWEST PRIORITY, Boost: 1.0) // =============================== { let ngram_parser = QueryParser::for_index(&index, vec![text_ngram_field]); if let Ok(ngram_query) = ngram_parser.parse_query(&normalized_query) { let boosted_query = BoostQuery::new(ngram_query, 1.0); query_layers.push((Occur::Should, Box::new(boosted_query))); } } let master_query = BooleanQuery::new(query_layers); // --- End of Query Building Logic --- let top_docs = searcher .search(&master_query, &TopDocs::with_limit(100)) .map_err(|e| Status::internal(format!("Search failed: {}", e)))?; if top_docs.is_empty() { return Ok(Response::new(SearchResponse { hits: vec![] })); } // --- NEW LOGIC: Fetch from DB and combine results --- // Step 1: Extract (score, pg_id) from Tantivy results. let mut scored_ids: Vec<(f32, u64)> = Vec::new(); for (score, doc_address) in top_docs { let doc: TantivyDocument = searcher.doc(doc_address).map_err(|e| { Status::internal(format!("Failed to retrieve document: {}", e)) })?; if let Some(pg_id_value) = doc.get_first(pg_id_field) { if let Some(pg_id) = pg_id_value.as_u64() { scored_ids.push((score, pg_id)); } } } // Step 2: Fetch all corresponding rows from Postgres in a single query. let pg_ids: Vec = scored_ids.iter().map(|(_, id)| *id as i64).collect(); let qualified_table = format!("gen.\"{}\"", table_name); let query_str = format!( "SELECT id, to_jsonb(t) AS data FROM {} t WHERE id = ANY($1)", qualified_table ); let rows = sqlx::query(&query_str) .bind(&pg_ids) .fetch_all(&self.pool) .await .map_err(|e| { Status::internal(format!("Database query failed: {}", e)) })?; // Step 3: Map the database results by ID for quick lookup. let mut content_map: HashMap = HashMap::new(); for row in rows { let id: i64 = row.try_get("id").unwrap_or(0); let json_data: serde_json::Value = row.try_get("data").unwrap_or(serde_json::Value::Null); content_map.insert(id, json_data.to_string()); } // Step 4: Build the final response, combining Tantivy scores with PG content. let hits: Vec = scored_ids .into_iter() .filter_map(|(score, pg_id)| { content_map .get(&(pg_id as i64)) .map(|content_json| Hit { id: pg_id as i64, score, content_json: content_json.clone(), }) }) .collect(); info!("--- SERVER: Successfully processed search. Returning {} hits. ---", hits.len()); let response = SearchResponse { hits }; Ok(Response::new(response)) } }