Return the row JSON in the output of the Tantivy search
@@ -1,12 +1,13 @@
// search/src/lib.rs
// src/lib.rs

use std::collections::HashMap;
use std::path::Path;
use tantivy::collector::TopDocs;
use tantivy::query::{
    BooleanQuery, BoostQuery, FuzzyTermQuery, Occur, Query, QueryParser,
    TermQuery,
};
use tantivy::schema::IndexRecordOption;
use tantivy::schema::{IndexRecordOption, Value};
use tantivy::{Index, TantivyDocument, Term};
use tonic::{Request, Response, Status};

@@ -16,12 +17,16 @@ use common::proto::multieko2::search::{
pub use common::proto::multieko2::search::searcher_server::SearcherServer;
use common::proto::multieko2::search::searcher_server::Searcher;
use common::search::register_slovak_tokenizers;
use tantivy::schema::Value;
use sqlx::{PgPool, Row}; // <-- Import PgPool and Row

pub struct SearcherService;
// We need to hold the database pool in our service struct.
pub struct SearcherService {
    pub pool: PgPool,
}

// Normalize diacritics in queries
// Normalize diacritics in queries (no changes here)
fn normalize_slovak_text(text: &str) -> String {
    // ... function content is unchanged ...
    text.chars()
        .map(|c| match c {
            'á' | 'à' | 'â' | 'ä' | 'ă' | 'ā' => 'a',

@@ -105,15 +110,17 @@ impl Searcher for SearcherService {
            Status::internal("Schema is missing the 'pg_id' field.")
        })?;

        // --- Query Building Logic (no changes here) ---
        let prefix_edge_field = schema.get_field("prefix_edge").unwrap();
        let prefix_full_field = schema.get_field("prefix_full").unwrap();
        let text_ngram_field = schema.get_field("text_ngram").unwrap();
        let normalized_query = normalize_slovak_text(&query_str);
        let words: Vec<&str> = normalized_query.split_whitespace().collect();

        if words.is_empty() {
            return Ok(Response::new(SearchResponse { hits: vec![] }));
        }

        let mut query_layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();

        // ... all your query building layers remain exactly the same ...
        // ===============================
        // LAYER 1: PREFIX MATCHING (HIGHEST PRIORITY, Boost: 4.0)
        // ===============================

@@ -190,31 +197,73 @@ impl Searcher for SearcherService {
                query_layers.push((Occur::Should, Box::new(boosted_query)));
            }
        }

        let master_query = BooleanQuery::new(query_layers);
        // --- End of Query Building Logic ---

        let top_docs = searcher
            .search(&master_query, &TopDocs::with_limit(100))
            .map_err(|e| Status::internal(format!("Search failed: {}", e)))?;

        let mut hits = Vec::new();
        if top_docs.is_empty() {
            return Ok(Response::new(SearchResponse { hits: vec![] }));
        }

        // --- NEW LOGIC: Fetch from DB and combine results ---

        // Step 1: Extract (score, pg_id) from Tantivy results.
        let mut scored_ids: Vec<(f32, u64)> = Vec::new();
        for (score, doc_address) in top_docs {
            let doc: TantivyDocument = searcher.doc(doc_address).map_err(|e| {
                Status::internal(format!("Failed to retrieve document: {}", e))
            })?;

            if let Some(pg_id_value) = doc.get_first(pg_id_field) {
                if let Some(pg_id) = pg_id_value.as_u64() {
                    hits.push(Hit {
                        id: pg_id as i64,
                        score,
                    });
                    scored_ids.push((score, pg_id));
                }
            }
        }

        // Step 2: Fetch all corresponding rows from Postgres in a single query.
        let pg_ids: Vec<i64> =
            scored_ids.iter().map(|(_, id)| *id as i64).collect();
        let qualified_table = format!("gen.\"{}\"", table_name);
        let query_str = format!(
            "SELECT id, to_jsonb(t) AS data FROM {} t WHERE id = ANY($1)",
            qualified_table
        );

        let rows = sqlx::query(&query_str)
            .bind(&pg_ids)
            .fetch_all(&self.pool)
            .await
            .map_err(|e| {
                Status::internal(format!("Database query failed: {}", e))
            })?;

        // Step 3: Map the database results by ID for quick lookup.
        let mut content_map: HashMap<i64, String> = HashMap::new();
        for row in rows {
            let id: i64 = row.try_get("id").unwrap_or(0);
            let json_data: serde_json::Value =
                row.try_get("data").unwrap_or(serde_json::Value::Null);
            content_map.insert(id, json_data.to_string());
        }

        // Step 4: Build the final response, combining Tantivy scores with PG content.
        let hits: Vec<Hit> = scored_ids
            .into_iter()
            .filter_map(|(score, pg_id)| {
                content_map
                    .get(&(pg_id as i64))
                    .map(|content_json| Hit {
                        id: pg_id as i64,
                        score,
                        content_json: content_json.clone(),
                    })
            })
            .collect();

        let response = SearchResponse { hits };
        Ok(Response::new(response))
    }
}
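
Note: since SearcherService now carries a PgPool, whatever binary mounts the service has to be built with a database pool. A minimal wiring sketch, assuming the crate is named `search` as the file header suggests; the connection string and listen address below are placeholders, not values from this repository:

    use search::{SearcherServer, SearcherService}; // assumed crate name
    use sqlx::postgres::PgPoolOptions;
    use tonic::transport::Server;

    #[tokio::main]
    async fn main() -> Result<(), Box<dyn std::error::Error>> {
        // The service is no longer a unit struct: it must own a PgPool.
        let pool = PgPoolOptions::new()
            .max_connections(5)
            .connect("postgres://user:pass@localhost/multieko2") // placeholder DSN
            .await?;

        let service = SearcherService { pool };

        Server::builder()
            .add_service(SearcherServer::new(service))
            .serve("127.0.0.1:50051".parse()?) // placeholder address
            .await?;

        Ok(())
    }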
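
Note: the query built in Step 2 expands to SELECT id, to_jsonb(t) AS data FROM gen."<table>" t WHERE id = ANY($1); to_jsonb(t) folds every column of the row alias t into a single JSON object per row. A standalone sketch of the same pattern, assuming a hypothetical gen."customers" table and a sqlx build with the postgres and json features enabled:

    use sqlx::{PgPool, Row};

    // Fetch whole rows as JSON for a set of ids, the way the service does it.
    // The table name here is a made-up example.
    async fn rows_as_json(pool: &PgPool, ids: &[i64]) -> Result<(), sqlx::Error> {
        let sql = r#"SELECT id, to_jsonb(t) AS data FROM gen."customers" t WHERE id = ANY($1)"#;
        let rows = sqlx::query(sql).bind(ids).fetch_all(pool).await?;
        for row in rows {
            let id: i64 = row.try_get("id")?;
            // Decoding into serde_json::Value needs sqlx's "json" feature.
            let data: serde_json::Value = row.try_get("data")?;
            println!("{id}: {data}");
        }
        Ok(())
    }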
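
Note: each Hit now carries the matched Postgres row as a JSON string in content_json, which is what the commit title refers to. A sketch of how a client might read it back; the Hit struct below is only a stand-in for the message generated from the search proto:

    use serde_json::Value;

    // Stand-in for the proto-generated Hit message (same fields as used above).
    struct Hit {
        id: i64,
        score: f32,
        content_json: String,
    }

    // content_json is the full row serialized by to_jsonb(t), so individual
    // columns can be recovered without a second round trip to the server.
    fn print_hit(hit: &Hit) -> Result<(), serde_json::Error> {
        let row: Value = serde_json::from_str(&hit.content_json)?;
        println!("id={} score={} row={}", hit.id, hit.score, row);
        Ok(())
    }

Hits whose id is not found in Postgres are dropped by the filter_map in Step 4, and the response keeps Tantivy's score order because scored_ids is filled while iterating top_docs.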