refactoring search based on the profile
This commit is contained in:
10
Cargo.lock
generated
10
Cargo.lock
generated
@@ -493,7 +493,7 @@ checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
|
||||
|
||||
[[package]]
|
||||
name = "canvas"
|
||||
version = "0.6.2"
|
||||
version = "0.6.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
@@ -585,7 +585,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "client"
|
||||
version = "0.6.2"
|
||||
version = "0.6.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
@@ -641,7 +641,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common"
|
||||
version = "0.6.2"
|
||||
version = "0.6.3"
|
||||
dependencies = [
|
||||
"prost 0.13.5",
|
||||
"prost-build 0.14.1",
|
||||
@@ -3116,7 +3116,7 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"
|
||||
|
||||
[[package]]
|
||||
name = "search"
|
||||
version = "0.6.2"
|
||||
version = "0.6.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"common",
|
||||
@@ -3215,7 +3215,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "server"
|
||||
version = "0.6.2"
|
||||
version = "0.6.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bcrypt",
|
||||
|
||||
@@ -7,14 +7,16 @@ service Searcher {
|
||||
}
|
||||
|
||||
message SearchRequest {
|
||||
string table_name = 1;
|
||||
optional string table_name = 1;
|
||||
string query = 2;
|
||||
string profile_name = 3;
|
||||
}
|
||||
message SearchResponse {
|
||||
message Hit {
|
||||
int64 id = 1; // PostgreSQL row ID
|
||||
float score = 2;
|
||||
string content_json = 3;
|
||||
string table_name = 4;
|
||||
}
|
||||
repeated Hit hits = 1;
|
||||
}
|
||||
|
||||
Binary file not shown.
@@ -1,10 +1,12 @@
|
||||
// This file is @generated by prost-build.
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct SearchRequest {
|
||||
#[prost(string, tag = "1")]
|
||||
pub table_name: ::prost::alloc::string::String,
|
||||
#[prost(string, optional, tag = "1")]
|
||||
pub table_name: ::core::option::Option<::prost::alloc::string::String>,
|
||||
#[prost(string, tag = "2")]
|
||||
pub query: ::prost::alloc::string::String,
|
||||
#[prost(string, tag = "3")]
|
||||
pub profile_name: ::prost::alloc::string::String,
|
||||
}
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct SearchResponse {
|
||||
@@ -22,6 +24,8 @@ pub mod search_response {
|
||||
pub score: f32,
|
||||
#[prost(string, tag = "3")]
|
||||
pub content_json: ::prost::alloc::string::String,
|
||||
#[prost(string, tag = "4")]
|
||||
pub table_name: ::prost::alloc::string::String,
|
||||
}
|
||||
}
|
||||
/// Generated client implementations.
|
||||
|
||||
@@ -1,16 +1,22 @@
|
||||
// common/src/search.rs
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
use tantivy::schema::*;
|
||||
use tantivy::tokenizer::*;
|
||||
use tantivy::Index;
|
||||
|
||||
/// Builds the directory path of the search index for one table of a profile.
///
/// The result is `root/<profile_name>/<table_name>`. This only assembles the
/// path; it does not touch the filesystem.
pub fn search_index_path(root: &Path, profile_name: &str, table_name: &str) -> PathBuf {
    let mut index_dir = root.to_path_buf();
    index_dir.push(profile_name);
    index_dir.push(table_name);
    index_dir
}
|
||||
|
||||
/// Creates a hybrid Slovak search schema with optimized prefix fields.
|
||||
pub fn create_search_schema() -> Schema {
|
||||
let mut schema_builder = Schema::builder();
|
||||
|
||||
schema_builder.add_u64_field("pg_id", INDEXED | STORED);
|
||||
|
||||
// FIELD 1: For prefixes (1-4 chars).
|
||||
// For prefixes (1-4 chars).
|
||||
let short_prefix_indexing = TextFieldIndexing::default()
|
||||
.set_tokenizer("slovak_prefix_edge")
|
||||
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
|
||||
@@ -19,7 +25,7 @@ pub fn create_search_schema() -> Schema {
|
||||
.set_stored();
|
||||
schema_builder.add_text_field("prefix_edge", short_prefix_options);
|
||||
|
||||
// FIELD 2: For the full word.
|
||||
// For the full word.
|
||||
let full_word_indexing = TextFieldIndexing::default()
|
||||
.set_tokenizer("slovak_prefix_full")
|
||||
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
|
||||
|
||||
1
search/.gitignore
vendored
Normal file
1
search/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
.codex
|
||||
@@ -1,7 +1,6 @@
|
||||
// src/lib.rs
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use tantivy::collector::TopDocs;
|
||||
use tantivy::query::{
|
||||
BooleanQuery, BoostQuery, FuzzyTermQuery, Occur, Query, QueryParser, TermQuery,
|
||||
@@ -11,20 +10,27 @@ use tantivy::{Index, TantivyDocument, Term};
|
||||
use tonic::{Request, Response, Status};
|
||||
|
||||
use common::proto::komp_ac::search::searcher_server::Searcher;
|
||||
pub use common::proto::komp_ac::search::searcher_server::SearcherServer;
|
||||
use common::proto::komp_ac::search::{search_response::Hit, SearchRequest, SearchResponse};
|
||||
use common::search::register_slovak_tokenizers;
|
||||
pub use common::proto::komp_ac::search::searcher_server::SearcherServer;
|
||||
use common::search::{register_slovak_tokenizers, search_index_path};
|
||||
use sqlx::{PgPool, Row};
|
||||
use tracing::info;
|
||||
|
||||
// We need to hold the database pool in our service struct.
|
||||
const INDEX_ROOT: &str = "./tantivy_indexes";
|
||||
const DEFAULT_RESULT_LIMIT: usize = 5;
|
||||
const SEARCH_RESULT_LIMIT: usize = 100;
|
||||
|
||||
pub struct SearcherService {
|
||||
pub pool: PgPool,
|
||||
}
|
||||
|
||||
// normalize_slovak_text function remains unchanged...
|
||||
/// One table that a search request resolves to, with everything needed to
/// query it.
#[derive(Debug, Clone)]
struct SearchTarget {
    /// Bare table name; copied into every returned hit and used in messages.
    table_name: String,
    /// Schema-qualified, quoted table reference interpolated into SQL text.
    qualified_table: String,
    /// Directory of the table's search index on disk.
    index_path: PathBuf,
}
|
||||
|
||||
fn normalize_slovak_text(text: &str) -> String {
|
||||
// ... function content is unchanged ...
|
||||
text.chars()
|
||||
.map(|c| match c {
|
||||
'á' | 'à' | 'â' | 'ä' | 'ă' | 'ā' => 'a',
|
||||
@@ -60,94 +66,125 @@ fn normalize_slovak_text(text: &str) -> String {
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[tonic::async_trait]
|
||||
impl Searcher for SearcherService {
|
||||
async fn search_table(
|
||||
&self,
|
||||
request: Request<SearchRequest>,
|
||||
) -> Result<Response<SearchResponse>, Status> {
|
||||
let req = request.into_inner();
|
||||
let table_name = req.table_name;
|
||||
let query_str = req.query;
|
||||
fn validate_identifier(value: &str, field_name: &str) -> Result<(), Status> {
|
||||
let mut chars = value.chars();
|
||||
let Some(first) = chars.next() else {
|
||||
return Err(Status::invalid_argument(format!(
|
||||
"{field_name} must not be empty"
|
||||
)));
|
||||
};
|
||||
|
||||
// --- MODIFIED LOGIC ---
|
||||
// If the query is empty, fetch the 5 most recent records.
|
||||
if query_str.trim().is_empty() {
|
||||
info!(
|
||||
"Empty query for table '{}'. Fetching default results.",
|
||||
table_name
|
||||
);
|
||||
let qualified_table = format!("gen.\"{}\"", table_name);
|
||||
let sql = format!(
|
||||
"SELECT id, to_jsonb(t) AS data FROM {} t ORDER BY id DESC LIMIT 5",
|
||||
qualified_table
|
||||
);
|
||||
|
||||
let rows = sqlx::query(&sql).fetch_all(&self.pool).await.map_err(|e| {
|
||||
Status::internal(format!("DB query for default results failed: {}", e))
|
||||
})?;
|
||||
|
||||
let hits: Vec<Hit> = rows
|
||||
.into_iter()
|
||||
.map(|row| {
|
||||
let id: i64 = row.try_get("id").unwrap_or_default();
|
||||
let json_data: serde_json::Value = row.try_get("data").unwrap_or_default();
|
||||
Hit {
|
||||
id,
|
||||
// Score is 0.0 as this is not a relevance-ranked search
|
||||
score: 0.0,
|
||||
content_json: json_data.to_string(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
info!(
|
||||
"--- SERVER: Successfully processed empty query. Returning {} default hits. ---",
|
||||
hits.len()
|
||||
);
|
||||
return Ok(Response::new(SearchResponse { hits }));
|
||||
}
|
||||
// --- END OF MODIFIED LOGIC ---
|
||||
|
||||
let index_path = Path::new("./tantivy_indexes").join(&table_name);
|
||||
if !index_path.exists() {
|
||||
return Err(Status::not_found(format!(
|
||||
"No search index found for table '{}'",
|
||||
table_name
|
||||
if !(first.is_ascii_alphabetic() || first == '_')
|
||||
|| !chars.all(|ch| ch.is_ascii_alphanumeric() || ch == '_')
|
||||
{
|
||||
return Err(Status::invalid_argument(format!(
|
||||
"{field_name} contains invalid characters"
|
||||
)));
|
||||
}
|
||||
|
||||
let index = Index::open_in_dir(&index_path)
|
||||
.map_err(|e| Status::internal(format!("Failed to open index: {}", e)))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
register_slovak_tokenizers(&index).map_err(|e| {
|
||||
Status::internal(format!("Failed to register Slovak tokenizers: {}", e))
|
||||
/// Builds the schema-qualified, double-quoted table reference
/// `"profile"."table"` that is interpolated into SQL text.
///
/// Embedded double quotes are doubled so that a name can never terminate the
/// quoted identifier early. Names without quotes produce exactly the same
/// output as plain wrapping in quotes.
fn qualify_profile_table(profile_name: &str, table_name: &str) -> String {
    // Doubling `"` is the escape for a quote inside a quoted SQL identifier.
    let quote = |identifier: &str| format!("\"{}\"", identifier.replace('"', "\"\""));
    format!("{}.{}", quote(profile_name), quote(table_name))
}
|
||||
|
||||
async fn profile_exists(pool: &PgPool, profile_name: &str) -> Result<bool, Status> {
|
||||
let exists = sqlx::query_scalar::<_, bool>("SELECT EXISTS(SELECT 1 FROM schemas WHERE name = $1)")
|
||||
.bind(profile_name)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
.map_err(|e| Status::internal(format!("Profile lookup failed: {}", e)))?;
|
||||
Ok(exists)
|
||||
}
|
||||
|
||||
// Scope resolution
|
||||
async fn resolve_search_targets(
|
||||
pool: &PgPool,
|
||||
profile_name: &str,
|
||||
requested_table: Option<&str>,
|
||||
) -> Result<Vec<SearchTarget>, Status> {
|
||||
validate_identifier(profile_name, "profile_name")?;
|
||||
|
||||
if !profile_exists(pool, profile_name).await? {
|
||||
return Err(Status::not_found(format!(
|
||||
"Profile '{}' was not found",
|
||||
profile_name
|
||||
)));
|
||||
}
|
||||
|
||||
let tables = if let Some(table_name) = requested_table.filter(|value| !value.trim().is_empty()) {
|
||||
validate_identifier(table_name, "table_name")?;
|
||||
|
||||
let row = sqlx::query_scalar::<_, String>(
|
||||
r#"
|
||||
SELECT td.table_name
|
||||
FROM table_definitions td
|
||||
JOIN schemas s ON td.schema_id = s.id
|
||||
WHERE s.name = $1 AND td.table_name = $2
|
||||
"#,
|
||||
)
|
||||
.bind(profile_name)
|
||||
.bind(table_name)
|
||||
.fetch_optional(pool)
|
||||
.await
|
||||
.map_err(|e| Status::internal(format!("Table lookup failed: {}", e)))?;
|
||||
|
||||
let table_name = row.ok_or_else(|| {
|
||||
Status::not_found(format!(
|
||||
"Table '{}' was not found in profile '{}'",
|
||||
table_name, profile_name
|
||||
))
|
||||
})?;
|
||||
|
||||
let reader = index
|
||||
.reader()
|
||||
.map_err(|e| Status::internal(format!("Failed to create index reader: {}", e)))?;
|
||||
let searcher = reader.searcher();
|
||||
vec![table_name]
|
||||
} else {
|
||||
sqlx::query_scalar::<_, String>(
|
||||
r#"
|
||||
SELECT td.table_name
|
||||
FROM table_definitions td
|
||||
JOIN schemas s ON td.schema_id = s.id
|
||||
WHERE s.name = $1
|
||||
ORDER BY td.table_name
|
||||
"#,
|
||||
)
|
||||
.bind(profile_name)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| Status::internal(format!("Profile table lookup failed: {}", e)))?
|
||||
};
|
||||
|
||||
Ok(tables
|
||||
.into_iter()
|
||||
.map(|table_name| SearchTarget {
|
||||
qualified_table: qualify_profile_table(profile_name, &table_name),
|
||||
index_path: search_index_path(Path::new(INDEX_ROOT), profile_name, &table_name),
|
||||
table_name,
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
// Query building
|
||||
fn build_query(index: &Index, normalized_query: &str) -> Result<Option<BooleanQuery>, Status> {
|
||||
let schema = index.schema();
|
||||
let prefix_edge_field = schema
|
||||
.get_field("prefix_edge")
|
||||
.map_err(|_| Status::internal("Schema is missing the 'prefix_edge' field."))?;
|
||||
let prefix_full_field = schema
|
||||
.get_field("prefix_full")
|
||||
.map_err(|_| Status::internal("Schema is missing the 'prefix_full' field."))?;
|
||||
let text_ngram_field = schema
|
||||
.get_field("text_ngram")
|
||||
.map_err(|_| Status::internal("Schema is missing the 'text_ngram' field."))?;
|
||||
|
||||
let pg_id_field = schema
|
||||
.get_field("pg_id")
|
||||
.map_err(|_| Status::internal("Schema is missing the 'pg_id' field."))?;
|
||||
|
||||
// --- Query Building Logic (no changes here) ---
|
||||
let prefix_edge_field = schema.get_field("prefix_edge").unwrap();
|
||||
let prefix_full_field = schema.get_field("prefix_full").unwrap();
|
||||
let text_ngram_field = schema.get_field("text_ngram").unwrap();
|
||||
let normalized_query = normalize_slovak_text(&query_str);
|
||||
let words: Vec<&str> = normalized_query.split_whitespace().collect();
|
||||
if words.is_empty() {
|
||||
return Ok(Response::new(SearchResponse { hits: vec![] }));
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let mut query_layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();
|
||||
// ... all your query building layers remain exactly the same ...
|
||||
// ===============================
|
||||
// LAYER 1: PREFIX MATCHING (HIGHEST PRIORITY, Boost: 4.0)
|
||||
// ===============================
|
||||
|
||||
// Layer 1: prefix
|
||||
{
|
||||
let mut must_clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();
|
||||
for word in &words {
|
||||
@@ -164,7 +201,7 @@ impl Searcher for SearcherService {
|
||||
Box::new(TermQuery::new(full_term, IndexRecordOption::Basic)),
|
||||
),
|
||||
]);
|
||||
must_clauses.push((Occur::Must, Box::new(per_word_query) as Box<dyn Query>));
|
||||
must_clauses.push((Occur::Must, Box::new(per_word_query)));
|
||||
}
|
||||
|
||||
if !must_clauses.is_empty() {
|
||||
@@ -174,22 +211,20 @@ impl Searcher for SearcherService {
|
||||
}
|
||||
}
|
||||
|
||||
// ===============================
|
||||
// LAYER 2: FUZZY MATCHING (HIGH PRIORITY, Boost: 3.0)
|
||||
// ===============================
|
||||
// Layer 2: fuzzy
|
||||
{
|
||||
let last_word = words.last().unwrap();
|
||||
let last_word = words
|
||||
.last()
|
||||
.ok_or_else(|| Status::internal("Query normalization lost all tokens"))?;
|
||||
let fuzzy_term = Term::from_field_text(prefix_full_field, last_word);
|
||||
let fuzzy_query = FuzzyTermQuery::new(fuzzy_term, 2, true);
|
||||
let boosted_query = BoostQuery::new(Box::new(fuzzy_query), 3.0);
|
||||
query_layers.push((Occur::Should, Box::new(boosted_query)));
|
||||
}
|
||||
|
||||
// ===============================
|
||||
// LAYER 3: PHRASE MATCHING WITH SLOP (MEDIUM PRIORITY, Boost: 2.0)
|
||||
// ===============================
|
||||
// Layer 3: phrase
|
||||
if words.len() > 1 {
|
||||
let slop_parser = QueryParser::for_index(&index, vec![prefix_full_field]);
|
||||
let slop_parser = QueryParser::for_index(index, vec![prefix_full_field]);
|
||||
let slop_query_str = format!("\"{}\"~3", normalized_query);
|
||||
if let Ok(slop_query) = slop_parser.parse_query(&slop_query_str) {
|
||||
let boosted_query = BoostQuery::new(slop_query, 2.0);
|
||||
@@ -197,30 +232,81 @@ impl Searcher for SearcherService {
|
||||
}
|
||||
}
|
||||
|
||||
// ===============================
|
||||
// LAYER 4: NGRAM SUBSTRING MATCHING (LOWEST PRIORITY, Boost: 1.0)
|
||||
// ===============================
|
||||
// Layer 4: ngram
|
||||
{
|
||||
let ngram_parser = QueryParser::for_index(&index, vec![text_ngram_field]);
|
||||
if let Ok(ngram_query) = ngram_parser.parse_query(&normalized_query) {
|
||||
let ngram_parser = QueryParser::for_index(index, vec![text_ngram_field]);
|
||||
if let Ok(ngram_query) = ngram_parser.parse_query(normalized_query) {
|
||||
let boosted_query = BoostQuery::new(ngram_query, 1.0);
|
||||
query_layers.push((Occur::Should, Box::new(boosted_query)));
|
||||
}
|
||||
}
|
||||
let master_query = BooleanQuery::new(query_layers);
|
||||
// --- End of Query Building Logic ---
|
||||
|
||||
Ok(Some(BooleanQuery::new(query_layers)))
|
||||
}
|
||||
|
||||
// Empty query
|
||||
async fn fetch_default_hits(pool: &PgPool, target: &SearchTarget) -> Result<Vec<Hit>, Status> {
|
||||
let sql = format!(
|
||||
"SELECT id, to_jsonb(t) AS data FROM {} t WHERE deleted = FALSE ORDER BY id DESC LIMIT {}",
|
||||
target.qualified_table, DEFAULT_RESULT_LIMIT
|
||||
);
|
||||
|
||||
let rows = sqlx::query(&sql)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| Status::internal(format!("DB query for default results failed: {}", e)))?;
|
||||
|
||||
Ok(rows
|
||||
.into_iter()
|
||||
.map(|row| {
|
||||
let id: i64 = row.try_get("id").unwrap_or_default();
|
||||
let json_data: serde_json::Value = row.try_get("data").unwrap_or_default();
|
||||
Hit {
|
||||
id,
|
||||
score: 0.0,
|
||||
content_json: json_data.to_string(),
|
||||
table_name: target.table_name.clone(),
|
||||
}
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
// Search + hydrate
|
||||
async fn search_target(
|
||||
pool: &PgPool,
|
||||
target: &SearchTarget,
|
||||
query_str: &str,
|
||||
) -> Result<Vec<Hit>, Status> {
|
||||
if !target.index_path.exists() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
let index = Index::open_in_dir(&target.index_path)
|
||||
.map_err(|e| Status::internal(format!("Failed to open index: {}", e)))?;
|
||||
register_slovak_tokenizers(&index)
|
||||
.map_err(|e| Status::internal(format!("Failed to register Slovak tokenizers: {}", e)))?;
|
||||
|
||||
let Some(master_query) = build_query(&index, &normalize_slovak_text(query_str))? else {
|
||||
return Ok(vec![]);
|
||||
};
|
||||
|
||||
let reader = index
|
||||
.reader()
|
||||
.map_err(|e| Status::internal(format!("Failed to create index reader: {}", e)))?;
|
||||
let searcher = reader.searcher();
|
||||
let schema = index.schema();
|
||||
let pg_id_field = schema
|
||||
.get_field("pg_id")
|
||||
.map_err(|_| Status::internal("Schema is missing the 'pg_id' field."))?;
|
||||
|
||||
let top_docs = searcher
|
||||
.search(&master_query, &TopDocs::with_limit(100))
|
||||
.search(&master_query, &TopDocs::with_limit(SEARCH_RESULT_LIMIT))
|
||||
.map_err(|e| Status::internal(format!("Search failed: {}", e)))?;
|
||||
|
||||
if top_docs.is_empty() {
|
||||
return Ok(Response::new(SearchResponse { hits: vec![] }));
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
// --- NEW LOGIC: Fetch from DB and combine results ---
|
||||
|
||||
// Step 1: Extract (score, pg_id) from Tantivy results.
|
||||
let mut scored_ids: Vec<(f32, u64)> = Vec::new();
|
||||
for (score, doc_address) in top_docs {
|
||||
let doc: TantivyDocument = searcher
|
||||
@@ -233,47 +319,106 @@ impl Searcher for SearcherService {
|
||||
}
|
||||
}
|
||||
|
||||
// Step 2: Fetch all corresponding rows from Postgres in a single query.
|
||||
if scored_ids.is_empty() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
let pg_ids: Vec<i64> = scored_ids.iter().map(|(_, id)| *id as i64).collect();
|
||||
let qualified_table = format!("gen.\"{}\"", table_name);
|
||||
let query_str = format!(
|
||||
"SELECT id, to_jsonb(t) AS data FROM {} t WHERE id = ANY($1)",
|
||||
qualified_table
|
||||
let sql = format!(
|
||||
"SELECT id, to_jsonb(t) AS data FROM {} t WHERE deleted = FALSE AND id = ANY($1)",
|
||||
target.qualified_table
|
||||
);
|
||||
|
||||
let rows = sqlx::query(&query_str)
|
||||
let rows = sqlx::query(&sql)
|
||||
.bind(&pg_ids)
|
||||
.fetch_all(&self.pool)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| Status::internal(format!("Database query failed: {}", e)))?;
|
||||
|
||||
// Step 3: Map the database results by ID for quick lookup.
|
||||
let mut content_map: HashMap<i64, String> = HashMap::new();
|
||||
for row in rows {
|
||||
let id: i64 = row.try_get("id").unwrap_or(0);
|
||||
let json_data: serde_json::Value =
|
||||
row.try_get("data").unwrap_or(serde_json::Value::Null);
|
||||
let id: i64 = row.try_get("id").unwrap_or_default();
|
||||
let json_data: serde_json::Value = row.try_get("data").unwrap_or_default();
|
||||
content_map.insert(id, json_data.to_string());
|
||||
}
|
||||
|
||||
// Step 4: Build the final response, combining Tantivy scores with PG content.
|
||||
let hits: Vec<Hit> = scored_ids
|
||||
Ok(scored_ids
|
||||
.into_iter()
|
||||
.filter_map(|(score, pg_id)| {
|
||||
content_map.get(&(pg_id as i64)).map(|content_json| Hit {
|
||||
id: pg_id as i64,
|
||||
score,
|
||||
content_json: content_json.clone(),
|
||||
table_name: target.table_name.clone(),
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
.collect())
|
||||
}
|
||||
|
||||
#[tonic::async_trait]
|
||||
impl Searcher for SearcherService {
|
||||
async fn search_table(
|
||||
&self,
|
||||
request: Request<SearchRequest>,
|
||||
) -> Result<Response<SearchResponse>, Status> {
|
||||
let req = request.into_inner();
|
||||
let profile_name = req.profile_name.trim();
|
||||
if profile_name.is_empty() {
|
||||
return Err(Status::invalid_argument("profile_name is required"));
|
||||
}
|
||||
|
||||
// Request scope
|
||||
let requested_table = req.table_name.as_deref().map(str::trim);
|
||||
let targets = resolve_search_targets(&self.pool, profile_name, requested_table).await?;
|
||||
|
||||
if targets.is_empty() {
|
||||
return Ok(Response::new(SearchResponse { hits: vec![] }));
|
||||
}
|
||||
|
||||
let query = req.query.trim();
|
||||
if query.is_empty() {
|
||||
// Empty query
|
||||
if targets.len() != 1 {
|
||||
return Err(Status::invalid_argument(
|
||||
"table_name is required when query is empty",
|
||||
));
|
||||
}
|
||||
|
||||
let hits = fetch_default_hits(&self.pool, &targets[0]).await?;
|
||||
info!(
|
||||
"Empty query for profile '{}' table '{}'. Returning {} default hits.",
|
||||
profile_name,
|
||||
targets[0].table_name,
|
||||
hits.len()
|
||||
);
|
||||
return Ok(Response::new(SearchResponse { hits }));
|
||||
}
|
||||
|
||||
if requested_table.is_some() && targets.len() == 1 && !targets[0].index_path.exists() {
|
||||
return Err(Status::not_found(format!(
|
||||
"No search index found for table '{}'",
|
||||
targets[0].table_name
|
||||
)));
|
||||
}
|
||||
|
||||
// Merge per-table hits
|
||||
let mut hits = Vec::new();
|
||||
for target in &targets {
|
||||
hits.extend(search_target(&self.pool, target, query).await?);
|
||||
}
|
||||
|
||||
hits.sort_by(|left, right| right.score.total_cmp(&left.score));
|
||||
if hits.len() > SEARCH_RESULT_LIMIT {
|
||||
hits.truncate(SEARCH_RESULT_LIMIT);
|
||||
}
|
||||
|
||||
info!(
|
||||
"--- SERVER: Successfully processed search. Returning {} hits. ---",
|
||||
"Processed search for profile '{}' (table scope: {}). Returning {} hits.",
|
||||
profile_name,
|
||||
requested_table.unwrap_or("*"),
|
||||
hits.len()
|
||||
);
|
||||
|
||||
let response = SearchResponse { hits };
|
||||
Ok(Response::new(response))
|
||||
Ok(Response::new(SearchResponse { hits }))
|
||||
}
|
||||
}
|
||||
|
||||
2
server
2
server
Submodule server updated: db54c07358...df65bbf8f3
Reference in New Issue
Block a user