From fb4769301c9a9905f1b355a993a4a9d76a98c42d Mon Sep 17 00:00:00 2001 From: Priec Date: Wed, 29 Apr 2026 01:33:48 +0200 Subject: [PATCH] column name indexing --- common/proto/search.proto | 1 + common/src/proto/descriptor.bin | Bin 57100 -> 57224 bytes common/src/proto/komp_ac.search.rs | 2 + common/src/search.rs | 43 +++++++++++++ search/src/lib.rs | 98 ++++++++++++++--------------- server | 2 +- 6 files changed, 96 insertions(+), 50 deletions(-) diff --git a/common/proto/search.proto b/common/proto/search.proto index 4d288db..86915ce 100644 --- a/common/proto/search.proto +++ b/common/proto/search.proto @@ -11,6 +11,7 @@ message SearchRequest { optional string table_name = 1; string query = 2; string profile_name = 3; + optional string column_name = 4; } message SearchResponse { message Hit { diff --git a/common/src/proto/descriptor.bin b/common/src/proto/descriptor.bin index 32330678c88e9dc2a5c0e11da2224d048e56a220..b05201b5558fb5af767fe39e25a95b49ffbb8b66 100644 GIT binary patch delta 551 zcmYk3F;c=n6o&WhKBBw@LN*~FN(hKE7M{RHhsw&r3uMrt18lUWrLfBhya5hl<&2Jn z7qIpedI8^tNkW%z|NqYn zPODdNL)*n`URwc->!g#1$?a^KwZ;vqCELR0HCjOR$05fch<17we?cStjX#%~5QS7m z)#Z?CRIz}TD5naO_HqIy)62<@>svmJ*NJnS$V c_DRKN4x{FW3OoBdO?qq zq(OTBF9*rJt$QEm$9rVsI^u_GGw!xw6hB;Qtqjp-;mr=uij67KK=kO C!yys? diff --git a/common/src/proto/komp_ac.search.rs b/common/src/proto/komp_ac.search.rs index 849d2ba..2aa6f69 100644 --- a/common/src/proto/komp_ac.search.rs +++ b/common/src/proto/komp_ac.search.rs @@ -7,6 +7,8 @@ pub struct SearchRequest { pub query: ::prost::alloc::string::String, #[prost(string, tag = "3")] pub profile_name: ::prost::alloc::string::String, + #[prost(string, optional, tag = "4")] + pub column_name: ::core::option::Option<::prost::alloc::string::String>, } #[derive(Clone, PartialEq, ::prost::Message)] pub struct SearchResponse { diff --git a/common/src/search.rs b/common/src/search.rs index 1ba9509..34cd127 100644 --- a/common/src/search.rs +++ b/common/src/search.rs @@ -15,6 +15,48 @@ pub fn search_row_key(table_name: &str, row_id: i64) -> String { format!("{}:{}", table_name, row_id) } +/// Normalizes user-entered search text while preserving letter case. +pub fn normalize_search_text(text: &str) -> String { + text.chars() + .map(|ch| match ch { + 'á' | 'à' | 'â' | 'ä' | 'ă' | 'ā' => 'a', + 'Á' | 'À' | 'Â' | 'Ä' | 'Ă' | 'Ā' => 'A', + 'é' | 'è' | 'ê' | 'ë' | 'ě' | 'ē' => 'e', + 'É' | 'È' | 'Ê' | 'Ë' | 'Ě' | 'Ē' => 'E', + 'í' | 'ì' | 'î' | 'ï' | 'ī' => 'i', + 'Í' | 'Ì' | 'Î' | 'Ï' | 'Ī' => 'I', + 'ó' | 'ò' | 'ô' | 'ö' | 'ō' | 'ő' => 'o', + 'Ó' | 'Ò' | 'Ô' | 'Ö' | 'Ō' | 'Ő' => 'O', + 'ú' | 'ù' | 'û' | 'ü' | 'ū' | 'ű' => 'u', + 'Ú' | 'Ù' | 'Û' | 'Ü' | 'Ū' | 'Ű' => 'U', + 'ý' | 'ỳ' | 'ŷ' | 'ÿ' => 'y', + 'Ý' | 'Ỳ' | 'Ŷ' | 'Ÿ' => 'Y', + 'č' => 'c', + 'Č' => 'C', + 'ď' => 'd', + 'Ď' => 'D', + 'ľ' => 'l', + 'Ľ' => 'L', + 'ň' => 'n', + 'Ň' => 'N', + 'ř' => 'r', + 'Ř' => 'R', + 'š' => 's', + 'Š' => 'S', + 'ť' => 't', + 'Ť' => 'T', + 'ž' => 'z', + 'Ž' => 'Z', + _ => ch, + }) + .collect() +} + +/// Normalizes an exact-match value so indexed data and user input use the same form. +pub fn normalize_exact_value(text: &str) -> String { + normalize_search_text(text).to_lowercase() +} + /// Creates a hybrid Slovak search schema with optimized prefix fields. pub fn create_search_schema() -> Schema { let mut schema_builder = Schema::builder(); @@ -22,6 +64,7 @@ pub fn create_search_schema() -> Schema { schema_builder.add_u64_field("pg_id", INDEXED | STORED); schema_builder.add_text_field("table_name", STRING | STORED); schema_builder.add_text_field("row_key", STRING | STORED); + schema_builder.add_text_field("column_exact", STRING); // For prefixes (1-4 chars). let short_prefix_indexing = TextFieldIndexing::default() diff --git a/search/src/lib.rs b/search/src/lib.rs index 47e1e2c..83e8c57 100644 --- a/search/src/lib.rs +++ b/search/src/lib.rs @@ -12,7 +12,9 @@ use tonic::{Request, Response, Status}; use common::proto::komp_ac::search::searcher_server::Searcher; use common::proto::komp_ac::search::{search_response::Hit, SearchRequest, SearchResponse}; pub use common::proto::komp_ac::search::searcher_server::SearcherServer; -use common::search::{register_slovak_tokenizers, search_index_path}; +use common::search::{ + normalize_exact_value, normalize_search_text, register_slovak_tokenizers, search_index_path, +}; use sqlx::{PgPool, Row}; use tracing::info; @@ -33,6 +35,7 @@ pub struct SearcherService { struct SearchScope { profile_name: String, requested_table: Option, + requested_column: Option, index_path: PathBuf, } @@ -42,42 +45,6 @@ struct SearchCandidate { table_name: String, } -fn normalize_slovak_text(text: &str) -> String { - text.chars() - .map(|c| match c { - 'á' | 'à' | 'â' | 'ä' | 'ă' | 'ā' => 'a', - 'Á' | 'À' | 'Â' | 'Ä' | 'Ă' | 'Ā' => 'A', - 'é' | 'è' | 'ê' | 'ë' | 'ě' | 'ē' => 'e', - 'É' | 'È' | 'Ê' | 'Ë' | 'Ě' | 'Ē' => 'E', - 'í' | 'ì' | 'î' | 'ï' | 'ī' => 'i', - 'Í' | 'Ì' | 'Î' | 'Ï' | 'Ī' => 'I', - 'ó' | 'ò' | 'ô' | 'ö' | 'ō' | 'ő' => 'o', - 'Ó' | 'Ò' | 'Ô' | 'Ö' | 'Ō' | 'Ő' => 'O', - 'ú' | 'ù' | 'û' | 'ü' | 'ū' | 'ű' => 'u', - 'Ú' | 'Ù' | 'Û' | 'Ü' | 'Ū' | 'Ű' => 'U', - 'ý' | 'ỳ' | 'ŷ' | 'ÿ' => 'y', - 'Ý' | 'Ỳ' | 'Ŷ' | 'Ÿ' => 'Y', - 'č' => 'c', - 'Č' => 'C', - 'ď' => 'd', - 'Ď' => 'D', - 'ľ' => 'l', - 'Ľ' => 'L', - 'ň' => 'n', - 'Ň' => 'N', - 'ř' => 'r', - 'Ř' => 'R', - 'š' => 's', - 'Š' => 'S', - 'ť' => 't', - 'Ť' => 'T', - 'ž' => 'z', - 'Ž' => 'Z', - _ => c, - }) - .collect() -} - fn validate_identifier(value: &str, field_name: &str) -> Result<(), Status> { let mut chars = value.chars(); let Some(first) = chars.next() else { @@ -116,6 +83,7 @@ async fn resolve_search_scope( pool: &PgPool, profile_name: &str, requested_table: Option<&str>, + requested_column: Option<&str>, ) -> Result { validate_identifier(profile_name, "profile_name")?; @@ -155,9 +123,19 @@ async fn resolve_search_scope( None }; + let requested_column = if let Some(column_name) = + requested_column.filter(|value| !value.trim().is_empty()) + { + validate_identifier(column_name, "column_name")?; + Some(column_name.to_string()) + } else { + None + }; + Ok(SearchScope { profile_name: profile_name.to_string(), requested_table, + requested_column, index_path: search_index_path(Path::new(INDEX_ROOT), profile_name), }) } @@ -168,11 +146,15 @@ fn build_query( normalized_query: &str, mode: SearchMode, table_filter: Option<&str>, + column_filter: Option<&str>, ) -> Result, Status> { let schema = index.schema(); let table_name_field = schema .get_field("table_name") .map_err(|_| Status::internal("Schema is missing the 'table_name' field."))?; + let column_exact_field = schema + .get_field("column_exact") + .map_err(|_| Status::internal("Schema is missing the 'column_exact' field."))?; let prefix_edge_field = schema .get_field("prefix_edge") .map_err(|_| Status::internal("Schema is missing the 'prefix_edge' field."))?; @@ -189,17 +171,29 @@ fn build_query( } let content_query: Box = if matches!(mode, SearchMode::Exact) { - let exact_parser = QueryParser::for_index(index, vec![prefix_full_field]); - let exact_query_str = if words.len() == 1 { - normalized_query.to_string() + if let Some(column_name) = column_filter { + let exact_term = Term::from_field_text( + column_exact_field, + &format!( + "{}:{}", + column_name.to_ascii_lowercase(), + normalize_exact_value(normalized_query) + ), + ); + Box::new(TermQuery::new(exact_term, IndexRecordOption::Basic)) } else { - format!("\"{}\"", normalized_query) - }; + let exact_parser = QueryParser::for_index(index, vec![prefix_full_field]); + let exact_query_str = if words.len() == 1 { + normalized_query.to_string() + } else { + format!("\"{}\"", normalized_query) + }; - let exact_query = exact_parser - .parse_query(&exact_query_str) - .map_err(|e| Status::internal(format!("Failed to build exact query: {}", e)))?; - Box::new(exact_query) + let exact_query = exact_parser + .parse_query(&exact_query_str) + .map_err(|e| Status::internal(format!("Failed to build exact query: {}", e)))?; + Box::new(exact_query) + } } else { let mut query_layers: Vec<(Occur, Box)> = Vec::new(); @@ -325,9 +319,10 @@ async fn search_profile( let Some(master_query) = build_query( &index, - &normalize_slovak_text(query_str), + &normalize_search_text(query_str), mode, scope.requested_table.as_deref(), + scope.requested_column.as_deref(), )? else { return Ok(vec![]); }; @@ -455,8 +450,13 @@ impl SearcherService { // Request scope let scope = - resolve_search_scope(&self.pool, profile_name, req.table_name.as_deref().map(str::trim)) - .await?; + resolve_search_scope( + &self.pool, + profile_name, + req.table_name.as_deref().map(str::trim), + req.column_name.as_deref().map(str::trim), + ) + .await?; let query = req.query.trim(); if query.is_empty() { diff --git a/server b/server index b26adc0..16ea6e1 160000 --- a/server +++ b/server @@ -1 +1 @@ -Subproject commit b26adc0cb0afeb4379da320ccb3fd6d4d3a241a4 +Subproject commit 16ea6e14b5ccf3b98862ee35dfa04f8661b3e11f