column name indexing

This commit is contained in:
Priec
2026-04-29 01:33:48 +02:00
parent 036e12f345
commit fb4769301c
6 changed files with 96 additions and 50 deletions

View File

@@ -11,6 +11,7 @@ message SearchRequest {
optional string table_name = 1;
string query = 2;
string profile_name = 3;
optional string column_name = 4;
}
message SearchResponse {
message Hit {

Binary file not shown.

View File

@@ -7,6 +7,8 @@ pub struct SearchRequest {
pub query: ::prost::alloc::string::String,
#[prost(string, tag = "3")]
pub profile_name: ::prost::alloc::string::String,
#[prost(string, optional, tag = "4")]
pub column_name: ::core::option::Option<::prost::alloc::string::String>,
}
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct SearchResponse {

View File

@@ -15,6 +15,48 @@ pub fn search_row_key(table_name: &str, row_id: i64) -> String {
format!("{}:{}", table_name, row_id)
}
/// Normalizes user-entered search text while preserving letter case.
///
/// Every character that carries one of the diacritics listed in the table
/// below is replaced by its unaccented base letter of the same case; all other
/// characters are passed through untouched. The mapping is strictly
/// one-character-to-one-character, so the result has the same number of
/// `char`s as the input.
///
/// The table covers the full set of Slovak accented letters, including the
/// long consonants `ĺ`/`ŕ`, plus the Czech `ě`, `ř`, `ů` and a handful of
/// accented vowels from neighbouring languages.
///
/// NOTE(review): values stored in an index through an earlier revision of
/// this table would have kept `ĺ`, `ŕ` and `ů` as-is — confirm whether such
/// indexes need to be rebuilt so stored and queried forms agree.
pub fn normalize_search_text(text: &str) -> String {
    text.chars()
        .map(|ch| match ch {
            // Vowels.
            'á' | 'à' | 'â' | 'ä' | 'ă' | 'ā' => 'a',
            'Á' | 'À' | 'Â' | 'Ä' | 'Ă' | 'Ā' => 'A',
            'é' | 'è' | 'ê' | 'ë' | 'ě' | 'ē' => 'e',
            'É' | 'È' | 'Ê' | 'Ë' | 'Ě' | 'Ē' => 'E',
            'í' | 'ì' | 'î' | 'ï' | 'ī' => 'i',
            'Í' | 'Ì' | 'Î' | 'Ï' | 'Ī' => 'I',
            'ó' | 'ò' | 'ô' | 'ö' | 'ō' | 'ő' => 'o',
            'Ó' | 'Ò' | 'Ô' | 'Ö' | 'Ō' | 'Ő' => 'O',
            'ú' | 'ù' | 'û' | 'ü' | 'ū' | 'ű' | 'ů' => 'u',
            'Ú' | 'Ù' | 'Û' | 'Ü' | 'Ū' | 'Ű' | 'Ů' => 'U',
            'ý' | 'ỳ' | 'ŷ' | 'ÿ' => 'y',
            'Ý' | 'Ỳ' | 'Ŷ' | 'Ÿ' => 'Y',
            // Consonants.
            'č' => 'c',
            'Č' => 'C',
            'ď' => 'd',
            'Ď' => 'D',
            // Both the long (acute) and the soft (caron) forms fold to the base letter.
            'ĺ' | 'ľ' => 'l',
            'Ĺ' | 'Ľ' => 'L',
            'ň' => 'n',
            'Ň' => 'N',
            'ŕ' | 'ř' => 'r',
            'Ŕ' | 'Ř' => 'R',
            'š' => 's',
            'Š' => 'S',
            'ť' => 't',
            'Ť' => 'T',
            'ž' => 'z',
            'Ž' => 'Z',
            _ => ch,
        })
        .collect()
}
/// Produces the canonical form of a value used for exact matching.
///
/// The text is first passed through [`normalize_search_text`] and the result
/// is then lowercased, so that indexed data and user input can be compared in
/// one and the same form.
pub fn normalize_exact_value(text: &str) -> String {
    let folded = normalize_search_text(text);
    folded.to_lowercase()
}
/// Creates a hybrid Slovak search schema with optimized prefix fields.
pub fn create_search_schema() -> Schema {
let mut schema_builder = Schema::builder();
@@ -22,6 +64,7 @@ pub fn create_search_schema() -> Schema {
schema_builder.add_u64_field("pg_id", INDEXED | STORED);
schema_builder.add_text_field("table_name", STRING | STORED);
schema_builder.add_text_field("row_key", STRING | STORED);
schema_builder.add_text_field("column_exact", STRING);
// For prefixes (1-4 chars).
let short_prefix_indexing = TextFieldIndexing::default()

View File

@@ -12,7 +12,9 @@ use tonic::{Request, Response, Status};
use common::proto::komp_ac::search::searcher_server::Searcher;
use common::proto::komp_ac::search::{search_response::Hit, SearchRequest, SearchResponse};
pub use common::proto::komp_ac::search::searcher_server::SearcherServer;
use common::search::{register_slovak_tokenizers, search_index_path};
use common::search::{
normalize_exact_value, normalize_search_text, register_slovak_tokenizers, search_index_path,
};
use sqlx::{PgPool, Row};
use tracing::info;
@@ -33,6 +35,7 @@ pub struct SearcherService {
/// Resolved scope of one search request, as assembled by `resolve_search_scope`.
struct SearchScope {
    /// Name of the profile being searched, copied from the request.
    profile_name: String,
    /// Optional table restriction; handed to `build_query` as its table filter.
    requested_table: Option<String>,
    /// Optional column restriction; populated only when the request carries a
    /// non-blank `column_name` that passes identifier validation.
    requested_column: Option<String>,
    /// Index location derived via `search_index_path` from `INDEX_ROOT` and the profile name.
    index_path: PathBuf,
}
@@ -42,42 +45,6 @@ struct SearchCandidate {
table_name: String,
}
/// Strips the diacritics listed below from `text`, keeping letter case.
///
/// Characters that are not in the table are copied through unchanged, and the
/// mapping is always one `char` in, one `char` out.
fn normalize_slovak_text(text: &str) -> String {
    /// Folds a single character to its unaccented base letter.
    fn fold(symbol: char) -> char {
        match symbol {
            // Lowercase vowels.
            'á' | 'à' | 'â' | 'ä' | 'ă' | 'ā' => 'a',
            'é' | 'è' | 'ê' | 'ë' | 'ě' | 'ē' => 'e',
            'í' | 'ì' | 'î' | 'ï' | 'ī' => 'i',
            'ó' | 'ò' | 'ô' | 'ö' | 'ō' | 'ő' => 'o',
            'ú' | 'ù' | 'û' | 'ü' | 'ū' | 'ű' => 'u',
            'ý' | 'ỳ' | 'ŷ' | 'ÿ' => 'y',
            // Uppercase vowels.
            'Á' | 'À' | 'Â' | 'Ä' | 'Ă' | 'Ā' => 'A',
            'É' | 'È' | 'Ê' | 'Ë' | 'Ě' | 'Ē' => 'E',
            'Í' | 'Ì' | 'Î' | 'Ï' | 'Ī' => 'I',
            'Ó' | 'Ò' | 'Ô' | 'Ö' | 'Ō' | 'Ő' => 'O',
            'Ú' | 'Ù' | 'Û' | 'Ü' | 'Ū' | 'Ű' => 'U',
            'Ý' | 'Ỳ' | 'Ŷ' | 'Ÿ' => 'Y',
            // Lowercase consonants.
            'č' => 'c',
            'ď' => 'd',
            'ľ' => 'l',
            'ň' => 'n',
            'ř' => 'r',
            'š' => 's',
            'ť' => 't',
            'ž' => 'z',
            // Uppercase consonants.
            'Č' => 'C',
            'Ď' => 'D',
            'Ľ' => 'L',
            'Ň' => 'N',
            'Ř' => 'R',
            'Š' => 'S',
            'Ť' => 'T',
            'Ž' => 'Z',
            other => other,
        }
    }

    let mut folded = String::with_capacity(text.len());
    for symbol in text.chars() {
        folded.push(fold(symbol));
    }
    folded
}
fn validate_identifier(value: &str, field_name: &str) -> Result<(), Status> {
let mut chars = value.chars();
let Some(first) = chars.next() else {
@@ -116,6 +83,7 @@ async fn resolve_search_scope(
pool: &PgPool,
profile_name: &str,
requested_table: Option<&str>,
requested_column: Option<&str>,
) -> Result<SearchScope, Status> {
validate_identifier(profile_name, "profile_name")?;
@@ -155,9 +123,19 @@ async fn resolve_search_scope(
None
};
let requested_column = if let Some(column_name) =
requested_column.filter(|value| !value.trim().is_empty())
{
validate_identifier(column_name, "column_name")?;
Some(column_name.to_string())
} else {
None
};
Ok(SearchScope {
profile_name: profile_name.to_string(),
requested_table,
requested_column,
index_path: search_index_path(Path::new(INDEX_ROOT), profile_name),
})
}
@@ -168,11 +146,15 @@ fn build_query(
normalized_query: &str,
mode: SearchMode,
table_filter: Option<&str>,
column_filter: Option<&str>,
) -> Result<Option<BooleanQuery>, Status> {
let schema = index.schema();
let table_name_field = schema
.get_field("table_name")
.map_err(|_| Status::internal("Schema is missing the 'table_name' field."))?;
let column_exact_field = schema
.get_field("column_exact")
.map_err(|_| Status::internal("Schema is missing the 'column_exact' field."))?;
let prefix_edge_field = schema
.get_field("prefix_edge")
.map_err(|_| Status::internal("Schema is missing the 'prefix_edge' field."))?;
@@ -189,17 +171,29 @@ fn build_query(
}
let content_query: Box<dyn Query> = if matches!(mode, SearchMode::Exact) {
let exact_parser = QueryParser::for_index(index, vec![prefix_full_field]);
let exact_query_str = if words.len() == 1 {
normalized_query.to_string()
if let Some(column_name) = column_filter {
let exact_term = Term::from_field_text(
column_exact_field,
&format!(
"{}:{}",
column_name.to_ascii_lowercase(),
normalize_exact_value(normalized_query)
),
);
Box::new(TermQuery::new(exact_term, IndexRecordOption::Basic))
} else {
format!("\"{}\"", normalized_query)
};
let exact_parser = QueryParser::for_index(index, vec![prefix_full_field]);
let exact_query_str = if words.len() == 1 {
normalized_query.to_string()
} else {
format!("\"{}\"", normalized_query)
};
let exact_query = exact_parser
.parse_query(&exact_query_str)
.map_err(|e| Status::internal(format!("Failed to build exact query: {}", e)))?;
Box::new(exact_query)
let exact_query = exact_parser
.parse_query(&exact_query_str)
.map_err(|e| Status::internal(format!("Failed to build exact query: {}", e)))?;
Box::new(exact_query)
}
} else {
let mut query_layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();
@@ -325,9 +319,10 @@ async fn search_profile(
let Some(master_query) = build_query(
&index,
&normalize_slovak_text(query_str),
&normalize_search_text(query_str),
mode,
scope.requested_table.as_deref(),
scope.requested_column.as_deref(),
)? else {
return Ok(vec![]);
};
@@ -455,8 +450,13 @@ impl SearcherService {
// Request scope
let scope =
resolve_search_scope(&self.pool, profile_name, req.table_name.as_deref().map(str::trim))
.await?;
resolve_search_scope(
&self.pool,
profile_name,
req.table_name.as_deref().map(str::trim),
req.column_name.as_deref().map(str::trim),
)
.await?;
let query = req.query.trim();
if query.is_empty() {

2
server

Submodule server updated: b26adc0cb0...16ea6e14b5