column name indexing
This commit is contained in:
@@ -11,6 +11,7 @@ message SearchRequest {
|
|||||||
optional string table_name = 1;
|
optional string table_name = 1;
|
||||||
string query = 2;
|
string query = 2;
|
||||||
string profile_name = 3;
|
string profile_name = 3;
|
||||||
|
optional string column_name = 4;
|
||||||
}
|
}
|
||||||
message SearchResponse {
|
message SearchResponse {
|
||||||
message Hit {
|
message Hit {
|
||||||
|
|||||||
Binary file not shown.
@@ -7,6 +7,8 @@ pub struct SearchRequest {
|
|||||||
pub query: ::prost::alloc::string::String,
|
pub query: ::prost::alloc::string::String,
|
||||||
#[prost(string, tag = "3")]
|
#[prost(string, tag = "3")]
|
||||||
pub profile_name: ::prost::alloc::string::String,
|
pub profile_name: ::prost::alloc::string::String,
|
||||||
|
#[prost(string, optional, tag = "4")]
|
||||||
|
pub column_name: ::core::option::Option<::prost::alloc::string::String>,
|
||||||
}
|
}
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||||
pub struct SearchResponse {
|
pub struct SearchResponse {
|
||||||
|
|||||||
@@ -15,6 +15,48 @@ pub fn search_row_key(table_name: &str, row_id: i64) -> String {
|
|||||||
format!("{}:{}", table_name, row_id)
|
format!("{}:{}", table_name, row_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Folds accented Latin letters in user-entered search text to their unaccented
/// base letter while preserving letter case.
///
/// The table covers the Slovak alphabet (including the long consonants `ĺ` and
/// `ŕ`), the Czech letters `ě`, `ř` and `ů`, and a few accents from neighbouring
/// languages. Any character without an entry is passed through unchanged, so the
/// result always contains exactly as many `char`s as `text`.
///
/// NOTE(review): values that were indexed with an earlier version of this table
/// and contain `ĺ`, `ŕ` or `ů` presumably need to be reindexed so stored and
/// queried forms agree — confirm against the indexer.
pub fn normalize_search_text(text: &str) -> String {
    text.chars()
        .map(|ch| match ch {
            // Vowels.
            'á' | 'à' | 'â' | 'ä' | 'ă' | 'ā' => 'a',
            'Á' | 'À' | 'Â' | 'Ä' | 'Ă' | 'Ā' => 'A',
            'é' | 'è' | 'ê' | 'ë' | 'ě' | 'ē' => 'e',
            'É' | 'È' | 'Ê' | 'Ë' | 'Ě' | 'Ē' => 'E',
            'í' | 'ì' | 'î' | 'ï' | 'ī' => 'i',
            'Í' | 'Ì' | 'Î' | 'Ï' | 'Ī' => 'I',
            'ó' | 'ò' | 'ô' | 'ö' | 'ō' | 'ő' => 'o',
            'Ó' | 'Ò' | 'Ô' | 'Ö' | 'Ō' | 'Ő' => 'O',
            'ú' | 'ù' | 'û' | 'ü' | 'ū' | 'ű' | 'ů' => 'u',
            'Ú' | 'Ù' | 'Û' | 'Ü' | 'Ū' | 'Ű' | 'Ů' => 'U',
            'ý' | 'ỳ' | 'ŷ' | 'ÿ' => 'y',
            'Ý' | 'Ỳ' | 'Ŷ' | 'Ÿ' => 'Y',
            // Consonants.
            'č' => 'c',
            'Č' => 'C',
            'ď' => 'd',
            'Ď' => 'D',
            // Slovak has both the soft `ľ` and the long `ĺ`.
            'ľ' | 'ĺ' => 'l',
            'Ľ' | 'Ĺ' => 'L',
            'ň' => 'n',
            'Ň' => 'N',
            // Czech `ř` and Slovak long `ŕ`.
            'ř' | 'ŕ' => 'r',
            'Ř' | 'Ŕ' => 'R',
            'š' => 's',
            'Š' => 'S',
            'ť' => 't',
            'Ť' => 'T',
            'ž' => 'z',
            'Ž' => 'Z',
            _ => ch,
        })
        .collect()
}
|
||||||
|
|
||||||
|
/// Normalizes an exact-match value so indexed data and user input use the same form.
|
||||||
|
pub fn normalize_exact_value(text: &str) -> String {
|
||||||
|
normalize_search_text(text).to_lowercase()
|
||||||
|
}
|
||||||
|
|
||||||
/// Creates a hybrid Slovak search schema with optimized prefix fields.
|
/// Creates a hybrid Slovak search schema with optimized prefix fields.
|
||||||
pub fn create_search_schema() -> Schema {
|
pub fn create_search_schema() -> Schema {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
@@ -22,6 +64,7 @@ pub fn create_search_schema() -> Schema {
|
|||||||
schema_builder.add_u64_field("pg_id", INDEXED | STORED);
|
schema_builder.add_u64_field("pg_id", INDEXED | STORED);
|
||||||
schema_builder.add_text_field("table_name", STRING | STORED);
|
schema_builder.add_text_field("table_name", STRING | STORED);
|
||||||
schema_builder.add_text_field("row_key", STRING | STORED);
|
schema_builder.add_text_field("row_key", STRING | STORED);
|
||||||
|
schema_builder.add_text_field("column_exact", STRING);
|
||||||
|
|
||||||
// For prefixes (1-4 chars).
|
// For prefixes (1-4 chars).
|
||||||
let short_prefix_indexing = TextFieldIndexing::default()
|
let short_prefix_indexing = TextFieldIndexing::default()
|
||||||
|
|||||||
@@ -12,7 +12,9 @@ use tonic::{Request, Response, Status};
|
|||||||
use common::proto::komp_ac::search::searcher_server::Searcher;
|
use common::proto::komp_ac::search::searcher_server::Searcher;
|
||||||
use common::proto::komp_ac::search::{search_response::Hit, SearchRequest, SearchResponse};
|
use common::proto::komp_ac::search::{search_response::Hit, SearchRequest, SearchResponse};
|
||||||
pub use common::proto::komp_ac::search::searcher_server::SearcherServer;
|
pub use common::proto::komp_ac::search::searcher_server::SearcherServer;
|
||||||
use common::search::{register_slovak_tokenizers, search_index_path};
|
use common::search::{
|
||||||
|
normalize_exact_value, normalize_search_text, register_slovak_tokenizers, search_index_path,
|
||||||
|
};
|
||||||
use sqlx::{PgPool, Row};
|
use sqlx::{PgPool, Row};
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
@@ -33,6 +35,7 @@ pub struct SearcherService {
|
|||||||
/// Resolved scope of a single search request, as produced by `resolve_search_scope`.
struct SearchScope {
    /// Profile the search runs against; checked with `validate_identifier` before use.
    profile_name: String,
    /// Table the caller asked to restrict the search to, if any.
    requested_table: Option<String>,
    /// Column the caller asked to restrict the search to. `None` when the
    /// request omitted it or supplied only whitespace.
    requested_column: Option<String>,
    /// Index location derived from `INDEX_ROOT` and the profile name via
    /// `search_index_path`.
    index_path: PathBuf,
}
|
||||||
|
|
||||||
@@ -42,42 +45,6 @@ struct SearchCandidate {
|
|||||||
table_name: String,
|
table_name: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Maps one accented letter to its unaccented base letter, keeping its case.
/// Characters without an entry are returned unchanged.
fn fold_slovak_char(c: char) -> char {
    match c {
        'á' | 'à' | 'â' | 'ä' | 'ă' | 'ā' => 'a',
        'Á' | 'À' | 'Â' | 'Ä' | 'Ă' | 'Ā' => 'A',
        'é' | 'è' | 'ê' | 'ë' | 'ě' | 'ē' => 'e',
        'É' | 'È' | 'Ê' | 'Ë' | 'Ě' | 'Ē' => 'E',
        'í' | 'ì' | 'î' | 'ï' | 'ī' => 'i',
        'Í' | 'Ì' | 'Î' | 'Ï' | 'Ī' => 'I',
        'ó' | 'ò' | 'ô' | 'ö' | 'ō' | 'ő' => 'o',
        'Ó' | 'Ò' | 'Ô' | 'Ö' | 'Ō' | 'Ő' => 'O',
        'ú' | 'ù' | 'û' | 'ü' | 'ū' | 'ű' | 'ů' => 'u',
        'Ú' | 'Ù' | 'Û' | 'Ü' | 'Ū' | 'Ű' | 'Ů' => 'U',
        'ý' | 'ỳ' | 'ŷ' | 'ÿ' => 'y',
        'Ý' | 'Ỳ' | 'Ŷ' | 'Ÿ' => 'Y',
        'č' => 'c',
        'Č' => 'C',
        'ď' => 'd',
        'Ď' => 'D',
        // Soft `ľ` and long `ĺ` are both Slovak letters.
        'ľ' | 'ĺ' => 'l',
        'Ľ' | 'Ĺ' => 'L',
        'ň' => 'n',
        'Ň' => 'N',
        // Czech `ř` and Slovak long `ŕ`.
        'ř' | 'ŕ' => 'r',
        'Ř' | 'Ŕ' => 'R',
        'š' => 's',
        'Š' => 'S',
        'ť' => 't',
        'Ť' => 'T',
        'ž' => 'z',
        'Ž' => 'Z',
        other => other,
    }
}

/// Strips diacritics from Slovak (and related) text while preserving case.
///
/// Each character is folded independently, so the output has the same number
/// of `char`s as `text`; characters outside the table pass through untouched.
fn normalize_slovak_text(text: &str) -> String {
    // Every replacement is a single ASCII byte, so the result never needs more
    // bytes than the input.
    let mut folded = String::with_capacity(text.len());
    for c in text.chars() {
        folded.push(fold_slovak_char(c));
    }
    folded
}
|
|
||||||
|
|
||||||
fn validate_identifier(value: &str, field_name: &str) -> Result<(), Status> {
|
fn validate_identifier(value: &str, field_name: &str) -> Result<(), Status> {
|
||||||
let mut chars = value.chars();
|
let mut chars = value.chars();
|
||||||
let Some(first) = chars.next() else {
|
let Some(first) = chars.next() else {
|
||||||
@@ -116,6 +83,7 @@ async fn resolve_search_scope(
|
|||||||
pool: &PgPool,
|
pool: &PgPool,
|
||||||
profile_name: &str,
|
profile_name: &str,
|
||||||
requested_table: Option<&str>,
|
requested_table: Option<&str>,
|
||||||
|
requested_column: Option<&str>,
|
||||||
) -> Result<SearchScope, Status> {
|
) -> Result<SearchScope, Status> {
|
||||||
validate_identifier(profile_name, "profile_name")?;
|
validate_identifier(profile_name, "profile_name")?;
|
||||||
|
|
||||||
@@ -155,9 +123,19 @@ async fn resolve_search_scope(
|
|||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let requested_column = if let Some(column_name) =
|
||||||
|
requested_column.filter(|value| !value.trim().is_empty())
|
||||||
|
{
|
||||||
|
validate_identifier(column_name, "column_name")?;
|
||||||
|
Some(column_name.to_string())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
Ok(SearchScope {
|
Ok(SearchScope {
|
||||||
profile_name: profile_name.to_string(),
|
profile_name: profile_name.to_string(),
|
||||||
requested_table,
|
requested_table,
|
||||||
|
requested_column,
|
||||||
index_path: search_index_path(Path::new(INDEX_ROOT), profile_name),
|
index_path: search_index_path(Path::new(INDEX_ROOT), profile_name),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -168,11 +146,15 @@ fn build_query(
|
|||||||
normalized_query: &str,
|
normalized_query: &str,
|
||||||
mode: SearchMode,
|
mode: SearchMode,
|
||||||
table_filter: Option<&str>,
|
table_filter: Option<&str>,
|
||||||
|
column_filter: Option<&str>,
|
||||||
) -> Result<Option<BooleanQuery>, Status> {
|
) -> Result<Option<BooleanQuery>, Status> {
|
||||||
let schema = index.schema();
|
let schema = index.schema();
|
||||||
let table_name_field = schema
|
let table_name_field = schema
|
||||||
.get_field("table_name")
|
.get_field("table_name")
|
||||||
.map_err(|_| Status::internal("Schema is missing the 'table_name' field."))?;
|
.map_err(|_| Status::internal("Schema is missing the 'table_name' field."))?;
|
||||||
|
let column_exact_field = schema
|
||||||
|
.get_field("column_exact")
|
||||||
|
.map_err(|_| Status::internal("Schema is missing the 'column_exact' field."))?;
|
||||||
let prefix_edge_field = schema
|
let prefix_edge_field = schema
|
||||||
.get_field("prefix_edge")
|
.get_field("prefix_edge")
|
||||||
.map_err(|_| Status::internal("Schema is missing the 'prefix_edge' field."))?;
|
.map_err(|_| Status::internal("Schema is missing the 'prefix_edge' field."))?;
|
||||||
@@ -189,17 +171,29 @@ fn build_query(
|
|||||||
}
|
}
|
||||||
|
|
||||||
let content_query: Box<dyn Query> = if matches!(mode, SearchMode::Exact) {
|
let content_query: Box<dyn Query> = if matches!(mode, SearchMode::Exact) {
|
||||||
let exact_parser = QueryParser::for_index(index, vec![prefix_full_field]);
|
if let Some(column_name) = column_filter {
|
||||||
let exact_query_str = if words.len() == 1 {
|
let exact_term = Term::from_field_text(
|
||||||
normalized_query.to_string()
|
column_exact_field,
|
||||||
|
&format!(
|
||||||
|
"{}:{}",
|
||||||
|
column_name.to_ascii_lowercase(),
|
||||||
|
normalize_exact_value(normalized_query)
|
||||||
|
),
|
||||||
|
);
|
||||||
|
Box::new(TermQuery::new(exact_term, IndexRecordOption::Basic))
|
||||||
} else {
|
} else {
|
||||||
format!("\"{}\"", normalized_query)
|
let exact_parser = QueryParser::for_index(index, vec![prefix_full_field]);
|
||||||
};
|
let exact_query_str = if words.len() == 1 {
|
||||||
|
normalized_query.to_string()
|
||||||
|
} else {
|
||||||
|
format!("\"{}\"", normalized_query)
|
||||||
|
};
|
||||||
|
|
||||||
let exact_query = exact_parser
|
let exact_query = exact_parser
|
||||||
.parse_query(&exact_query_str)
|
.parse_query(&exact_query_str)
|
||||||
.map_err(|e| Status::internal(format!("Failed to build exact query: {}", e)))?;
|
.map_err(|e| Status::internal(format!("Failed to build exact query: {}", e)))?;
|
||||||
Box::new(exact_query)
|
Box::new(exact_query)
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
let mut query_layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();
|
let mut query_layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();
|
||||||
|
|
||||||
@@ -325,9 +319,10 @@ async fn search_profile(
|
|||||||
|
|
||||||
let Some(master_query) = build_query(
|
let Some(master_query) = build_query(
|
||||||
&index,
|
&index,
|
||||||
&normalize_slovak_text(query_str),
|
&normalize_search_text(query_str),
|
||||||
mode,
|
mode,
|
||||||
scope.requested_table.as_deref(),
|
scope.requested_table.as_deref(),
|
||||||
|
scope.requested_column.as_deref(),
|
||||||
)? else {
|
)? else {
|
||||||
return Ok(vec![]);
|
return Ok(vec![]);
|
||||||
};
|
};
|
||||||
@@ -455,8 +450,13 @@ impl SearcherService {
|
|||||||
|
|
||||||
// Request scope
|
// Request scope
|
||||||
let scope =
|
let scope =
|
||||||
resolve_search_scope(&self.pool, profile_name, req.table_name.as_deref().map(str::trim))
|
resolve_search_scope(
|
||||||
.await?;
|
&self.pool,
|
||||||
|
profile_name,
|
||||||
|
req.table_name.as_deref().map(str::trim),
|
||||||
|
req.column_name.as_deref().map(str::trim),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
let query = req.query.trim();
|
let query = req.query.trim();
|
||||||
if query.is_empty() {
|
if query.is_empty() {
|
||||||
|
|||||||
2
server
2
server
Submodule server updated: b26adc0cb0...16ea6e14b5
Reference in New Issue
Block a user