working v12

This commit is contained in:
Priec
2026-05-17 13:10:44 +02:00
parent 6a87750329
commit dc273506b7
9 changed files with 121 additions and 50 deletions

View File

@@ -112,6 +112,7 @@ impl SearcherService {
Ok(Response::new(SearchResponse { hits }))
}
}
struct ProfileIndex {
@@ -133,7 +134,7 @@ impl ProfileIndex {
.map_err(|e| Status::internal(format!("Failed to build index reader: {}", e)))?;
let fields = SchemaFields::from(&index.schema()).map_err(|e| {
Status::internal(format!(
"Search index schema mismatch. Reindex required: {}",
"Search index schema mismatch. Delete the stale index and create it again: {}",
e
))
})?;
@@ -205,6 +206,22 @@ fn validate_identifier(value: &str, field_name: &str) -> Result<(), Status> {
Ok(())
}
fn validate_search_column(value: &str) -> Result<(), Status> {
if value.is_empty() {
return Err(Status::invalid_argument(
"constraint.column must not be empty",
));
}
if value.chars().any(|ch| ch.is_control() || ch == '\0') {
return Err(Status::invalid_argument(
"constraint.column contains invalid characters",
));
}
Ok(())
}
fn qualify_profile_table(profile_name: &str, table_name: &str) -> String {
format!("\"{}\".\"{}\"", profile_name, table_name)
}
@@ -258,12 +275,7 @@ fn normalize_request(req: SearchRequest) -> Result<NormalizedSearchRequest, Stat
for constraint in req.must {
let column = constraint.column.trim();
if column.is_empty() {
return Err(Status::invalid_argument(
"constraint.column must not be empty",
));
}
validate_identifier(column, "constraint.column")?;
validate_search_column(column)?;
let query = constraint.query.trim();
if query.is_empty() {

View File

@@ -1,5 +1,6 @@
use common::search::{
json_path_term, normalize_exact, tokenize_ngram, tokenize_word, SchemaFields,
json_path_term, normalize_column_name, normalize_exact, tokenize_ngram, tokenize_word,
SchemaFields,
};
use tantivy::query::{
BooleanQuery, BoostQuery, EmptyQuery, FuzzyTermQuery, Occur, PhraseQuery, Query, QueryParser,
@@ -48,7 +49,7 @@ pub fn build_master_query(
let free_words = tokenize_word(free_query);
if !free_words.is_empty() {
let predicate = fuzzy_predicate_unscoped(index, fields, &free_words)?;
clauses.push((Occur::Should, predicate));
clauses.push((Occur::Must, predicate));
has_search_clause = true;
}
@@ -79,7 +80,8 @@ fn exact_predicate(
));
}
let term = json_path_term(fields.data_exact, column, &normalized_value);
let column = normalize_column_name(column);
let term = json_path_term(fields.data_exact, &column, &normalized_value);
Ok(Box::new(TermQuery::new(term, IndexRecordOption::Basic)))
}
@@ -95,11 +97,13 @@ fn fuzzy_predicate_scoped(
));
}
let column = normalize_column_name(column);
let mut layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();
let mut per_word_clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();
for word in &words {
let term = json_path_term(fields.data_word, column, word);
let term = json_path_term(fields.data_word, &column, word);
let mut alternates: Vec<(Occur, Box<dyn Query>)> = Vec::new();
alternates.push((
@@ -136,7 +140,7 @@ fn fuzzy_predicate_scoped(
let phrase_terms: Vec<(usize, Term)> = words
.iter()
.enumerate()
.map(|(offset, word)| (offset, json_path_term(fields.data_word, column, word)))
.map(|(offset, word)| (offset, json_path_term(fields.data_word, &column, word)))
.collect();
let phrase = PhraseQuery::new_with_offset_and_slop(phrase_terms, 3);
layers.push((
@@ -150,7 +154,7 @@ fn fuzzy_predicate_scoped(
let ngram_clauses: Vec<(Occur, Box<dyn Query>)> = ngrams
.into_iter()
.map(|gram| {
let term = json_path_term(fields.data_ngram, column, &gram);
let term = json_path_term(fields.data_ngram, &column, &gram);
(
Occur::Must,
Box::new(TermQuery::new(term, IndexRecordOption::Basic)) as Box<dyn Query>,
@@ -176,35 +180,43 @@ fn fuzzy_predicate_unscoped(
) -> Result<Box<dyn Query>, Status> {
let mut layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();
{
let parser = QueryParser::for_index(index, vec![fields.data_word]);
let query_string = words
.iter()
.map(|word| format!("+{}*", word))
.collect::<Vec<_>>()
.join(" ");
if let Ok(query) = parser.parse_query(&query_string) {
layers.push((Occur::Should, Box::new(BoostQuery::new(query, 4.0))));
}
}
let mut per_word_clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();
for word in words {
let term = Term::from_field_text(fields.all_text, word);
let mut alternates: Vec<(Occur, Box<dyn Query>)> = Vec::new();
{
let parser = QueryParser::for_index(index, vec![fields.data_word]);
let query_string = words
.iter()
.map(|word| match fuzzy_distance(word.chars().count()) {
Some(distance) => format!("+{}~{}", word, distance),
None => format!("+{}", word),
})
.collect::<Vec<_>>()
.join(" ");
if let Ok(query) = parser.parse_query(&query_string) {
layers.push((Occur::Should, Box::new(BoostQuery::new(query, 2.0))));
alternates.push((
Occur::Should,
Box::new(BoostQuery::new(
Box::new(TermQuery::new(term.clone(), IndexRecordOption::WithFreqs)),
4.0,
)),
));
alternates.push((
Occur::Should,
Box::new(BoostQuery::new(
Box::new(FuzzyTermQuery::new_prefix(term.clone(), 0, false)),
3.0,
)),
));
if let Some(distance) = fuzzy_distance(word.chars().count()) {
alternates.push((
Occur::Should,
Box::new(BoostQuery::new(
Box::new(FuzzyTermQuery::new(term, distance, true)),
2.0,
)),
));
}
per_word_clauses.push((Occur::Must, Box::new(BooleanQuery::new(alternates))));
}
layers.push((Occur::Should, Box::new(BooleanQuery::new(per_word_clauses))));
if words.len() > 1 {
let parser = QueryParser::for_index(index, vec![fields.data_word]);
let parser = QueryParser::for_index(index, vec![fields.all_text]);
let query_string = format!("\"{}\"~3", words.join(" "));
if let Ok(query) = parser.parse_query(&query_string) {
layers.push((Occur::Should, Box::new(BoostQuery::new(query, 2.0))));
@@ -212,10 +224,10 @@ fn fuzzy_predicate_unscoped(
}
{
let parser = QueryParser::for_index(index, vec![fields.data_ngram]);
let parser = QueryParser::for_index(index, vec![fields.all_text]);
let query_string = words
.iter()
.map(|word| format!("+{}", word))
.map(|word| format!("+{}*", word))
.collect::<Vec<_>>()
.join(" ");
if let Ok(query) = parser.parse_query(&query_string) {