working v12
This commit is contained in:
@@ -112,6 +112,7 @@ impl SearcherService {
|
||||
|
||||
Ok(Response::new(SearchResponse { hits }))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
struct ProfileIndex {
|
||||
@@ -133,7 +134,7 @@ impl ProfileIndex {
|
||||
.map_err(|e| Status::internal(format!("Failed to build index reader: {}", e)))?;
|
||||
let fields = SchemaFields::from(&index.schema()).map_err(|e| {
|
||||
Status::internal(format!(
|
||||
"Search index schema mismatch. Reindex required: {}",
|
||||
"Search index schema mismatch. Delete the stale index and create it again: {}",
|
||||
e
|
||||
))
|
||||
})?;
|
||||
@@ -205,6 +206,22 @@ fn validate_identifier(value: &str, field_name: &str) -> Result<(), Status> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn validate_search_column(value: &str) -> Result<(), Status> {
|
||||
if value.is_empty() {
|
||||
return Err(Status::invalid_argument(
|
||||
"constraint.column must not be empty",
|
||||
));
|
||||
}
|
||||
|
||||
if value.chars().any(|ch| ch.is_control() || ch == '\0') {
|
||||
return Err(Status::invalid_argument(
|
||||
"constraint.column contains invalid characters",
|
||||
));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Builds the qualified table reference `"<profile_name>"."<table_name>"`.
///
/// Both parts are wrapped in double quotes exactly as given; this function
/// performs no escaping or validation of its own.
fn qualify_profile_table(profile_name: &str, table_name: &str) -> String {
    ["\"", profile_name, "\".\"", table_name, "\""].concat()
}
|
||||
@@ -258,12 +275,7 @@ fn normalize_request(req: SearchRequest) -> Result<NormalizedSearchRequest, Stat
|
||||
|
||||
for constraint in req.must {
|
||||
let column = constraint.column.trim();
|
||||
if column.is_empty() {
|
||||
return Err(Status::invalid_argument(
|
||||
"constraint.column must not be empty",
|
||||
));
|
||||
}
|
||||
validate_identifier(column, "constraint.column")?;
|
||||
validate_search_column(column)?;
|
||||
|
||||
let query = constraint.query.trim();
|
||||
if query.is_empty() {
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use common::search::{
|
||||
json_path_term, normalize_exact, tokenize_ngram, tokenize_word, SchemaFields,
|
||||
json_path_term, normalize_column_name, normalize_exact, tokenize_ngram, tokenize_word,
|
||||
SchemaFields,
|
||||
};
|
||||
use tantivy::query::{
|
||||
BooleanQuery, BoostQuery, EmptyQuery, FuzzyTermQuery, Occur, PhraseQuery, Query, QueryParser,
|
||||
@@ -48,7 +49,7 @@ pub fn build_master_query(
|
||||
let free_words = tokenize_word(free_query);
|
||||
if !free_words.is_empty() {
|
||||
let predicate = fuzzy_predicate_unscoped(index, fields, &free_words)?;
|
||||
clauses.push((Occur::Should, predicate));
|
||||
clauses.push((Occur::Must, predicate));
|
||||
has_search_clause = true;
|
||||
}
|
||||
|
||||
@@ -79,7 +80,8 @@ fn exact_predicate(
|
||||
));
|
||||
}
|
||||
|
||||
let term = json_path_term(fields.data_exact, column, &normalized_value);
|
||||
let column = normalize_column_name(column);
|
||||
let term = json_path_term(fields.data_exact, &column, &normalized_value);
|
||||
Ok(Box::new(TermQuery::new(term, IndexRecordOption::Basic)))
|
||||
}
|
||||
|
||||
@@ -95,11 +97,13 @@ fn fuzzy_predicate_scoped(
|
||||
));
|
||||
}
|
||||
|
||||
let column = normalize_column_name(column);
|
||||
|
||||
let mut layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();
|
||||
|
||||
let mut per_word_clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();
|
||||
for word in &words {
|
||||
let term = json_path_term(fields.data_word, column, word);
|
||||
let term = json_path_term(fields.data_word, &column, word);
|
||||
let mut alternates: Vec<(Occur, Box<dyn Query>)> = Vec::new();
|
||||
|
||||
alternates.push((
|
||||
@@ -136,7 +140,7 @@ fn fuzzy_predicate_scoped(
|
||||
let phrase_terms: Vec<(usize, Term)> = words
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(offset, word)| (offset, json_path_term(fields.data_word, column, word)))
|
||||
.map(|(offset, word)| (offset, json_path_term(fields.data_word, &column, word)))
|
||||
.collect();
|
||||
let phrase = PhraseQuery::new_with_offset_and_slop(phrase_terms, 3);
|
||||
layers.push((
|
||||
@@ -150,7 +154,7 @@ fn fuzzy_predicate_scoped(
|
||||
let ngram_clauses: Vec<(Occur, Box<dyn Query>)> = ngrams
|
||||
.into_iter()
|
||||
.map(|gram| {
|
||||
let term = json_path_term(fields.data_ngram, column, &gram);
|
||||
let term = json_path_term(fields.data_ngram, &column, &gram);
|
||||
(
|
||||
Occur::Must,
|
||||
Box::new(TermQuery::new(term, IndexRecordOption::Basic)) as Box<dyn Query>,
|
||||
@@ -176,35 +180,43 @@ fn fuzzy_predicate_unscoped(
|
||||
) -> Result<Box<dyn Query>, Status> {
|
||||
let mut layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();
|
||||
|
||||
{
|
||||
let parser = QueryParser::for_index(index, vec![fields.data_word]);
|
||||
let query_string = words
|
||||
.iter()
|
||||
.map(|word| format!("+{}*", word))
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
if let Ok(query) = parser.parse_query(&query_string) {
|
||||
layers.push((Occur::Should, Box::new(BoostQuery::new(query, 4.0))));
|
||||
}
|
||||
}
|
||||
let mut per_word_clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();
|
||||
for word in words {
|
||||
let term = Term::from_field_text(fields.all_text, word);
|
||||
let mut alternates: Vec<(Occur, Box<dyn Query>)> = Vec::new();
|
||||
|
||||
{
|
||||
let parser = QueryParser::for_index(index, vec![fields.data_word]);
|
||||
let query_string = words
|
||||
.iter()
|
||||
.map(|word| match fuzzy_distance(word.chars().count()) {
|
||||
Some(distance) => format!("+{}~{}", word, distance),
|
||||
None => format!("+{}", word),
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
if let Ok(query) = parser.parse_query(&query_string) {
|
||||
layers.push((Occur::Should, Box::new(BoostQuery::new(query, 2.0))));
|
||||
alternates.push((
|
||||
Occur::Should,
|
||||
Box::new(BoostQuery::new(
|
||||
Box::new(TermQuery::new(term.clone(), IndexRecordOption::WithFreqs)),
|
||||
4.0,
|
||||
)),
|
||||
));
|
||||
|
||||
alternates.push((
|
||||
Occur::Should,
|
||||
Box::new(BoostQuery::new(
|
||||
Box::new(FuzzyTermQuery::new_prefix(term.clone(), 0, false)),
|
||||
3.0,
|
||||
)),
|
||||
));
|
||||
|
||||
if let Some(distance) = fuzzy_distance(word.chars().count()) {
|
||||
alternates.push((
|
||||
Occur::Should,
|
||||
Box::new(BoostQuery::new(
|
||||
Box::new(FuzzyTermQuery::new(term, distance, true)),
|
||||
2.0,
|
||||
)),
|
||||
));
|
||||
}
|
||||
|
||||
per_word_clauses.push((Occur::Must, Box::new(BooleanQuery::new(alternates))));
|
||||
}
|
||||
layers.push((Occur::Should, Box::new(BooleanQuery::new(per_word_clauses))));
|
||||
|
||||
if words.len() > 1 {
|
||||
let parser = QueryParser::for_index(index, vec![fields.data_word]);
|
||||
let parser = QueryParser::for_index(index, vec![fields.all_text]);
|
||||
let query_string = format!("\"{}\"~3", words.join(" "));
|
||||
if let Ok(query) = parser.parse_query(&query_string) {
|
||||
layers.push((Occur::Should, Box::new(BoostQuery::new(query, 2.0))));
|
||||
@@ -212,10 +224,10 @@ fn fuzzy_predicate_unscoped(
|
||||
}
|
||||
|
||||
{
|
||||
let parser = QueryParser::for_index(index, vec![fields.data_ngram]);
|
||||
let parser = QueryParser::for_index(index, vec![fields.all_text]);
|
||||
let query_string = words
|
||||
.iter()
|
||||
.map(|word| format!("+{}", word))
|
||||
.map(|word| format!("+{}*", word))
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
if let Ok(query) = parser.parse_query(&query_string) {
|
||||
|
||||
Reference in New Issue
Block a user