rustfmt and server clippy changes
@@ -37,7 +37,6 @@ pub mod proto {
         pub mod table_validation {
             include!("proto/komp_ac.table_validation.rs");
         }
-        pub const FILE_DESCRIPTOR_SET: &[u8] =
-            include_bytes!("proto/descriptor.bin");
+        pub const FILE_DESCRIPTOR_SET: &[u8] = include_bytes!("proto/descriptor.bin");
     }
 }
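Reviewer note: the joined constant embeds the compiled protobuf descriptor set. If it feeds gRPC reflection (the call site is not part of this diff), usage would look roughly like the sketch below; the module path `common::proto::FILE_DESCRIPTOR_SET` and the serve function are assumptions, not code from this commit.

```rust
// Sketch only: serving gRPC reflection from the embedded descriptor set.
// The path `common::proto::FILE_DESCRIPTOR_SET` is assumed; this commit
// does not show where the constant is consumed.
use tonic::transport::Server;

async fn serve_with_reflection(
    addr: std::net::SocketAddr,
) -> Result<(), Box<dyn std::error::Error>> {
    let reflection = tonic_reflection::server::Builder::configure()
        // Register the descriptor bytes emitted at build time.
        .register_encoded_file_descriptor_set(common::proto::FILE_DESCRIPTOR_SET)
        .build()?; // `build_v1()` on newer tonic-reflection releases
    Server::builder().add_service(reflection).serve(addr).await?;
    Ok(())
}
```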
@@ -48,30 +48,27 @@ pub fn register_slovak_tokenizers(index: &Index) -> tantivy::Result<()> {
     let tokenizer_manager = index.tokenizers();
 
     // TOKENIZER for `prefix_edge`: Edge N-gram (1-4 chars)
-    let edge_tokenizer =
-        TextAnalyzer::builder(NgramTokenizer::new(1, 4, true)?)
-            .filter(RemoveLongFilter::limit(40))
-            .filter(LowerCaser)
-            .filter(AsciiFoldingFilter)
-            .build();
+    let edge_tokenizer = TextAnalyzer::builder(NgramTokenizer::new(1, 4, true)?)
+        .filter(RemoveLongFilter::limit(40))
+        .filter(LowerCaser)
+        .filter(AsciiFoldingFilter)
+        .build();
     tokenizer_manager.register("slovak_prefix_edge", edge_tokenizer);
 
     // TOKENIZER for `prefix_full`: Simple word tokenizer
-    let full_tokenizer =
-        TextAnalyzer::builder(SimpleTokenizer::default())
-            .filter(RemoveLongFilter::limit(40))
-            .filter(LowerCaser)
-            .filter(AsciiFoldingFilter)
-            .build();
+    let full_tokenizer = TextAnalyzer::builder(SimpleTokenizer::default())
+        .filter(RemoveLongFilter::limit(40))
+        .filter(LowerCaser)
+        .filter(AsciiFoldingFilter)
+        .build();
     tokenizer_manager.register("slovak_prefix_full", full_tokenizer);
 
     // NGRAM TOKENIZER: For substring matching.
-    let ngram_tokenizer =
-        TextAnalyzer::builder(NgramTokenizer::new(3, 3, false)?)
-            .filter(RemoveLongFilter::limit(40))
-            .filter(LowerCaser)
-            .filter(AsciiFoldingFilter)
-            .build();
+    let ngram_tokenizer = TextAnalyzer::builder(NgramTokenizer::new(3, 3, false)?)
+        .filter(RemoveLongFilter::limit(40))
+        .filter(LowerCaser)
+        .filter(AsciiFoldingFilter)
+        .build();
     tokenizer_manager.register("slovak_ngram", ngram_tokenizer);
 
     Ok(())
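All three analyzers share the same filter chain (40-char length cap, lowercasing, ASCII folding) and differ only in the tokenizer. A standalone sketch of what the edge n-gram pipeline emits; the input word is an illustration, not taken from this commit:

```rust
// Standalone sketch: the edge n-gram analyzer built exactly as above emits
// lowercased, ASCII-folded prefixes of length 1..=4, so "Žilina" (example
// input chosen here) yields "z", "zi", "zil", "zili".
use tantivy::tokenizer::{
    AsciiFoldingFilter, LowerCaser, NgramTokenizer, RemoveLongFilter, TextAnalyzer,
};

fn main() -> tantivy::Result<()> {
    let mut analyzer = TextAnalyzer::builder(NgramTokenizer::new(1, 4, true)?)
        .filter(RemoveLongFilter::limit(40))
        .filter(LowerCaser)
        .filter(AsciiFoldingFilter)
        .build();
    analyzer
        .token_stream("Žilina")
        .process(&mut |token| println!("{}", token.text));
    Ok(())
}
```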
@@ -21,6 +21,9 @@
     clippy
     cargo-watch
     rust-analyzer
+    cargo-tarpaulin
+    cargo-flamegraph
+    rust-code-analysis
 
     # C build tools (for your linker issue)
     gcc
@@ -4,18 +4,15 @@ use std::collections::HashMap;
 use std::path::Path;
 use tantivy::collector::TopDocs;
 use tantivy::query::{
-    BooleanQuery, BoostQuery, FuzzyTermQuery, Occur, Query, QueryParser,
-    TermQuery,
+    BooleanQuery, BoostQuery, FuzzyTermQuery, Occur, Query, QueryParser, TermQuery,
 };
 use tantivy::schema::{IndexRecordOption, Value};
 use tantivy::{Index, TantivyDocument, Term};
 use tonic::{Request, Response, Status};
 
-use common::proto::komp_ac::search::{
-    search_response::Hit, SearchRequest, SearchResponse,
-};
-pub use common::proto::komp_ac::search::searcher_server::SearcherServer;
 use common::proto::komp_ac::search::searcher_server::Searcher;
+pub use common::proto::komp_ac::search::searcher_server::SearcherServer;
+use common::proto::komp_ac::search::{search_response::Hit, SearchRequest, SearchResponse};
 use common::search::register_slovak_tokenizers;
 use sqlx::{PgPool, Row};
 use tracing::info;
@@ -86,22 +83,15 @@ impl Searcher for SearcherService {
                 qualified_table
             );
 
-            let rows = sqlx::query(&sql)
-                .fetch_all(&self.pool)
-                .await
-                .map_err(|e| {
-                    Status::internal(format!(
-                        "DB query for default results failed: {}",
-                        e
-                    ))
-                })?;
+            let rows = sqlx::query(&sql).fetch_all(&self.pool).await.map_err(|e| {
+                Status::internal(format!("DB query for default results failed: {}", e))
+            })?;
 
             let hits: Vec<Hit> = rows
                 .into_iter()
                 .map(|row| {
                     let id: i64 = row.try_get("id").unwrap_or_default();
-                    let json_data: serde_json::Value =
-                        row.try_get("data").unwrap_or_default();
+                    let json_data: serde_json::Value = row.try_get("data").unwrap_or_default();
                     Hit {
                         id,
                         // Score is 0.0 as this is not a relevance-ranked search
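The collapsed `map_err` closures in this file all follow one pattern: wrap a lower-level error in `Status::internal` with a context prefix. A hypothetical helper (not part of this commit) that would deduplicate it:

```rust
// Hypothetical helper, not in this commit: builds a one-shot closure that
// wraps any displayable error in a tonic `Status::internal`.
use tonic::Status;

fn internal<E: std::fmt::Display>(context: &'static str) -> impl FnOnce(E) -> Status {
    move |e| Status::internal(format!("{}: {}", context, e))
}

// Usage sketch:
// let rows = sqlx::query(&sql)
//     .fetch_all(&self.pool)
//     .await
//     .map_err(internal("DB query for default results failed"))?;
```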
@@ -111,7 +101,10 @@ impl Searcher for SearcherService {
                 })
                 .collect();
 
-            info!("--- SERVER: Successfully processed empty query. Returning {} default hits. ---", hits.len());
+            info!(
+                "--- SERVER: Successfully processed empty query. Returning {} default hits. ---",
+                hits.len()
+            );
             return Ok(Response::new(SearchResponse { hits }));
         }
         // --- END OF MODIFIED LOGIC ---
@@ -131,15 +124,15 @@ impl Searcher for SearcherService {
             Status::internal(format!("Failed to register Slovak tokenizers: {}", e))
         })?;
 
-        let reader = index.reader().map_err(|e| {
-            Status::internal(format!("Failed to create index reader: {}", e))
-        })?;
+        let reader = index
+            .reader()
+            .map_err(|e| Status::internal(format!("Failed to create index reader: {}", e)))?;
         let searcher = reader.searcher();
         let schema = index.schema();
 
-        let pg_id_field = schema.get_field("pg_id").map_err(|_| {
-            Status::internal("Schema is missing the 'pg_id' field.")
-        })?;
+        let pg_id_field = schema
+            .get_field("pg_id")
+            .map_err(|_| Status::internal("Schema is missing the 'pg_id' field."))?;
 
         // --- Query Building Logic (no changes here) ---
         let prefix_edge_field = schema.get_field("prefix_edge").unwrap();
@@ -158,25 +151,17 @@ impl Searcher for SearcherService {
         {
             let mut must_clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();
             for word in &words {
-                let edge_term =
-                    Term::from_field_text(prefix_edge_field, word);
-                let full_term =
-                    Term::from_field_text(prefix_full_field, word);
+                let edge_term = Term::from_field_text(prefix_edge_field, word);
+                let full_term = Term::from_field_text(prefix_full_field, word);
 
                 let per_word_query = BooleanQuery::new(vec![
                     (
                         Occur::Should,
-                        Box::new(TermQuery::new(
-                            edge_term,
-                            IndexRecordOption::Basic,
-                        )),
+                        Box::new(TermQuery::new(edge_term, IndexRecordOption::Basic)),
                     ),
                     (
                         Occur::Should,
-                        Box::new(TermQuery::new(
-                            full_term,
-                            IndexRecordOption::Basic,
-                        )),
+                        Box::new(TermQuery::new(full_term, IndexRecordOption::Basic)),
                     ),
                 ]);
                 must_clauses.push((Occur::Must, Box::new(per_word_query) as Box<dyn Query>));
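Semantically, Layer 1 is an AND across words of an OR across the two prefix fields. A self-contained sketch of the same construction; the `Field` handles are assumed to come from the schema exactly as in the service:

```rust
// Sketch of the Layer 1 shape the hunk reformats: every query word must
// match the edge n-gram field OR the full-word field.
use tantivy::query::{BooleanQuery, Occur, Query, TermQuery};
use tantivy::schema::{Field, IndexRecordOption};
use tantivy::Term;

fn prefix_layer(words: &[&str], edge: Field, full: Field) -> BooleanQuery {
    let must_clauses: Vec<(Occur, Box<dyn Query>)> = words
        .iter()
        .map(|word| {
            // Either field may satisfy this word...
            let per_word = BooleanQuery::new(vec![
                (
                    Occur::Should,
                    Box::new(TermQuery::new(
                        Term::from_field_text(edge, word),
                        IndexRecordOption::Basic,
                    )) as Box<dyn Query>,
                ),
                (
                    Occur::Should,
                    Box::new(TermQuery::new(
                        Term::from_field_text(full, word),
                        IndexRecordOption::Basic,
                    )),
                ),
            ]);
            // ...but every word must be satisfied.
            (Occur::Must, Box::new(per_word) as Box<dyn Query>)
        })
        .collect();
    BooleanQuery::new(must_clauses)
}
```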
@@ -184,8 +169,7 @@ impl Searcher for SearcherService {
 
             if !must_clauses.is_empty() {
                 let prefix_query = BooleanQuery::new(must_clauses);
-                let boosted_query =
-                    BoostQuery::new(Box::new(prefix_query), 4.0);
+                let boosted_query = BoostQuery::new(Box::new(prefix_query), 4.0);
                 query_layers.push((Occur::Should, Box::new(boosted_query)));
             }
         }
@@ -195,8 +179,7 @@ impl Searcher for SearcherService {
         // ===============================
         {
             let last_word = words.last().unwrap();
-            let fuzzy_term =
-                Term::from_field_text(prefix_full_field, last_word);
+            let fuzzy_term = Term::from_field_text(prefix_full_field, last_word);
             let fuzzy_query = FuzzyTermQuery::new(fuzzy_term, 2, true);
             let boosted_query = BoostQuery::new(Box::new(fuzzy_query), 3.0);
             query_layers.push((Occur::Should, Box::new(boosted_query)));
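Layer 2 fuzzes only the last word, which in a typeahead flow is likely still being typed. A minimal sketch of the same call, with the field handle assumed as in the service:

```rust
// Sketch of Layer 2: Levenshtein distance 2, and the final `true` makes a
// transposition cost a single edit, so a typo like "banks" for "bansk"
// still matches.
use tantivy::query::FuzzyTermQuery;
use tantivy::schema::Field;
use tantivy::Term;

fn fuzzy_layer(last_word: &str, full: Field) -> FuzzyTermQuery {
    let term = Term::from_field_text(full, last_word);
    FuzzyTermQuery::new(term, 2, true)
}
```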
@@ -206,8 +189,7 @@ impl Searcher for SearcherService {
         // LAYER 3: PHRASE MATCHING WITH SLOP (MEDIUM PRIORITY, Boost: 2.0)
         // ===============================
         if words.len() > 1 {
-            let slop_parser =
-                QueryParser::for_index(&index, vec![prefix_full_field]);
+            let slop_parser = QueryParser::for_index(&index, vec![prefix_full_field]);
             let slop_query_str = format!("\"{}\"~3", normalized_query);
             if let Ok(slop_query) = slop_parser.parse_query(&slop_query_str) {
                 let boosted_query = BoostQuery::new(slop_query, 2.0);
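The `"…"~3` syntax asks tantivy's query parser for a phrase whose terms may sit up to three positions apart. A standalone sketch of Layer 3's construction, with the field handle assumed to be `prefix_full` as in the service:

```rust
// Sketch of Layer 3: build a slop-3 phrase query from the normalized input.
use tantivy::query::{Query, QueryParser};
use tantivy::schema::Field;
use tantivy::Index;

fn slop_query(index: &Index, field: Field, normalized_query: &str) -> Option<Box<dyn Query>> {
    let parser = QueryParser::for_index(index, vec![field]);
    let slop_query_str = format!("\"{}\"~3", normalized_query);
    // Parse failures are simply skipped, mirroring the `if let Ok` above.
    parser.parse_query(&slop_query_str).ok()
}
```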
@@ -219,11 +201,8 @@ impl Searcher for SearcherService {
         // LAYER 4: NGRAM SUBSTRING MATCHING (LOWEST PRIORITY, Boost: 1.0)
         // ===============================
         {
-            let ngram_parser =
-                QueryParser::for_index(&index, vec![text_ngram_field]);
-            if let Ok(ngram_query) =
-                ngram_parser.parse_query(&normalized_query)
-            {
+            let ngram_parser = QueryParser::for_index(&index, vec![text_ngram_field]);
+            if let Ok(ngram_query) = ngram_parser.parse_query(&normalized_query) {
                 let boosted_query = BoostQuery::new(ngram_query, 1.0);
                 query_layers.push((Occur::Should, Box::new(boosted_query)));
             }
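The four layers combine into one disjunction: any single layer can produce a hit, and the per-layer boosts (4.0 prefix, 3.0 fuzzy, 2.0 slop phrase, 1.0 n-gram) decide the ranking. A sketch of the execution step; the limit of 10 is an assumption, since the real limit sits outside these hunks:

```rust
// Sketch: run the combined Should layers and collect the top scored docs.
use tantivy::collector::TopDocs;
use tantivy::query::{BooleanQuery, Occur, Query};
use tantivy::{DocAddress, Index};

fn run_layers(
    index: &Index,
    query_layers: Vec<(Occur, Box<dyn Query>)>,
) -> tantivy::Result<Vec<(f32, DocAddress)>> {
    // Outer disjunction: a document matching any layer is a candidate.
    let combined = BooleanQuery::new(query_layers);
    let searcher = index.reader()?.searcher();
    searcher.search(&combined, &TopDocs::with_limit(10))
}
```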
@@ -244,9 +223,9 @@ impl Searcher for SearcherService {
         // Step 1: Extract (score, pg_id) from Tantivy results.
         let mut scored_ids: Vec<(f32, u64)> = Vec::new();
         for (score, doc_address) in top_docs {
-            let doc: TantivyDocument = searcher.doc(doc_address).map_err(|e| {
-                Status::internal(format!("Failed to retrieve document: {}", e))
-            })?;
+            let doc: TantivyDocument = searcher
+                .doc(doc_address)
+                .map_err(|e| Status::internal(format!("Failed to retrieve document: {}", e)))?;
             if let Some(pg_id_value) = doc.get_first(pg_id_field) {
                 if let Some(pg_id) = pg_id_value.as_u64() {
                     scored_ids.push((score, pg_id));
@@ -255,8 +234,7 @@ impl Searcher for SearcherService {
         }
 
         // Step 2: Fetch all corresponding rows from Postgres in a single query.
-        let pg_ids: Vec<i64> =
-            scored_ids.iter().map(|(_, id)| *id as i64).collect();
+        let pg_ids: Vec<i64> = scored_ids.iter().map(|(_, id)| *id as i64).collect();
         let qualified_table = format!("gen.\"{}\"", table_name);
         let query_str = format!(
             "SELECT id, to_jsonb(t) AS data FROM {} t WHERE id = ANY($1)",
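Step 2 hydrates every hit in one round trip. A standalone sketch of the same fetch; binding a `&[i64]` as `$1` relies on sqlx's Postgres array encoding, and `to_jsonb(t)` turns the whole row into JSON, as in the service:

```rust
// Sketch of Step 2: fetch (id, row-as-JSON) pairs for all hit ids at once.
use sqlx::{PgPool, Row};

async fn fetch_rows(
    pool: &PgPool,
    table_name: &str,
    pg_ids: &[i64],
) -> Result<Vec<(i64, serde_json::Value)>, sqlx::Error> {
    let qualified_table = format!("gen.\"{}\"", table_name);
    let query_str = format!(
        "SELECT id, to_jsonb(t) AS data FROM {} t WHERE id = ANY($1)",
        qualified_table
    );
    let rows = sqlx::query(&query_str).bind(pg_ids).fetch_all(pool).await?;
    rows.into_iter()
        .map(|row| Ok((row.try_get("id")?, row.try_get("data")?)))
        .collect()
}
```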
@@ -267,9 +245,7 @@ impl Searcher for SearcherService {
             .bind(&pg_ids)
             .fetch_all(&self.pool)
             .await
-            .map_err(|e| {
-                Status::internal(format!("Database query failed: {}", e))
-            })?;
+            .map_err(|e| Status::internal(format!("Database query failed: {}", e)))?;
 
         // Step 3: Map the database results by ID for quick lookup.
         let mut content_map: HashMap<i64, String> = HashMap::new();
@@ -284,17 +260,18 @@ impl Searcher for SearcherService {
         let hits: Vec<Hit> = scored_ids
             .into_iter()
             .filter_map(|(score, pg_id)| {
-                content_map
-                    .get(&(pg_id as i64))
-                    .map(|content_json| Hit {
-                        id: pg_id as i64,
-                        score,
-                        content_json: content_json.clone(),
-                    })
+                content_map.get(&(pg_id as i64)).map(|content_json| Hit {
+                    id: pg_id as i64,
+                    score,
+                    content_json: content_json.clone(),
+                })
             })
             .collect();
 
-        info!("--- SERVER: Successfully processed search. Returning {} hits. ---", hits.len());
+        info!(
+            "--- SERVER: Successfully processed search. Returning {} hits. ---",
+            hits.len()
+        );
 
         let response = SearchResponse { hits };
         Ok(Response::new(response))
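One subtle point the reformat keeps intact: iteration drives off `scored_ids`, which is still in Tantivy score order, so the ranking survives even though `ANY($1)` returns rows in arbitrary order; `content_map` is only a lookup table. A minimal sketch, with `Hit` standing in for the generated proto type:

```rust
use std::collections::HashMap;

// Stand-in for the generated proto message.
struct Hit {
    id: i64,
    score: f32,
    content_json: String,
}

// Iterating `scored_ids` (Tantivy score order) rather than the SQL rows is
// what preserves the ranking; rows fetched via ANY($1) arrive unordered.
fn join_in_rank_order(
    scored_ids: Vec<(f32, u64)>,
    content_map: &HashMap<i64, String>,
) -> Vec<Hit> {
    scored_ids
        .into_iter()
        .filter_map(|(score, pg_id)| {
            content_map.get(&(pg_id as i64)).map(|content_json| Hit {
                id: pg_id as i64,
                score,
                content_json: content_json.clone(),
            })
        })
        .collect()
}
```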
Submodule server updated: a398d72145...515f9932f8