rustfmt and server clippy changes

Priec
2025-12-21 00:15:26 +01:00
parent a506cd8f08
commit e5ce96e210
5 changed files with 60 additions and 84 deletions

View File

@@ -37,7 +37,6 @@ pub mod proto {
         pub mod table_validation {
             include!("proto/komp_ac.table_validation.rs");
         }
-        pub const FILE_DESCRIPTOR_SET: &[u8] =
-            include_bytes!("proto/descriptor.bin");
+        pub const FILE_DESCRIPTOR_SET: &[u8] = include_bytes!("proto/descriptor.bin");
     }
 }
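A descriptor set like this one is typically handed to gRPC server reflection at startup. A minimal sketch, assuming the tonic-reflection crate and this crate's common::proto::komp_ac module path (recent tonic-reflection versions expose build_v1(); older ones use build()):

    // Hypothetical wiring inside the server's setup function (which returns a
    // Result): expose the descriptor set so tools like grpcurl can discover
    // the services at runtime.
    let reflection = tonic_reflection::server::Builder::configure()
        .register_encoded_file_descriptor_set(common::proto::komp_ac::FILE_DESCRIPTOR_SET)
        .build_v1()?;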

View File

@@ -48,8 +48,7 @@ pub fn register_slovak_tokenizers(index: &Index) -> tantivy::Result<()> {
     let tokenizer_manager = index.tokenizers();
     // TOKENIZER for `prefix_edge`: Edge N-gram (1-4 chars)
-    let edge_tokenizer =
-        TextAnalyzer::builder(NgramTokenizer::new(1, 4, true)?)
+    let edge_tokenizer = TextAnalyzer::builder(NgramTokenizer::new(1, 4, true)?)
         .filter(RemoveLongFilter::limit(40))
         .filter(LowerCaser)
         .filter(AsciiFoldingFilter)
@@ -57,8 +56,7 @@ pub fn register_slovak_tokenizers(index: &Index) -> tantivy::Result<()> {
     tokenizer_manager.register("slovak_prefix_edge", edge_tokenizer);
     // TOKENIZER for `prefix_full`: Simple word tokenizer
-    let full_tokenizer =
-        TextAnalyzer::builder(SimpleTokenizer::default())
+    let full_tokenizer = TextAnalyzer::builder(SimpleTokenizer::default())
         .filter(RemoveLongFilter::limit(40))
         .filter(LowerCaser)
         .filter(AsciiFoldingFilter)
@@ -66,8 +64,7 @@ pub fn register_slovak_tokenizers(index: &Index) -> tantivy::Result<()> {
     tokenizer_manager.register("slovak_prefix_full", full_tokenizer);
     // NGRAM TOKENIZER: For substring matching.
-    let ngram_tokenizer =
-        TextAnalyzer::builder(NgramTokenizer::new(3, 3, false)?)
+    let ngram_tokenizer = TextAnalyzer::builder(NgramTokenizer::new(3, 3, false)?)
         .filter(RemoveLongFilter::limit(40))
         .filter(LowerCaser)
         .filter(AsciiFoldingFilter)
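These analyzers only take effect for fields that name them in the index schema. A minimal sketch of that wiring, assuming tantivy's standard schema API; the field names match the ones the search service reads below, while the ngram analyzer's registration name is cut off in this diff and assumed here as "slovak_ngram":

    use tantivy::schema::{IndexRecordOption, Schema, TextFieldIndexing, TextOptions};

    fn build_search_schema() -> Schema {
        let mut builder = Schema::builder();
        // Each text field is indexed with one of the analyzers registered above.
        for (field, tokenizer) in [
            ("prefix_edge", "slovak_prefix_edge"),
            ("prefix_full", "slovak_prefix_full"),
            ("text_ngram", "slovak_ngram"), // registration name assumed; not shown in the diff
        ] {
            let indexing = TextFieldIndexing::default()
                .set_tokenizer(tokenizer)
                .set_index_option(IndexRecordOption::WithFreqsAndPositions);
            builder.add_text_field(field, TextOptions::default().set_indexing_options(indexing));
        }
        builder.build()
    }

The real schema also carries the pg_id field that the search service resolves back to Postgres rows.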

View File

@@ -21,6 +21,9 @@
     clippy
     cargo-watch
     rust-analyzer
+    cargo-tarpaulin
+    cargo-flamegraph
+    rust-code-analysis
     # C build tools (for your linker issue)
     gcc
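The added tools are run from the dev shell via their cargo subcommands; typical invocations (the binary name server is an assumption):

    cargo tarpaulin --workspace    # test coverage report
    cargo flamegraph --bin server  # CPU profile rendered as flamegraph.svg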

View File

@@ -4,18 +4,15 @@ use std::collections::HashMap;
 use std::path::Path;
 use tantivy::collector::TopDocs;
 use tantivy::query::{
-    BooleanQuery, BoostQuery, FuzzyTermQuery, Occur, Query, QueryParser,
-    TermQuery,
+    BooleanQuery, BoostQuery, FuzzyTermQuery, Occur, Query, QueryParser, TermQuery,
 };
 use tantivy::schema::{IndexRecordOption, Value};
 use tantivy::{Index, TantivyDocument, Term};
 use tonic::{Request, Response, Status};
-use common::proto::komp_ac::search::{
-    search_response::Hit, SearchRequest, SearchResponse,
-};
-pub use common::proto::komp_ac::search::searcher_server::SearcherServer;
 use common::proto::komp_ac::search::searcher_server::Searcher;
+pub use common::proto::komp_ac::search::searcher_server::SearcherServer;
+use common::proto::komp_ac::search::{search_response::Hit, SearchRequest, SearchResponse};
 use common::search::register_slovak_tokenizers;
 use sqlx::{PgPool, Row};
 use tracing::info;
@@ -86,22 +83,15 @@ impl Searcher for SearcherService {
                 qualified_table
             );
-            let rows = sqlx::query(&sql)
-                .fetch_all(&self.pool)
-                .await
-                .map_err(|e| {
-                    Status::internal(format!(
-                        "DB query for default results failed: {}",
-                        e
-                    ))
-                })?;
+            let rows = sqlx::query(&sql).fetch_all(&self.pool).await.map_err(|e| {
+                Status::internal(format!("DB query for default results failed: {}", e))
+            })?;
             let hits: Vec<Hit> = rows
                 .into_iter()
                 .map(|row| {
                     let id: i64 = row.try_get("id").unwrap_or_default();
-                    let json_data: serde_json::Value =
-                        row.try_get("data").unwrap_or_default();
+                    let json_data: serde_json::Value = row.try_get("data").unwrap_or_default();
                     Hit {
                         id,
                         // Score is 0.0 as this is not a relevance-ranked search
@@ -111,7 +101,10 @@ impl Searcher for SearcherService {
                 })
                 .collect();
-            info!("--- SERVER: Successfully processed empty query. Returning {} default hits. ---", hits.len());
+            info!(
+                "--- SERVER: Successfully processed empty query. Returning {} default hits. ---",
+                hits.len()
+            );
             return Ok(Response::new(SearchResponse { hits }));
         }
         // --- END OF MODIFIED LOGIC ---
@@ -131,15 +124,15 @@ impl Searcher for SearcherService {
             Status::internal(format!("Failed to register Slovak tokenizers: {}", e))
         })?;
-        let reader = index.reader().map_err(|e| {
-            Status::internal(format!("Failed to create index reader: {}", e))
-        })?;
+        let reader = index
+            .reader()
+            .map_err(|e| Status::internal(format!("Failed to create index reader: {}", e)))?;
         let searcher = reader.searcher();
         let schema = index.schema();
-        let pg_id_field = schema.get_field("pg_id").map_err(|_| {
-            Status::internal("Schema is missing the 'pg_id' field.")
-        })?;
+        let pg_id_field = schema
+            .get_field("pg_id")
+            .map_err(|_| Status::internal("Schema is missing the 'pg_id' field."))?;
         // --- Query Building Logic (no changes here) ---
         let prefix_edge_field = schema.get_field("prefix_edge").unwrap();
@@ -158,25 +151,17 @@ impl Searcher for SearcherService {
         {
             let mut must_clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();
             for word in &words {
-                let edge_term =
-                    Term::from_field_text(prefix_edge_field, word);
-                let full_term =
-                    Term::from_field_text(prefix_full_field, word);
+                let edge_term = Term::from_field_text(prefix_edge_field, word);
+                let full_term = Term::from_field_text(prefix_full_field, word);
                 let per_word_query = BooleanQuery::new(vec![
                     (
                         Occur::Should,
-                        Box::new(TermQuery::new(
-                            edge_term,
-                            IndexRecordOption::Basic,
-                        )),
+                        Box::new(TermQuery::new(edge_term, IndexRecordOption::Basic)),
                     ),
                     (
                         Occur::Should,
-                        Box::new(TermQuery::new(
-                            full_term,
-                            IndexRecordOption::Basic,
-                        )),
+                        Box::new(TermQuery::new(full_term, IndexRecordOption::Basic)),
                     ),
                 ]);
                 must_clauses.push((Occur::Must, Box::new(per_word_query) as Box<dyn Query>));
@@ -184,8 +169,7 @@ impl Searcher for SearcherService {
             if !must_clauses.is_empty() {
                 let prefix_query = BooleanQuery::new(must_clauses);
-                let boosted_query =
-                    BoostQuery::new(Box::new(prefix_query), 4.0);
+                let boosted_query = BoostQuery::new(Box::new(prefix_query), 4.0);
                 query_layers.push((Occur::Should, Box::new(boosted_query)));
             }
         }
@@ -195,8 +179,7 @@ impl Searcher for SearcherService {
         // ===============================
         {
             let last_word = words.last().unwrap();
-            let fuzzy_term =
-                Term::from_field_text(prefix_full_field, last_word);
+            let fuzzy_term = Term::from_field_text(prefix_full_field, last_word);
             let fuzzy_query = FuzzyTermQuery::new(fuzzy_term, 2, true);
             let boosted_query = BoostQuery::new(Box::new(fuzzy_query), 3.0);
             query_layers.push((Occur::Should, Box::new(boosted_query)));
@@ -206,8 +189,7 @@ impl Searcher for SearcherService {
         // LAYER 3: PHRASE MATCHING WITH SLOP (MEDIUM PRIORITY, Boost: 2.0)
         // ===============================
         if words.len() > 1 {
-            let slop_parser =
-                QueryParser::for_index(&index, vec![prefix_full_field]);
+            let slop_parser = QueryParser::for_index(&index, vec![prefix_full_field]);
             let slop_query_str = format!("\"{}\"~3", normalized_query);
             if let Ok(slop_query) = slop_parser.parse_query(&slop_query_str) {
                 let boosted_query = BoostQuery::new(slop_query, 2.0);
@@ -219,11 +201,8 @@ impl Searcher for SearcherService {
         // LAYER 4: NGRAM SUBSTRING MATCHING (LOWEST PRIORITY, Boost: 1.0)
        // ===============================
         {
-            let ngram_parser =
-                QueryParser::for_index(&index, vec![text_ngram_field]);
-            if let Ok(ngram_query) =
-                ngram_parser.parse_query(&normalized_query)
-            {
+            let ngram_parser = QueryParser::for_index(&index, vec![text_ngram_field]);
+            if let Ok(ngram_query) = ngram_parser.parse_query(&normalized_query) {
                 let boosted_query = BoostQuery::new(ngram_query, 1.0);
                 query_layers.push((Occur::Should, Box::new(boosted_query)));
             }
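Together the four layers end up as Should clauses of one BooleanQuery, so any layer can match while the boosts (4.0 > 3.0 > 2.0 > 1.0) impose the ranking. A minimal self-contained sketch of the pattern, with a hypothetical helper and just two layers for brevity:

    use tantivy::query::{BooleanQuery, BoostQuery, FuzzyTermQuery, Occur, Query, TermQuery};
    use tantivy::schema::{Field, IndexRecordOption};
    use tantivy::Term;

    // Hypothetical helper: an exact layer (boost 4.0) and a fuzzy layer
    // (boost 3.0), both Should clauses, so either can contribute to the score.
    fn layered_query(field: Field, word: &str) -> BooleanQuery {
        let exact = TermQuery::new(Term::from_field_text(field, word), IndexRecordOption::Basic);
        let fuzzy = FuzzyTermQuery::new(Term::from_field_text(field, word), 2, true);
        BooleanQuery::new(vec![
            (Occur::Should, Box::new(BoostQuery::new(Box::new(exact), 4.0)) as Box<dyn Query>),
            (Occur::Should, Box::new(BoostQuery::new(Box::new(fuzzy), 3.0)) as Box<dyn Query>),
        ])
    }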
@@ -244,9 +223,9 @@ impl Searcher for SearcherService {
         // Step 1: Extract (score, pg_id) from Tantivy results.
         let mut scored_ids: Vec<(f32, u64)> = Vec::new();
         for (score, doc_address) in top_docs {
-            let doc: TantivyDocument = searcher.doc(doc_address).map_err(|e| {
-                Status::internal(format!("Failed to retrieve document: {}", e))
-            })?;
+            let doc: TantivyDocument = searcher
+                .doc(doc_address)
+                .map_err(|e| Status::internal(format!("Failed to retrieve document: {}", e)))?;
             if let Some(pg_id_value) = doc.get_first(pg_id_field) {
                 if let Some(pg_id) = pg_id_value.as_u64() {
                     scored_ids.push((score, pg_id));
@@ -255,8 +234,7 @@ impl Searcher for SearcherService {
         }
         // Step 2: Fetch all corresponding rows from Postgres in a single query.
-        let pg_ids: Vec<i64> =
-            scored_ids.iter().map(|(_, id)| *id as i64).collect();
+        let pg_ids: Vec<i64> = scored_ids.iter().map(|(_, id)| *id as i64).collect();
         let qualified_table = format!("gen.\"{}\"", table_name);
         let query_str = format!(
             "SELECT id, to_jsonb(t) AS data FROM {} t WHERE id = ANY($1)",
@@ -267,9 +245,7 @@ impl Searcher for SearcherService {
             .bind(&pg_ids)
             .fetch_all(&self.pool)
             .await
-            .map_err(|e| {
-                Status::internal(format!("Database query failed: {}", e))
-            })?;
+            .map_err(|e| Status::internal(format!("Database query failed: {}", e)))?;
         // Step 3: Map the database results by ID for quick lookup.
         let mut content_map: HashMap<i64, String> = HashMap::new();
@@ -284,9 +260,7 @@ impl Searcher for SearcherService {
         let hits: Vec<Hit> = scored_ids
             .into_iter()
             .filter_map(|(score, pg_id)| {
-                content_map
-                    .get(&(pg_id as i64))
-                    .map(|content_json| Hit {
+                content_map.get(&(pg_id as i64)).map(|content_json| Hit {
                     id: pg_id as i64,
                     score,
                     content_json: content_json.clone(),
@@ -294,7 +268,10 @@ impl Searcher for SearcherService {
             })
             .collect();
-        info!("--- SERVER: Successfully processed search. Returning {} hits. ---", hits.len());
+        info!(
+            "--- SERVER: Successfully processed search. Returning {} hits. ---",
+            hits.len()
+        );
         let response = SearchResponse { hits };
         Ok(Response::new(response))
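The final mapping deliberately iterates scored_ids rather than the database rows: WHERE id = ANY($1) returns rows in arbitrary order, and the HashMap lookup restores Tantivy's ranking. A miniature of that rank-preserving join, with hypothetical names:

    use std::collections::HashMap;

    // Index rows by id, then walk the score-ordered list so ranking survives
    // the unordered database fetch.
    fn join_preserving_rank(
        scored: Vec<(f32, i64)>,
        rows: Vec<(i64, String)>,
    ) -> Vec<(f32, i64, String)> {
        let by_id: HashMap<i64, String> = rows.into_iter().collect();
        scored
            .into_iter()
            .filter_map(|(score, id)| by_id.get(&id).map(|data| (score, id, data.clone())))
            .collect()
    }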

View File

Submodule server updated: a398d72145...515f9932f8