From e5ce96e210412c48bcbe2ca9c1119ef04dbe84f4 Mon Sep 17 00:00:00 2001
From: Priec
Date: Sun, 21 Dec 2025 00:15:26 +0100
Subject: [PATCH] rustfmt and server clippy changes

---
 common/src/lib.rs    |   3 +-
 common/src/search.rs |  33 +++++++-------
 flake.nix            |   3 ++
 search/src/lib.rs    | 103 +++++++++++++++++--------------------------
 server               |   2 +-
 5 files changed, 60 insertions(+), 84 deletions(-)

diff --git a/common/src/lib.rs b/common/src/lib.rs
index 938155f..517b0da 100644
--- a/common/src/lib.rs
+++ b/common/src/lib.rs
@@ -37,7 +37,6 @@ pub mod proto {
         pub mod table_validation {
             include!("proto/komp_ac.table_validation.rs");
         }
-        pub const FILE_DESCRIPTOR_SET: &[u8] =
-            include_bytes!("proto/descriptor.bin");
+        pub const FILE_DESCRIPTOR_SET: &[u8] = include_bytes!("proto/descriptor.bin");
     }
 }
diff --git a/common/src/search.rs b/common/src/search.rs
index 6413a90..ff25dd3 100644
--- a/common/src/search.rs
+++ b/common/src/search.rs
@@ -48,30 +48,27 @@ pub fn register_slovak_tokenizers(index: &Index) -> tantivy::Result<()> {
     let tokenizer_manager = index.tokenizers();
 
     // TOKENIZER for `prefix_edge`: Edge N-gram (1-4 chars)
-    let edge_tokenizer =
-        TextAnalyzer::builder(NgramTokenizer::new(1, 4, true)?)
-            .filter(RemoveLongFilter::limit(40))
-            .filter(LowerCaser)
-            .filter(AsciiFoldingFilter)
-            .build();
+    let edge_tokenizer = TextAnalyzer::builder(NgramTokenizer::new(1, 4, true)?)
+        .filter(RemoveLongFilter::limit(40))
+        .filter(LowerCaser)
+        .filter(AsciiFoldingFilter)
+        .build();
     tokenizer_manager.register("slovak_prefix_edge", edge_tokenizer);
 
     // TOKENIZER for `prefix_full`: Simple word tokenizer
-    let full_tokenizer =
-        TextAnalyzer::builder(SimpleTokenizer::default())
-            .filter(RemoveLongFilter::limit(40))
-            .filter(LowerCaser)
-            .filter(AsciiFoldingFilter)
-            .build();
+    let full_tokenizer = TextAnalyzer::builder(SimpleTokenizer::default())
+        .filter(RemoveLongFilter::limit(40))
+        .filter(LowerCaser)
+        .filter(AsciiFoldingFilter)
+        .build();
     tokenizer_manager.register("slovak_prefix_full", full_tokenizer);
 
     // NGRAM TOKENIZER: For substring matching.
-    let ngram_tokenizer =
-        TextAnalyzer::builder(NgramTokenizer::new(3, 3, false)?)
-            .filter(RemoveLongFilter::limit(40))
-            .filter(LowerCaser)
-            .filter(AsciiFoldingFilter)
-            .build();
+    let ngram_tokenizer = TextAnalyzer::builder(NgramTokenizer::new(3, 3, false)?)
+        .filter(RemoveLongFilter::limit(40))
+        .filter(LowerCaser)
+        .filter(AsciiFoldingFilter)
+        .build();
     tokenizer_manager.register("slovak_ngram", ngram_tokenizer);
 
     Ok(())
diff --git a/flake.nix b/flake.nix
index 770726c..6e44b6e 100644
--- a/flake.nix
+++ b/flake.nix
@@ -21,6 +21,9 @@
             clippy
             cargo-watch
             rust-analyzer
+            cargo-tarpaulin
+            cargo-flamegraph
+            rust-code-analysis
 
             # C build tools (for your linker issue)
             gcc
diff --git a/search/src/lib.rs b/search/src/lib.rs
index b9ea66b..b17a5b8 100644
--- a/search/src/lib.rs
+++ b/search/src/lib.rs
@@ -4,18 +4,15 @@
 use std::collections::HashMap;
 use std::path::Path;
 use tantivy::collector::TopDocs;
 use tantivy::query::{
-    BooleanQuery, BoostQuery, FuzzyTermQuery, Occur, Query, QueryParser,
-    TermQuery,
+    BooleanQuery, BoostQuery, FuzzyTermQuery, Occur, Query, QueryParser, TermQuery,
 };
 use tantivy::schema::{IndexRecordOption, Value};
 use tantivy::{Index, TantivyDocument, Term};
 use tonic::{Request, Response, Status};
-use common::proto::komp_ac::search::{
-    search_response::Hit, SearchRequest, SearchResponse,
-};
-pub use common::proto::komp_ac::search::searcher_server::SearcherServer;
 use common::proto::komp_ac::search::searcher_server::Searcher;
+pub use common::proto::komp_ac::search::searcher_server::SearcherServer;
+use common::proto::komp_ac::search::{search_response::Hit, SearchRequest, SearchResponse};
 use common::search::register_slovak_tokenizers;
 use sqlx::{PgPool, Row};
 use tracing::info;
@@ -86,22 +83,15 @@ impl Searcher for SearcherService {
                 qualified_table
             );
 
-            let rows = sqlx::query(&sql)
-                .fetch_all(&self.pool)
-                .await
-                .map_err(|e| {
-                    Status::internal(format!(
-                        "DB query for default results failed: {}",
-                        e
-                    ))
-                })?;
+            let rows = sqlx::query(&sql).fetch_all(&self.pool).await.map_err(|e| {
+                Status::internal(format!("DB query for default results failed: {}", e))
+            })?;
 
             let hits: Vec<Hit> = rows
                 .into_iter()
                 .map(|row| {
                     let id: i64 = row.try_get("id").unwrap_or_default();
-                    let json_data: serde_json::Value =
-                        row.try_get("data").unwrap_or_default();
+                    let json_data: serde_json::Value = row.try_get("data").unwrap_or_default();
                     Hit {
                         id,
                         // Score is 0.0 as this is not a relevance-ranked search
@@ -111,7 +101,10 @@ impl Searcher for SearcherService {
                 })
                 .collect();
 
-            info!("--- SERVER: Successfully processed empty query. Returning {} default hits. ---", hits.len());
+            info!(
+                "--- SERVER: Successfully processed empty query. Returning {} default hits. ---",
---", + hits.len() + ); return Ok(Response::new(SearchResponse { hits })); } // --- END OF MODIFIED LOGIC --- @@ -131,15 +124,15 @@ impl Searcher for SearcherService { Status::internal(format!("Failed to register Slovak tokenizers: {}", e)) })?; - let reader = index.reader().map_err(|e| { - Status::internal(format!("Failed to create index reader: {}", e)) - })?; + let reader = index + .reader() + .map_err(|e| Status::internal(format!("Failed to create index reader: {}", e)))?; let searcher = reader.searcher(); let schema = index.schema(); - let pg_id_field = schema.get_field("pg_id").map_err(|_| { - Status::internal("Schema is missing the 'pg_id' field.") - })?; + let pg_id_field = schema + .get_field("pg_id") + .map_err(|_| Status::internal("Schema is missing the 'pg_id' field."))?; // --- Query Building Logic (no changes here) --- let prefix_edge_field = schema.get_field("prefix_edge").unwrap(); @@ -158,25 +151,17 @@ impl Searcher for SearcherService { { let mut must_clauses: Vec<(Occur, Box)> = Vec::new(); for word in &words { - let edge_term = - Term::from_field_text(prefix_edge_field, word); - let full_term = - Term::from_field_text(prefix_full_field, word); + let edge_term = Term::from_field_text(prefix_edge_field, word); + let full_term = Term::from_field_text(prefix_full_field, word); let per_word_query = BooleanQuery::new(vec![ ( Occur::Should, - Box::new(TermQuery::new( - edge_term, - IndexRecordOption::Basic, - )), + Box::new(TermQuery::new(edge_term, IndexRecordOption::Basic)), ), ( Occur::Should, - Box::new(TermQuery::new( - full_term, - IndexRecordOption::Basic, - )), + Box::new(TermQuery::new(full_term, IndexRecordOption::Basic)), ), ]); must_clauses.push((Occur::Must, Box::new(per_word_query) as Box)); @@ -184,8 +169,7 @@ impl Searcher for SearcherService { if !must_clauses.is_empty() { let prefix_query = BooleanQuery::new(must_clauses); - let boosted_query = - BoostQuery::new(Box::new(prefix_query), 4.0); + let boosted_query = BoostQuery::new(Box::new(prefix_query), 4.0); query_layers.push((Occur::Should, Box::new(boosted_query))); } } @@ -195,8 +179,7 @@ impl Searcher for SearcherService { // =============================== { let last_word = words.last().unwrap(); - let fuzzy_term = - Term::from_field_text(prefix_full_field, last_word); + let fuzzy_term = Term::from_field_text(prefix_full_field, last_word); let fuzzy_query = FuzzyTermQuery::new(fuzzy_term, 2, true); let boosted_query = BoostQuery::new(Box::new(fuzzy_query), 3.0); query_layers.push((Occur::Should, Box::new(boosted_query))); @@ -206,8 +189,7 @@ impl Searcher for SearcherService { // LAYER 3: PHRASE MATCHING WITH SLOP (MEDIUM PRIORITY, Boost: 2.0) // =============================== if words.len() > 1 { - let slop_parser = - QueryParser::for_index(&index, vec![prefix_full_field]); + let slop_parser = QueryParser::for_index(&index, vec![prefix_full_field]); let slop_query_str = format!("\"{}\"~3", normalized_query); if let Ok(slop_query) = slop_parser.parse_query(&slop_query_str) { let boosted_query = BoostQuery::new(slop_query, 2.0); @@ -219,11 +201,8 @@ impl Searcher for SearcherService { // LAYER 4: NGRAM SUBSTRING MATCHING (LOWEST PRIORITY, Boost: 1.0) // =============================== { - let ngram_parser = - QueryParser::for_index(&index, vec![text_ngram_field]); - if let Ok(ngram_query) = - ngram_parser.parse_query(&normalized_query) - { + let ngram_parser = QueryParser::for_index(&index, vec![text_ngram_field]); + if let Ok(ngram_query) = ngram_parser.parse_query(&normalized_query) { let boosted_query 
                 query_layers.push((Occur::Should, Box::new(boosted_query)));
             }
         }
@@ -244,9 +223,9 @@ impl Searcher for SearcherService {
         // Step 1: Extract (score, pg_id) from Tantivy results.
         let mut scored_ids: Vec<(f32, u64)> = Vec::new();
         for (score, doc_address) in top_docs {
-            let doc: TantivyDocument = searcher.doc(doc_address).map_err(|e| {
-                Status::internal(format!("Failed to retrieve document: {}", e))
-            })?;
+            let doc: TantivyDocument = searcher
+                .doc(doc_address)
+                .map_err(|e| Status::internal(format!("Failed to retrieve document: {}", e)))?;
             if let Some(pg_id_value) = doc.get_first(pg_id_field) {
                 if let Some(pg_id) = pg_id_value.as_u64() {
                     scored_ids.push((score, pg_id));
@@ -255,8 +234,7 @@ impl Searcher for SearcherService {
         }
 
         // Step 2: Fetch all corresponding rows from Postgres in a single query.
-        let pg_ids: Vec<i64> =
-            scored_ids.iter().map(|(_, id)| *id as i64).collect();
+        let pg_ids: Vec<i64> = scored_ids.iter().map(|(_, id)| *id as i64).collect();
         let qualified_table = format!("gen.\"{}\"", table_name);
         let query_str = format!(
             "SELECT id, to_jsonb(t) AS data FROM {} t WHERE id = ANY($1)",
@@ -267,9 +245,7 @@ impl Searcher for SearcherService {
             .bind(&pg_ids)
             .fetch_all(&self.pool)
             .await
-            .map_err(|e| {
-                Status::internal(format!("Database query failed: {}", e))
-            })?;
+            .map_err(|e| Status::internal(format!("Database query failed: {}", e)))?;
 
         // Step 3: Map the database results by ID for quick lookup.
         let mut content_map: HashMap<i64, String> = HashMap::new();
@@ -284,17 +260,18 @@ impl Searcher for SearcherService {
         let hits: Vec<Hit> = scored_ids
             .into_iter()
             .filter_map(|(score, pg_id)| {
-                content_map
-                    .get(&(pg_id as i64))
-                    .map(|content_json| Hit {
-                        id: pg_id as i64,
-                        score,
-                        content_json: content_json.clone(),
-                    })
+                content_map.get(&(pg_id as i64)).map(|content_json| Hit {
+                    id: pg_id as i64,
+                    score,
+                    content_json: content_json.clone(),
+                })
             })
             .collect();
 
-        info!("--- SERVER: Successfully processed search. Returning {} hits. ---", hits.len());
+        info!(
+            "--- SERVER: Successfully processed search. Returning {} hits. ---",
+            hits.len()
+        );
 
         let response = SearchResponse { hits };
         Ok(Response::new(response))
diff --git a/server b/server
index a398d72..515f993 160000
--- a/server
+++ b/server
@@ -1 +1 @@
-Subproject commit a398d721457eab5956e6516c8fcb87c3b54e603f
+Subproject commit 515f9932f84f9042a04a5b4cdff252bd7470e5c2
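
Note: the search/src/lib.rs changes above are pure rustfmt reflows; the four-layer
query strategy itself is unchanged. For reference, a minimal self-contained sketch
of that strategy follows. The field names and boosts match the diff, but the
function build_layered_query, its signature, and the surrounding imports are
illustrative assumptions, not code from this repository.

use tantivy::query::{
    BooleanQuery, BoostQuery, FuzzyTermQuery, Occur, Query, QueryParser, TermQuery,
};
use tantivy::schema::IndexRecordOption;
use tantivy::{Index, Term};

fn build_layered_query(index: &Index, normalized_query: &str) -> tantivy::Result<BooleanQuery> {
    let schema = index.schema();
    let prefix_edge_field = schema.get_field("prefix_edge")?;
    let prefix_full_field = schema.get_field("prefix_full")?;
    let text_ngram_field = schema.get_field("text_ngram")?;

    let words: Vec<&str> = normalized_query.split_whitespace().collect();
    let mut query_layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();

    // Layer 1 (boost 4.0): every word must hit either the edge-ngram or the
    // full-word prefix field.
    let must_clauses: Vec<(Occur, Box<dyn Query>)> = words
        .iter()
        .map(|word| {
            let edge = TermQuery::new(
                Term::from_field_text(prefix_edge_field, word),
                IndexRecordOption::Basic,
            );
            let full = TermQuery::new(
                Term::from_field_text(prefix_full_field, word),
                IndexRecordOption::Basic,
            );
            let per_word = BooleanQuery::new(vec![
                (Occur::Should, Box::new(edge) as Box<dyn Query>),
                (Occur::Should, Box::new(full) as Box<dyn Query>),
            ]);
            (Occur::Must, Box::new(per_word) as Box<dyn Query>)
        })
        .collect();
    if !must_clauses.is_empty() {
        let prefix_query = BooleanQuery::new(must_clauses);
        query_layers.push((
            Occur::Should,
            Box::new(BoostQuery::new(Box::new(prefix_query), 4.0)),
        ));
    }

    // Layer 2 (boost 3.0): fuzzy match on the last word (edit distance 2,
    // transpositions counted as one edit) to tolerate typos mid-typing.
    if let Some(last_word) = words.last() {
        let fuzzy_term = Term::from_field_text(prefix_full_field, last_word);
        let fuzzy_query = FuzzyTermQuery::new(fuzzy_term, 2, true);
        query_layers.push((
            Occur::Should,
            Box::new(BoostQuery::new(Box::new(fuzzy_query), 3.0)),
        ));
    }

    // Layer 3 (boost 2.0): phrase query with slop 3 for multi-word input.
    if words.len() > 1 {
        let slop_parser = QueryParser::for_index(index, vec![prefix_full_field]);
        if let Ok(slop_query) = slop_parser.parse_query(&format!("\"{}\"~3", normalized_query)) {
            query_layers.push((Occur::Should, Box::new(BoostQuery::new(slop_query, 2.0))));
        }
    }

    // Layer 4 (boost 1.0): trigram substring matching as the weakest signal.
    let ngram_parser = QueryParser::for_index(index, vec![text_ngram_field]);
    if let Ok(ngram_query) = ngram_parser.parse_query(normalized_query) {
        query_layers.push((Occur::Should, Box::new(BoostQuery::new(ngram_query, 1.0))));
    }

    Ok(BooleanQuery::new(query_layers))
}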
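
The layers above also presuppose a Tantivy schema whose fields (pg_id, prefix_edge,
prefix_full, text_ngram) are bound to the analyzers registered in
common/src/search.rs. Schema construction is not part of this patch; the sketch
below shows one plausible wiring, and build_search_schema together with every
option in it is an assumption.

use tantivy::schema::{IndexRecordOption, Schema, TextFieldIndexing, TextOptions, FAST, INDEXED};

fn build_search_schema() -> Schema {
    let mut builder = Schema::builder();

    // Postgres row id as an indexed fast field, so hits can be joined back to rows.
    builder.add_u64_field("pg_id", INDEXED | FAST);

    // Bind a text field to one of the analyzers registered by
    // register_slovak_tokenizers(); positions are kept so the slop/phrase
    // layer can run against prefix_full.
    let text_with = |tokenizer: &str| {
        TextOptions::default().set_indexing_options(
            TextFieldIndexing::default()
                .set_tokenizer(tokenizer)
                .set_index_option(IndexRecordOption::WithFreqsAndPositions),
        )
    };

    builder.add_text_field("prefix_edge", text_with("slovak_prefix_edge"));
    builder.add_text_field("prefix_full", text_with("slovak_prefix_full"));
    builder.add_text_field("text_ngram", text_with("slovak_ngram"));

    builder.build()
}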