From 495d77fda5ae4b8a3ca8c77af031c3564eec92b7 Mon Sep 17 00:00:00 2001 From: filipriec Date: Tue, 10 Jun 2025 23:56:31 +0200 Subject: [PATCH] 4 ngram tokenizer, not doing anything else, keeping this as is --- common/src/search.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/src/search.rs b/common/src/search.rs index 35d98ae..6413a90 100644 --- a/common/src/search.rs +++ b/common/src/search.rs @@ -10,7 +10,7 @@ pub fn create_search_schema() -> Schema { schema_builder.add_u64_field("pg_id", INDEXED | STORED); - // FIELD 1: For prefixes (1-15 chars). + // FIELD 1: For prefixes (1-4 chars). let short_prefix_indexing = TextFieldIndexing::default() .set_tokenizer("slovak_prefix_edge") .set_index_option(IndexRecordOption::WithFreqsAndPositions); @@ -47,9 +47,9 @@ pub fn create_search_schema() -> Schema { pub fn register_slovak_tokenizers(index: &Index) -> tantivy::Result<()> { let tokenizer_manager = index.tokenizers(); - // TOKENIZER for `prefix_edge`: Edge N-gram (1-15 chars) + // TOKENIZER for `prefix_edge`: Edge N-gram (1-4 chars) let edge_tokenizer = - TextAnalyzer::builder(NgramTokenizer::new(1, 15, true)?) + TextAnalyzer::builder(NgramTokenizer::new(1, 4, true)?) .filter(RemoveLongFilter::limit(40)) .filter(LowerCaser) .filter(AsciiFoldingFilter)