4 ngram tokenizer, not doing anything else; keeping this as is

This commit is contained in:
filipriec
2025-06-10 23:56:31 +02:00
parent 679bb3b6ab
commit 495d77fda5

View File

@@ -10,7 +10,7 @@ pub fn create_search_schema() -> Schema {
schema_builder.add_u64_field("pg_id", INDEXED | STORED);
// FIELD 1: For prefixes (1-15 chars).
// FIELD 1: For prefixes (1-4 chars).
let short_prefix_indexing = TextFieldIndexing::default()
.set_tokenizer("slovak_prefix_edge")
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
@@ -47,9 +47,9 @@ pub fn create_search_schema() -> Schema {
pub fn register_slovak_tokenizers(index: &Index) -> tantivy::Result<()> {
let tokenizer_manager = index.tokenizers();
// TOKENIZER for `prefix_edge`: Edge N-gram (1-15 chars)
// TOKENIZER for `prefix_edge`: Edge N-gram (1-4 chars)
let edge_tokenizer =
TextAnalyzer::builder(NgramTokenizer::new(1, 15, true)?)
TextAnalyzer::builder(NgramTokenizer::new(1, 4, true)?)
.filter(RemoveLongFilter::limit(40))
.filter(LowerCaser)
.filter(AsciiFoldingFilter)