1-4 ngram tokenizer, not doing anything else; keeping the rest as is
@@ -10,7 +10,7 @@ pub fn create_search_schema() -> Schema {
 
     schema_builder.add_u64_field("pg_id", INDEXED | STORED);
 
-    // FIELD 1: For prefixes (1-15 chars).
+    // FIELD 1: For prefixes (1-4 chars).
     let short_prefix_indexing = TextFieldIndexing::default()
         .set_tokenizer("slovak_prefix_edge")
         .set_index_option(IndexRecordOption::WithFreqsAndPositions);
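
For context, a minimal sketch (not this repo's actual code) of how a `TextFieldIndexing` value like `short_prefix_indexing` is typically attached to a text field in tantivy. The field name `name_prefix_short` and the surrounding schema layout are assumptions; only `pg_id` and the indexing options are visible in the hunk above.

use tantivy::schema::{
    IndexRecordOption, Schema, TextFieldIndexing, TextOptions, INDEXED, STORED,
};

pub fn create_search_schema() -> Schema {
    let mut schema_builder = Schema::builder();
    schema_builder.add_u64_field("pg_id", INDEXED | STORED);

    // FIELD 1: prefixes (1-4 chars), analyzed by the custom edge n-gram tokenizer.
    let short_prefix_indexing = TextFieldIndexing::default()
        .set_tokenizer("slovak_prefix_edge")
        .set_index_option(IndexRecordOption::WithFreqsAndPositions);
    let short_prefix_options = TextOptions::default()
        .set_indexing_options(short_prefix_indexing)
        .set_stored();
    // Placeholder field name; the real name is not shown in this hunk.
    schema_builder.add_text_field("name_prefix_short", short_prefix_options);

    schema_builder.build()
}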
@@ -47,9 +47,9 @@ pub fn create_search_schema() -> Schema {
 pub fn register_slovak_tokenizers(index: &Index) -> tantivy::Result<()> {
     let tokenizer_manager = index.tokenizers();
 
-    // TOKENIZER for `prefix_edge`: Edge N-gram (1-15 chars)
+    // TOKENIZER for `prefix_edge`: Edge N-gram (1-4 chars)
     let edge_tokenizer =
-        TextAnalyzer::builder(NgramTokenizer::new(1, 15, true)?)
+        TextAnalyzer::builder(NgramTokenizer::new(1, 4, true)?)
             .filter(RemoveLongFilter::limit(40))
             .filter(LowerCaser)
             .filter(AsciiFoldingFilter)
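
A minimal sketch of the resulting analyzer chain and the tokens it emits; the helper name `build_prefix_analyzer` and the sample input are illustrative, not from the repo. The chain itself mirrors the diff, and the schema's `set_tokenizer("slovak_prefix_edge")` implies the analyzer is presumably registered under that name via `tokenizer_manager.register(...)`.

use tantivy::tokenizer::{
    AsciiFoldingFilter, LowerCaser, NgramTokenizer, RemoveLongFilter, TextAnalyzer,
    TokenStream,
};

// Same chain as the diff above: prefix-only 1-4 char n-grams, then
// long-token removal, lowercasing, and ASCII folding.
fn build_prefix_analyzer() -> tantivy::Result<TextAnalyzer> {
    Ok(TextAnalyzer::builder(NgramTokenizer::new(1, 4, true)?)
        .filter(RemoveLongFilter::limit(40))
        .filter(LowerCaser)
        .filter(AsciiFoldingFilter)
        .build())
}

fn main() -> tantivy::Result<()> {
    let mut analyzer = build_prefix_analyzer()?;
    let mut stream = analyzer.token_stream("Bratislava");
    while let Some(token) = stream.next() {
        // prefix_only = true and max_gram = 4, so this prints: b, br, bra, brat
        println!("{}", token.text);
    }
    Ok(())
}

Because `prefix_only` is true, only n-grams anchored at the start of the text are emitted, so lowering `max_gram` from 15 to 4 caps each indexed value at four prefix tokens.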