working v12

This commit is contained in:
Priec
2026-05-17 13:10:44 +02:00
parent 6a87750329
commit dc273506b7
9 changed files with 121 additions and 50 deletions

View File

@@ -1,8 +1,8 @@
use std::path::{Path, PathBuf};
use tantivy::schema::{
Field, IndexRecordOption, JsonObjectOptions, Schema, Term, TextFieldIndexing, INDEXED, STORED,
STRING,
Field, IndexRecordOption, JsonObjectOptions, Schema, Term, TextFieldIndexing, TextOptions,
INDEXED, STORED, STRING,
};
use tantivy::tokenizer::{
AsciiFoldingFilter, LowerCaser, NgramTokenizer, RawTokenizer, RemoveLongFilter,
@@ -13,6 +13,7 @@ use tantivy::Index;
pub const F_PG_ID: &str = "pg_id";
pub const F_TABLE_NAME: &str = "table_name";
pub const F_ROW_KEY: &str = "row_key";
pub const F_ALL_TEXT: &str = "all_text";
pub const F_DATA_WORD: &str = "data_word";
pub const F_DATA_NGRAM: &str = "data_ngram";
pub const F_DATA_EXACT: &str = "data_exact";
@@ -59,6 +60,7 @@ pub fn create_search_schema() -> Schema {
schema_builder.add_u64_field(F_PG_ID, INDEXED | STORED);
schema_builder.add_text_field(F_TABLE_NAME, STRING | STORED);
schema_builder.add_text_field(F_ROW_KEY, STRING | STORED);
schema_builder.add_text_field(F_ALL_TEXT, text_options(TOK_WORD));
schema_builder.add_json_field(F_DATA_WORD, json_options(TOK_WORD, true, false));
schema_builder.add_json_field(F_DATA_NGRAM, json_options(TOK_NGRAM, true, false));
@@ -67,6 +69,14 @@ pub fn create_search_schema() -> Schema {
schema_builder.build()
}
fn text_options(tokenizer_name: &str) -> TextOptions {
let indexing = TextFieldIndexing::default()
.set_tokenizer(tokenizer_name)
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
TextOptions::default().set_indexing_options(indexing)
}
fn json_options(tokenizer_name: &str, with_positions: bool, stored: bool) -> JsonObjectOptions {
let index_option = if with_positions {
IndexRecordOption::WithFreqsAndPositions
@@ -153,6 +163,7 @@ pub struct SchemaFields {
pub pg_id: Field,
pub table_name: Field,
pub row_key: Field,
pub all_text: Field,
pub data_word: Field,
pub data_ngram: Field,
pub data_exact: Field,
@@ -164,6 +175,7 @@ impl SchemaFields {
pg_id: get_field(schema, F_PG_ID)?,
table_name: get_field(schema, F_TABLE_NAME)?,
row_key: get_field(schema, F_ROW_KEY)?,
all_text: get_field(schema, F_ALL_TEXT)?,
data_word: get_field(schema, F_DATA_WORD)?,
data_ngram: get_field(schema, F_DATA_NGRAM)?,
data_exact: get_field(schema, F_DATA_EXACT)?,