Better search, but it still has some flaws. It at least works, even though it's not perfect. Needs more testing, but I'm pretty happy with it right now; keeping it this way.

This commit is contained in:
filipriec
2025-06-10 00:22:31 +02:00
parent 4760f42589
commit 350c522d19
3 changed files with 202 additions and 91 deletions

View File

@@ -1,11 +1,11 @@
// src/indexer.rs
// server/src/indexer.rs
use std::path::Path;
use sqlx::{PgPool, Row};
use tantivy::schema::{Schema, Term};
use tantivy::{doc, Index, IndexWriter};
use tantivy::schema::Term;
use tantivy::{doc, IndexWriter};
use tokio::sync::mpsc::Receiver;
use tracing::{error, info, warn};
use tantivy::schema::Schema;
use crate::search_schema;
const INDEX_DIR: &str = "./tantivy_indexes";
@@ -49,44 +49,39 @@ async fn handle_add_or_update(
pool: &PgPool,
data: IndexCommandData,
) -> anyhow::Result<()> {
// 1. Fetch the full row data from PostgreSQL
let qualified_table = format!("gen.\"{}\"", data.table_name);
let query_str = format!(
"SELECT to_jsonb(t) AS data FROM {} t WHERE id = $1",
qualified_table
);
let row = sqlx::query(&query_str)
.bind(data.row_id)
.fetch_one(pool)
.await?;
let json_data: serde_json::Value = row.try_get("data")?;
// 2. Extract all text content for Slovak processing
let slovak_text = extract_text_content(&json_data);
// 3. Open the index and write the document
let (mut writer, schema) = get_index_writer(&data.table_name)?;
let pg_id_field = schema.get_field("pg_id").unwrap();
let text_sk_field = schema.get_field("text_sk").unwrap();
let prefix_edge_field = schema.get_field("prefix_edge").unwrap();
let prefix_full_field = schema.get_field("prefix_full").unwrap();
let text_ngram_field = schema.get_field("text_ngram").unwrap();
// First, delete any existing document with this ID to handle updates
let id_term = Term::from_field_u64(pg_id_field, data.row_id as u64);
writer.delete_term(id_term);
// Add the new document
writer.add_document(doc!(
pg_id_field => data.row_id as u64,
text_sk_field => slovak_text
prefix_edge_field => slovak_text.clone(),
prefix_full_field => slovak_text.clone(),
text_ngram_field => slovak_text
))?;
// 4. Commit changes
writer.commit()?;
info!(
"Successfully indexed Slovak document id:{} for table:{}",
"Successfully indexed document id:{} for table:{}",
data.row_id, data.table_name
);
Ok(())
}
@@ -123,7 +118,7 @@ fn get_index_writer(
/// Extract all text content from a JSON object for indexing
fn extract_text_content(json_data: &serde_json::Value) -> String {
let mut full_text = String::new();
if let Some(obj) = json_data.as_object() {
for value in obj.values() {
match value {
@@ -135,11 +130,10 @@ fn extract_text_content(json_data: &serde_json::Value) -> String {
full_text.push_str(&n.to_string());
full_text.push(' ');
}
// We could recursively handle nested objects if needed
_ => {}
}
}
}
full_text.trim().to_string()
}