Better search, but it still has some flaws. It at least works, even though it's not perfect. Needs more testing, but I'm pretty happy with it right now; keeping it this way.

This commit is contained in:
filipriec
2025-06-10 00:22:31 +02:00
parent 4760f42589
commit 350c522d19
3 changed files with 202 additions and 91 deletions

View File

@@ -1,11 +1,11 @@
// src/indexer.rs
// server/src/indexer.rs
use std::path::Path;
use sqlx::{PgPool, Row};
use tantivy::schema::{Schema, Term};
use tantivy::{doc, Index, IndexWriter};
use tantivy::schema::Term;
use tantivy::{doc, IndexWriter};
use tokio::sync::mpsc::Receiver;
use tracing::{error, info, warn};
use tantivy::schema::Schema;
use crate::search_schema;
const INDEX_DIR: &str = "./tantivy_indexes";
@@ -49,44 +49,39 @@ async fn handle_add_or_update(
pool: &PgPool,
data: IndexCommandData,
) -> anyhow::Result<()> {
// 1. Fetch the full row data from PostgreSQL
let qualified_table = format!("gen.\"{}\"", data.table_name);
let query_str = format!(
"SELECT to_jsonb(t) AS data FROM {} t WHERE id = $1",
qualified_table
);
let row = sqlx::query(&query_str)
.bind(data.row_id)
.fetch_one(pool)
.await?;
let json_data: serde_json::Value = row.try_get("data")?;
// 2. Extract all text content for Slovak processing
let slovak_text = extract_text_content(&json_data);
// 3. Open the index and write the document
let (mut writer, schema) = get_index_writer(&data.table_name)?;
let pg_id_field = schema.get_field("pg_id").unwrap();
let text_sk_field = schema.get_field("text_sk").unwrap();
let prefix_edge_field = schema.get_field("prefix_edge").unwrap();
let prefix_full_field = schema.get_field("prefix_full").unwrap();
let text_ngram_field = schema.get_field("text_ngram").unwrap();
// First, delete any existing document with this ID to handle updates
let id_term = Term::from_field_u64(pg_id_field, data.row_id as u64);
writer.delete_term(id_term);
// Add the new document
writer.add_document(doc!(
pg_id_field => data.row_id as u64,
text_sk_field => slovak_text
prefix_edge_field => slovak_text.clone(),
prefix_full_field => slovak_text.clone(),
text_ngram_field => slovak_text
))?;
// 4. Commit changes
writer.commit()?;
info!(
"Successfully indexed Slovak document id:{} for table:{}",
"Successfully indexed document id:{} for table:{}",
data.row_id, data.table_name
);
Ok(())
}
@@ -123,7 +118,7 @@ fn get_index_writer(
/// Extract all text content from a JSON object for indexing
fn extract_text_content(json_data: &serde_json::Value) -> String {
let mut full_text = String::new();
if let Some(obj) = json_data.as_object() {
for value in obj.values() {
match value {
@@ -135,11 +130,10 @@ fn extract_text_content(json_data: &serde_json::Value) -> String {
full_text.push_str(&n.to_string());
full_text.push(' ');
}
// We could recursively handle nested objects if needed
_ => {}
}
}
}
full_text.trim().to_string()
}