Compare commits
Comparing v0.6.0...90f8aedc3b (10 commits)

Commits (SHA1):
90f8aedc3b
2a811b1f8c
1f9c29411e
b928004c76
fb4769301c
036e12f345
1ceab57f3b
5de1cd7623
1867de513d
42181499fe
.gitignore (vendored): 1 change
@@ -6,3 +6,4 @@ steel_decimal/tests/property_tests.proptest-regressions
 .direnv/
 canvas/*.toml
 .aider*
+.codex
Cargo.lock (generated): 45 changes
@@ -493,7 +493,7 @@ checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
 
 [[package]]
 name = "canvas"
-version = "0.5.10"
+version = "0.6.3"
 dependencies = [
  "anyhow",
  "async-trait",

@@ -585,7 +585,7 @@ dependencies = [
 
 [[package]]
 name = "client"
-version = "0.5.10"
+version = "0.6.3"
 dependencies = [
  "anyhow",
  "async-trait",

@@ -596,6 +596,7 @@ dependencies = [
  "dotenvy",
  "futures",
  "lazy_static",
+ "nucleo",
  "prost 0.13.5",
  "prost-types 0.13.5",
  "ratatui",

@@ -614,7 +615,6 @@ dependencies = [
  "tonic",
  "tracing",
  "tracing-subscriber",
- "tui-textarea",
  "unicode-segmentation",
  "unicode-width 0.2.0",
  "uuid",

@@ -641,7 +641,7 @@ dependencies = [
 
 [[package]]
 name = "common"
-version = "0.5.10"
+version = "0.6.3"
 dependencies = [
  "prost 0.13.5",
  "prost-build 0.14.1",

@@ -2104,6 +2104,27 @@ dependencies = [
  "windows-sys 0.60.2",
 ]
+
+[[package]]
+name = "nucleo"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5262af4c94921c2646c5ac6ff7900c2af9cbb08dc26a797e18130a7019c039d4"
+dependencies = [
+ "nucleo-matcher",
+ "parking_lot",
+ "rayon",
+]
+
+[[package]]
+name = "nucleo-matcher"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bf33f538733d1a5a3494b836ba913207f14d9d4a1d3cd67030c5061bdd2cac85"
+dependencies = [
+ "memchr",
+ "unicode-segmentation",
+]
 
 [[package]]
 name = "num-bigint"
 version = "0.4.6"

@@ -3095,7 +3116,7 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"
 
 [[package]]
 name = "search"
-version = "0.5.10"
+version = "0.6.3"
 dependencies = [
  "anyhow",
  "common",

@@ -3194,7 +3215,7 @@ dependencies = [
 
 [[package]]
 name = "server"
-version = "0.5.10"
+version = "0.6.3"
 dependencies = [
  "anyhow",
  "bcrypt",

@@ -4414,18 +4435,6 @@ version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e78122066b0cb818b8afd08f7ed22f7fdbc3e90815035726f0840d0d26c0747a"
-
-[[package]]
-name = "tui-textarea"
-version = "0.7.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0a5318dd619ed73c52a9417ad19046724effc1287fb75cdcc4eca1d6ac1acbae"
-dependencies = [
- "crossterm",
- "ratatui",
- "regex",
- "unicode-width 0.2.0",
-]
 
 [[package]]
 name = "typed-arena"
 version = "2.0.2"

Cargo.toml
@@ -5,7 +5,7 @@ resolver = "2"
 [workspace.package]
 # TODO: idk how to do the name, fix later
 # name = "komp_ac"
-version = "0.5.10"
+version = "0.6.3"
 edition = "2021"
 license = "GPL-3.0-or-later"
 authors = ["Filip Priečinský <filippriec@gmail.com>"]

canvas (submodule): 2 changes
Submodule canvas updated: 5227bed900...abbda5b7a9

client (submodule): 2 changes
Submodule client updated: 398d54f6e3...cdce1fc5f4

@@ -61,6 +61,26 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
             ".komp_ac.table_definition.TableDefinitionResponse",
             "#[derive(serde::Serialize, serde::Deserialize)]"
         )
+        .type_attribute(
+            ".komp_ac.table_definition.GetColumnAliasRenameHistoryRequest",
+            "#[derive(serde::Serialize, serde::Deserialize)]"
+        )
+        .type_attribute(
+            ".komp_ac.table_definition.ColumnAliasRenameHistoryEntry",
+            "#[derive(serde::Serialize, serde::Deserialize)]"
+        )
+        .type_attribute(
+            ".komp_ac.table_definition.GetColumnAliasRenameHistoryResponse",
+            "#[derive(serde::Serialize, serde::Deserialize)]"
+        )
+        .type_attribute(
+            ".komp_ac.table_definition.RenameColumnAliasRequest",
+            "#[derive(serde::Serialize, serde::Deserialize)]"
+        )
+        .type_attribute(
+            ".komp_ac.table_definition.RenameColumnAliasResponse",
+            "#[derive(serde::Serialize, serde::Deserialize)]"
+        )
         .type_attribute(
             ".komp_ac.table_script.PostTableScriptRequest",
             "#[derive(serde::Serialize, serde::Deserialize)]",
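
The added .type_attribute(...) calls attach serde derives to specific generated protobuf messages at code-generation time. A minimal sketch of the same pattern (not part of this diff; the proto path, the file names, and the exact compile call are assumptions and depend on the tonic-build version):

// Hypothetical minimal build.rs sketch, not from this repository. It shows how
// tonic-build's type_attribute() adds serde derives to one generated message
// before the protos are compiled.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    tonic_build::configure()
        .type_attribute(
            ".komp_ac.table_definition.RenameColumnAliasRequest",
            "#[derive(serde::Serialize, serde::Deserialize)]",
        )
        // File and include paths here are placeholders.
        .compile_protos(&["proto/table_definition.proto"], &["proto"])?;
    Ok(())
}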

@@ -3,18 +3,34 @@ syntax = "proto3";
 package komp_ac.search;
 
 service Searcher {
-  rpc SearchTable(SearchRequest) returns (SearchResponse);
+  rpc Search(SearchRequest) returns (SearchResponse);
 }
 
+enum MatchMode {
+  MATCH_MODE_UNSPECIFIED = 0;
+  MATCH_MODE_FUZZY = 1;
+  MATCH_MODE_EXACT = 2;
+}
+
+message ColumnConstraint {
+  string column = 1;
+  string query = 2;
+  MatchMode mode = 3;
+}
+
 message SearchRequest {
-  string table_name = 1;
-  string query = 2;
+  string profile_name = 1;
+  optional string table_name = 2;
+  string free_query = 3;
+  repeated ColumnConstraint must = 4;
+  optional uint32 limit = 5;
 }
 message SearchResponse {
   message Hit {
     int64 id = 1; // PostgreSQL row ID
     float score = 2;
     string content_json = 3;
+    string table_name = 4;
   }
   repeated Hit hits = 1;
 }
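
The reworked SearchRequest splits the old single query into a profile name, an optional table filter, a free-text query, and structured per-column constraints. A minimal construction sketch (not from this diff) using the generated prost types shown later in this compare; the profile, table, and column values are placeholders:

// Hypothetical request construction; all string values are placeholders.
use common::proto::komp_ac::search::{ColumnConstraint, MatchMode, SearchRequest};

fn example_request() -> SearchRequest {
    SearchRequest {
        profile_name: "default".to_string(),
        table_name: Some("customers".to_string()),
        free_query: "novak".to_string(),
        must: vec![ColumnConstraint {
            column: "city".to_string(),
            query: "Bratislava".to_string(),
            mode: MatchMode::Exact as i32,
        }],
        limit: Some(10),
    }
}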

@@ -18,6 +18,16 @@ service TableDefinition {
   // This provides a tree-like overview of table relationships.
   rpc GetProfileTree(komp_ac.common.Empty) returns (ProfileTreeResponse);
 
+  // Fetches all tables with their columns and scripts for a specific profile.
+  // Pure data retrieval - no business logic.
+  rpc GetProfileDetails(GetProfileDetailsRequest) returns (GetProfileDetailsResponse);
+
+  // Returns the stored rename history for column aliases in one profile.
+  rpc GetColumnAliasRenameHistory(GetColumnAliasRenameHistoryRequest) returns (GetColumnAliasRenameHistoryResponse);
+
+  // Renames a user-visible column alias while keeping the physical column unchanged.
+  rpc RenameColumnAlias(RenameColumnAliasRequest) returns (RenameColumnAliasResponse);
+
   // Drops a table and its metadata, then deletes the profile if it becomes empty.
   rpc DeleteTable(DeleteTableRequest) returns (DeleteTableResponse);
 }

@@ -119,6 +129,74 @@ message ProfileTreeResponse {
   repeated Profile profiles = 1;
 }
 
+// Request to fetch all tables, columns and scripts for a profile.
+message GetProfileDetailsRequest {
+  // Profile (schema) name to fetch details for.
+  string profile_name = 1;
+}
+
+// Response with all tables, columns and scripts for a profile.
+message GetProfileDetailsResponse {
+  string profile_name = 1;
+  repeated TableDetail tables = 2;
+}
+
+// Request to fetch recorded column alias rename history for one profile.
+message GetColumnAliasRenameHistoryRequest {
+  string profile_name = 1;
+
+  // Optional filter. When omitted, returns all tables in the profile.
+  optional int64 table_definition_id = 2;
+}
+
+// One recorded column alias rename.
+message ColumnAliasRenameHistoryEntry {
+  int64 id = 1;
+  string profile_name = 2;
+  int64 table_definition_id = 3;
+  string table_name = 4;
+  string old_column_name = 5;
+  string new_column_name = 6;
+  string created_at = 7;
+}
+
+// Response with stored column alias rename history rows.
+message GetColumnAliasRenameHistoryResponse {
+  string profile_name = 1;
+  repeated ColumnAliasRenameHistoryEntry entries = 2;
+}
+
+// Describes a table with its columns and associated scripts.
+message TableDetail {
+  string name = 1;
+  int64 id = 2;
+  repeated ColumnDefinition columns = 3;
+  repeated ScriptInfo scripts = 4;
+}
+
+// A script that targets a specific column in a table.
+message ScriptInfo {
+  int64 script_id = 1;
+  string target_column = 2;
+  string target_column_type = 3;
+  string script = 4;
+  string description = 5;
+}
+
+// Request to rename one user-visible column alias in a table.
+message RenameColumnAliasRequest {
+  string profile_name = 1;
+  string table_name = 2;
+  string old_column_name = 3;
+  string new_column_name = 4;
+}
+
+// Response after renaming one column alias.
+message RenameColumnAliasResponse {
+  bool success = 1;
+  string message = 2;
+}
+
 // Request to delete one table definition entirely.
 message DeleteTableRequest {
   // Profile (schema) name owning the table (must exist).
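
A hedged usage sketch for one of the new RPCs (not part of this diff): calling RenameColumnAlias through the tonic-generated client. The module path, client type name, connect() call, endpoint, and argument values follow standard tonic codegen conventions and are assumptions here:

// Hypothetical client call for the new RenameColumnAlias RPC; module path,
// client type, endpoint, and all argument values are placeholders.
use common::proto::komp_ac::table_definition::{
    table_definition_client::TableDefinitionClient, RenameColumnAliasRequest,
};

async fn rename_alias_example() -> Result<(), Box<dyn std::error::Error>> {
    let mut client = TableDefinitionClient::connect("http://127.0.0.1:50051").await?;
    let response = client
        .rename_column_alias(RenameColumnAliasRequest {
            profile_name: "default".into(),
            table_name: "customers".into(),
            old_column_name: "adresa".into(),
            new_column_name: "address".into(),
        })
        .await?
        .into_inner();
    println!("success={} message={}", response.success, response.message);
    Ok(())
}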

@@ -4,40 +4,45 @@ package komp_ac.table_structure;
 
 import "common.proto";
 
-// Introspects the physical PostgreSQL table for a given logical table
-// (defined in table_definitions) and returns its column structure.
+// Introspects the physical PostgreSQL tables for one or more logical tables
+// (defined in table_definitions) and returns their column structures.
 // The server validates that:
 // - The profile (schema) exists in `schemas`
-// - The table is defined for that profile in `table_definitions`
-// It then queries information_schema for the physical table and returns
-// normalized column metadata. If the physical table is missing despite
-// a definition, the response may contain an empty `columns` list.
+// - Every table is defined for that profile in `table_definitions`
+// It then queries information_schema for the physical tables and returns
+// normalized column metadata.
 service TableStructureService {
   // Return the physical column list (name, normalized data_type,
-  // nullability, primary key flag) for a table in a profile.
+  // nullability, primary key flag) for one or more tables in a profile.
   //
   // Behavior:
   // - NOT_FOUND if profile doesn't exist in `schemas`
-  // - NOT_FOUND if table not defined for that profile in `table_definitions`
+  // - NOT_FOUND if any table is not defined for that profile in `table_definitions`
   // - Queries information_schema.columns ordered by ordinal position
   // - Normalizes data_type text (details under TableColumn.data_type)
-  // - Returns an empty list if the table is validated but has no visible
-  //   columns in information_schema (e.g., physical table missing)
-  rpc GetTableStructure(GetTableStructureRequest) returns (TableStructureResponse);
+  // - Returns an error if any validated table has no visible columns in
+  //   information_schema (e.g., physical table missing)
+  rpc GetTableStructure(GetTableStructureRequest) returns (GetTableStructureResponse);
 }
 
-// Request identifying the profile (schema) and table to inspect.
+// Request identifying the profile (schema) and tables to inspect.
 message GetTableStructureRequest {
   // Required. Profile (PostgreSQL schema) name. Must exist in `schemas`.
   string profile_name = 1;
 
-  // Required. Table name within the profile. Must exist in `table_definitions`
-  // for the given profile. The physical table is then introspected via
-  // information_schema.
-  string table_name = 2;
+  // Required. Table names within the profile. Each must exist in
+  // `table_definitions` for the given profile. The physical tables are then
+  // introspected via information_schema.
+  repeated string table_names = 2;
 }
 
-// Response with the ordered list of columns (by ordinal position).
+// Batched response keyed by table name.
+message GetTableStructureResponse {
+  // Per-table physical column lists keyed by requested table name.
+  map<string, TableStructureResponse> table_structures = 1;
+}
+
+// Response with the ordered list of columns (by ordinal position) for one table.
 message TableStructureResponse {
   // Columns of the physical table, including system columns (id, deleted,
   // created_at), user-defined columns, and any foreign-key columns such as
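
GetTableStructure now accepts repeated table_names and returns one response map keyed by the requested names. A small consumption sketch (not from this diff; the module path and request values are assumptions):

// Hypothetical consumer of the new batched response; the module path is an
// assumption and the request values are placeholders.
use common::proto::komp_ac::table_structure::{GetTableStructureRequest, GetTableStructureResponse};

fn example_request() -> GetTableStructureRequest {
    GetTableStructureRequest {
        profile_name: "default".to_string(),
        table_names: vec!["customers".to_string(), "orders".to_string()],
    }
}

fn summarize(resp: &GetTableStructureResponse) {
    // Each map entry holds one per-table TableStructureResponse.
    for (table_name, structure) in &resp.table_structures {
        println!("{table_name}: {structure:?}");
    }
}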
Binary file not shown.
@@ -1,10 +1,25 @@
|
||||
// This file is @generated by prost-build.
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct ColumnConstraint {
|
||||
#[prost(string, tag = "1")]
|
||||
pub column: ::prost::alloc::string::String,
|
||||
#[prost(string, tag = "2")]
|
||||
pub query: ::prost::alloc::string::String,
|
||||
#[prost(enumeration = "MatchMode", tag = "3")]
|
||||
pub mode: i32,
|
||||
}
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct SearchRequest {
|
||||
#[prost(string, tag = "1")]
|
||||
pub table_name: ::prost::alloc::string::String,
|
||||
#[prost(string, tag = "2")]
|
||||
pub query: ::prost::alloc::string::String,
|
||||
pub profile_name: ::prost::alloc::string::String,
|
||||
#[prost(string, optional, tag = "2")]
|
||||
pub table_name: ::core::option::Option<::prost::alloc::string::String>,
|
||||
#[prost(string, tag = "3")]
|
||||
pub free_query: ::prost::alloc::string::String,
|
||||
#[prost(message, repeated, tag = "4")]
|
||||
pub must: ::prost::alloc::vec::Vec<ColumnConstraint>,
|
||||
#[prost(uint32, optional, tag = "5")]
|
||||
pub limit: ::core::option::Option<u32>,
|
||||
}
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct SearchResponse {
|
||||
@@ -22,6 +37,37 @@ pub mod search_response {
|
||||
pub score: f32,
|
||||
#[prost(string, tag = "3")]
|
||||
pub content_json: ::prost::alloc::string::String,
|
||||
#[prost(string, tag = "4")]
|
||||
pub table_name: ::prost::alloc::string::String,
|
||||
}
|
||||
}
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
|
||||
#[repr(i32)]
|
||||
pub enum MatchMode {
|
||||
Unspecified = 0,
|
||||
Fuzzy = 1,
|
||||
Exact = 2,
|
||||
}
|
||||
impl MatchMode {
|
||||
/// String value of the enum field names used in the ProtoBuf definition.
|
||||
///
|
||||
/// The values are not transformed in any way and thus are considered stable
|
||||
/// (if the ProtoBuf definition does not change) and safe for programmatic use.
|
||||
pub fn as_str_name(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Unspecified => "MATCH_MODE_UNSPECIFIED",
|
||||
Self::Fuzzy => "MATCH_MODE_FUZZY",
|
||||
Self::Exact => "MATCH_MODE_EXACT",
|
||||
}
|
||||
}
|
||||
/// Creates an enum from field names used in the ProtoBuf definition.
|
||||
pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
|
||||
match value {
|
||||
"MATCH_MODE_UNSPECIFIED" => Some(Self::Unspecified),
|
||||
"MATCH_MODE_FUZZY" => Some(Self::Fuzzy),
|
||||
"MATCH_MODE_EXACT" => Some(Self::Exact),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Generated client implementations.
|
||||
@@ -115,7 +161,7 @@ pub mod searcher_client {
|
||||
self.inner = self.inner.max_encoding_message_size(limit);
|
||||
self
|
||||
}
|
||||
pub async fn search_table(
|
||||
pub async fn search(
|
||||
&mut self,
|
||||
request: impl tonic::IntoRequest<super::SearchRequest>,
|
||||
) -> std::result::Result<tonic::Response<super::SearchResponse>, tonic::Status> {
|
||||
@@ -129,11 +175,11 @@ pub mod searcher_client {
|
||||
})?;
|
||||
let codec = tonic::codec::ProstCodec::default();
|
||||
let path = http::uri::PathAndQuery::from_static(
|
||||
"/komp_ac.search.Searcher/SearchTable",
|
||||
"/komp_ac.search.Searcher/Search",
|
||||
);
|
||||
let mut req = request.into_request();
|
||||
req.extensions_mut()
|
||||
.insert(GrpcMethod::new("komp_ac.search.Searcher", "SearchTable"));
|
||||
.insert(GrpcMethod::new("komp_ac.search.Searcher", "Search"));
|
||||
self.inner.unary(req, path, codec).await
|
||||
}
|
||||
}
|
||||
@@ -151,7 +197,7 @@ pub mod searcher_server {
|
||||
/// Generated trait containing gRPC methods that should be implemented for use with SearcherServer.
|
||||
#[async_trait]
|
||||
pub trait Searcher: std::marker::Send + std::marker::Sync + 'static {
|
||||
async fn search_table(
|
||||
async fn search(
|
||||
&self,
|
||||
request: tonic::Request<super::SearchRequest>,
|
||||
) -> std::result::Result<tonic::Response<super::SearchResponse>, tonic::Status>;
|
||||
@@ -232,11 +278,11 @@ pub mod searcher_server {
|
||||
}
|
||||
fn call(&mut self, req: http::Request<B>) -> Self::Future {
|
||||
match req.uri().path() {
|
||||
"/komp_ac.search.Searcher/SearchTable" => {
|
||||
"/komp_ac.search.Searcher/Search" => {
|
||||
#[allow(non_camel_case_types)]
|
||||
struct SearchTableSvc<T: Searcher>(pub Arc<T>);
|
||||
struct SearchSvc<T: Searcher>(pub Arc<T>);
|
||||
impl<T: Searcher> tonic::server::UnaryService<super::SearchRequest>
|
||||
for SearchTableSvc<T> {
|
||||
for SearchSvc<T> {
|
||||
type Response = super::SearchResponse;
|
||||
type Future = BoxFuture<
|
||||
tonic::Response<Self::Response>,
|
||||
@@ -248,7 +294,7 @@ pub mod searcher_server {
|
||||
) -> Self::Future {
|
||||
let inner = Arc::clone(&self.0);
|
||||
let fut = async move {
|
||||
<T as Searcher>::search_table(&inner, request).await
|
||||
<T as Searcher>::search(&inner, request).await
|
||||
};
|
||||
Box::pin(fut)
|
||||
}
|
||||
@@ -259,7 +305,7 @@ pub mod searcher_server {
|
||||
let max_encoding_message_size = self.max_encoding_message_size;
|
||||
let inner = self.inner.clone();
|
||||
let fut = async move {
|
||||
let method = SearchTableSvc(inner);
|
||||
let method = SearchSvc(inner);
|
||||
let codec = tonic::codec::ProstCodec::default();
|
||||
let mut grpc = tonic::server::Grpc::new(codec)
|
||||
.apply_compression_config(
|
||||
|
||||
@@ -110,6 +110,107 @@ pub mod profile_tree_response {
|
||||
pub tables: ::prost::alloc::vec::Vec<Table>,
|
||||
}
|
||||
}
|
||||
/// Request to fetch all tables, columns and scripts for a profile.
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct GetProfileDetailsRequest {
|
||||
/// Profile (schema) name to fetch details for.
|
||||
#[prost(string, tag = "1")]
|
||||
pub profile_name: ::prost::alloc::string::String,
|
||||
}
|
||||
/// Response with all tables, columns and scripts for a profile.
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct GetProfileDetailsResponse {
|
||||
#[prost(string, tag = "1")]
|
||||
pub profile_name: ::prost::alloc::string::String,
|
||||
#[prost(message, repeated, tag = "2")]
|
||||
pub tables: ::prost::alloc::vec::Vec<TableDetail>,
|
||||
}
|
||||
/// Request to fetch recorded column alias rename history for one profile.
|
||||
#[derive(serde::Serialize, serde::Deserialize)]
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct GetColumnAliasRenameHistoryRequest {
|
||||
#[prost(string, tag = "1")]
|
||||
pub profile_name: ::prost::alloc::string::String,
|
||||
/// Optional filter. When omitted, returns all tables in the profile.
|
||||
#[prost(int64, optional, tag = "2")]
|
||||
pub table_definition_id: ::core::option::Option<i64>,
|
||||
}
|
||||
/// One recorded column alias rename.
|
||||
#[derive(serde::Serialize, serde::Deserialize)]
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct ColumnAliasRenameHistoryEntry {
|
||||
#[prost(int64, tag = "1")]
|
||||
pub id: i64,
|
||||
#[prost(string, tag = "2")]
|
||||
pub profile_name: ::prost::alloc::string::String,
|
||||
#[prost(int64, tag = "3")]
|
||||
pub table_definition_id: i64,
|
||||
#[prost(string, tag = "4")]
|
||||
pub table_name: ::prost::alloc::string::String,
|
||||
#[prost(string, tag = "5")]
|
||||
pub old_column_name: ::prost::alloc::string::String,
|
||||
#[prost(string, tag = "6")]
|
||||
pub new_column_name: ::prost::alloc::string::String,
|
||||
#[prost(string, tag = "7")]
|
||||
pub created_at: ::prost::alloc::string::String,
|
||||
}
|
||||
/// Response with stored column alias rename history rows.
|
||||
#[derive(serde::Serialize, serde::Deserialize)]
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct GetColumnAliasRenameHistoryResponse {
|
||||
#[prost(string, tag = "1")]
|
||||
pub profile_name: ::prost::alloc::string::String,
|
||||
#[prost(message, repeated, tag = "2")]
|
||||
pub entries: ::prost::alloc::vec::Vec<ColumnAliasRenameHistoryEntry>,
|
||||
}
|
||||
/// Describes a table with its columns and associated scripts.
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct TableDetail {
|
||||
#[prost(string, tag = "1")]
|
||||
pub name: ::prost::alloc::string::String,
|
||||
#[prost(int64, tag = "2")]
|
||||
pub id: i64,
|
||||
#[prost(message, repeated, tag = "3")]
|
||||
pub columns: ::prost::alloc::vec::Vec<ColumnDefinition>,
|
||||
#[prost(message, repeated, tag = "4")]
|
||||
pub scripts: ::prost::alloc::vec::Vec<ScriptInfo>,
|
||||
}
|
||||
/// A script that targets a specific column in a table.
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct ScriptInfo {
|
||||
#[prost(int64, tag = "1")]
|
||||
pub script_id: i64,
|
||||
#[prost(string, tag = "2")]
|
||||
pub target_column: ::prost::alloc::string::String,
|
||||
#[prost(string, tag = "3")]
|
||||
pub target_column_type: ::prost::alloc::string::String,
|
||||
#[prost(string, tag = "4")]
|
||||
pub script: ::prost::alloc::string::String,
|
||||
#[prost(string, tag = "5")]
|
||||
pub description: ::prost::alloc::string::String,
|
||||
}
|
||||
/// Request to rename one user-visible column alias in a table.
|
||||
#[derive(serde::Serialize, serde::Deserialize)]
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct RenameColumnAliasRequest {
|
||||
#[prost(string, tag = "1")]
|
||||
pub profile_name: ::prost::alloc::string::String,
|
||||
#[prost(string, tag = "2")]
|
||||
pub table_name: ::prost::alloc::string::String,
|
||||
#[prost(string, tag = "3")]
|
||||
pub old_column_name: ::prost::alloc::string::String,
|
||||
#[prost(string, tag = "4")]
|
||||
pub new_column_name: ::prost::alloc::string::String,
|
||||
}
|
||||
/// Response after renaming one column alias.
|
||||
#[derive(serde::Serialize, serde::Deserialize)]
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct RenameColumnAliasResponse {
|
||||
#[prost(bool, tag = "1")]
|
||||
pub success: bool,
|
||||
#[prost(string, tag = "2")]
|
||||
pub message: ::prost::alloc::string::String,
|
||||
}
|
||||
/// Request to delete one table definition entirely.
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct DeleteTableRequest {
|
||||
@@ -289,6 +390,97 @@ pub mod table_definition_client {
|
||||
);
|
||||
self.inner.unary(req, path, codec).await
|
||||
}
|
||||
/// Fetches all tables with their columns and scripts for a specific profile.
|
||||
/// Pure data retrieval - no business logic.
|
||||
pub async fn get_profile_details(
|
||||
&mut self,
|
||||
request: impl tonic::IntoRequest<super::GetProfileDetailsRequest>,
|
||||
) -> std::result::Result<
|
||||
tonic::Response<super::GetProfileDetailsResponse>,
|
||||
tonic::Status,
|
||||
> {
|
||||
self.inner
|
||||
.ready()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tonic::Status::unknown(
|
||||
format!("Service was not ready: {}", e.into()),
|
||||
)
|
||||
})?;
|
||||
let codec = tonic::codec::ProstCodec::default();
|
||||
let path = http::uri::PathAndQuery::from_static(
|
||||
"/komp_ac.table_definition.TableDefinition/GetProfileDetails",
|
||||
);
|
||||
let mut req = request.into_request();
|
||||
req.extensions_mut()
|
||||
.insert(
|
||||
GrpcMethod::new(
|
||||
"komp_ac.table_definition.TableDefinition",
|
||||
"GetProfileDetails",
|
||||
),
|
||||
);
|
||||
self.inner.unary(req, path, codec).await
|
||||
}
|
||||
/// Returns the stored rename history for column aliases in one profile.
|
||||
pub async fn get_column_alias_rename_history(
|
||||
&mut self,
|
||||
request: impl tonic::IntoRequest<super::GetColumnAliasRenameHistoryRequest>,
|
||||
) -> std::result::Result<
|
||||
tonic::Response<super::GetColumnAliasRenameHistoryResponse>,
|
||||
tonic::Status,
|
||||
> {
|
||||
self.inner
|
||||
.ready()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tonic::Status::unknown(
|
||||
format!("Service was not ready: {}", e.into()),
|
||||
)
|
||||
})?;
|
||||
let codec = tonic::codec::ProstCodec::default();
|
||||
let path = http::uri::PathAndQuery::from_static(
|
||||
"/komp_ac.table_definition.TableDefinition/GetColumnAliasRenameHistory",
|
||||
);
|
||||
let mut req = request.into_request();
|
||||
req.extensions_mut()
|
||||
.insert(
|
||||
GrpcMethod::new(
|
||||
"komp_ac.table_definition.TableDefinition",
|
||||
"GetColumnAliasRenameHistory",
|
||||
),
|
||||
);
|
||||
self.inner.unary(req, path, codec).await
|
||||
}
|
||||
/// Renames a user-visible column alias while keeping the physical column unchanged.
|
||||
pub async fn rename_column_alias(
|
||||
&mut self,
|
||||
request: impl tonic::IntoRequest<super::RenameColumnAliasRequest>,
|
||||
) -> std::result::Result<
|
||||
tonic::Response<super::RenameColumnAliasResponse>,
|
||||
tonic::Status,
|
||||
> {
|
||||
self.inner
|
||||
.ready()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tonic::Status::unknown(
|
||||
format!("Service was not ready: {}", e.into()),
|
||||
)
|
||||
})?;
|
||||
let codec = tonic::codec::ProstCodec::default();
|
||||
let path = http::uri::PathAndQuery::from_static(
|
||||
"/komp_ac.table_definition.TableDefinition/RenameColumnAlias",
|
||||
);
|
||||
let mut req = request.into_request();
|
||||
req.extensions_mut()
|
||||
.insert(
|
||||
GrpcMethod::new(
|
||||
"komp_ac.table_definition.TableDefinition",
|
||||
"RenameColumnAlias",
|
||||
),
|
||||
);
|
||||
self.inner.unary(req, path, codec).await
|
||||
}
|
||||
/// Drops a table and its metadata, then deletes the profile if it becomes empty.
|
||||
pub async fn delete_table(
|
||||
&mut self,
|
||||
@@ -353,6 +545,31 @@ pub mod table_definition_server {
|
||||
tonic::Response<super::ProfileTreeResponse>,
|
||||
tonic::Status,
|
||||
>;
|
||||
/// Fetches all tables with their columns and scripts for a specific profile.
|
||||
/// Pure data retrieval - no business logic.
|
||||
async fn get_profile_details(
|
||||
&self,
|
||||
request: tonic::Request<super::GetProfileDetailsRequest>,
|
||||
) -> std::result::Result<
|
||||
tonic::Response<super::GetProfileDetailsResponse>,
|
||||
tonic::Status,
|
||||
>;
|
||||
/// Returns the stored rename history for column aliases in one profile.
|
||||
async fn get_column_alias_rename_history(
|
||||
&self,
|
||||
request: tonic::Request<super::GetColumnAliasRenameHistoryRequest>,
|
||||
) -> std::result::Result<
|
||||
tonic::Response<super::GetColumnAliasRenameHistoryResponse>,
|
||||
tonic::Status,
|
||||
>;
|
||||
/// Renames a user-visible column alias while keeping the physical column unchanged.
|
||||
async fn rename_column_alias(
|
||||
&self,
|
||||
request: tonic::Request<super::RenameColumnAliasRequest>,
|
||||
) -> std::result::Result<
|
||||
tonic::Response<super::RenameColumnAliasResponse>,
|
||||
tonic::Status,
|
||||
>;
|
||||
/// Drops a table and its metadata, then deletes the profile if it becomes empty.
|
||||
async fn delete_table(
|
||||
&self,
|
||||
@@ -537,6 +754,152 @@ pub mod table_definition_server {
|
||||
};
|
||||
Box::pin(fut)
|
||||
}
|
||||
"/komp_ac.table_definition.TableDefinition/GetProfileDetails" => {
|
||||
#[allow(non_camel_case_types)]
|
||||
struct GetProfileDetailsSvc<T: TableDefinition>(pub Arc<T>);
|
||||
impl<
|
||||
T: TableDefinition,
|
||||
> tonic::server::UnaryService<super::GetProfileDetailsRequest>
|
||||
for GetProfileDetailsSvc<T> {
|
||||
type Response = super::GetProfileDetailsResponse;
|
||||
type Future = BoxFuture<
|
||||
tonic::Response<Self::Response>,
|
||||
tonic::Status,
|
||||
>;
|
||||
fn call(
|
||||
&mut self,
|
||||
request: tonic::Request<super::GetProfileDetailsRequest>,
|
||||
) -> Self::Future {
|
||||
let inner = Arc::clone(&self.0);
|
||||
let fut = async move {
|
||||
<T as TableDefinition>::get_profile_details(&inner, request)
|
||||
.await
|
||||
};
|
||||
Box::pin(fut)
|
||||
}
|
||||
}
|
||||
let accept_compression_encodings = self.accept_compression_encodings;
|
||||
let send_compression_encodings = self.send_compression_encodings;
|
||||
let max_decoding_message_size = self.max_decoding_message_size;
|
||||
let max_encoding_message_size = self.max_encoding_message_size;
|
||||
let inner = self.inner.clone();
|
||||
let fut = async move {
|
||||
let method = GetProfileDetailsSvc(inner);
|
||||
let codec = tonic::codec::ProstCodec::default();
|
||||
let mut grpc = tonic::server::Grpc::new(codec)
|
||||
.apply_compression_config(
|
||||
accept_compression_encodings,
|
||||
send_compression_encodings,
|
||||
)
|
||||
.apply_max_message_size_config(
|
||||
max_decoding_message_size,
|
||||
max_encoding_message_size,
|
||||
);
|
||||
let res = grpc.unary(method, req).await;
|
||||
Ok(res)
|
||||
};
|
||||
Box::pin(fut)
|
||||
}
|
||||
"/komp_ac.table_definition.TableDefinition/GetColumnAliasRenameHistory" => {
|
||||
#[allow(non_camel_case_types)]
|
||||
struct GetColumnAliasRenameHistorySvc<T: TableDefinition>(
|
||||
pub Arc<T>,
|
||||
);
|
||||
impl<
|
||||
T: TableDefinition,
|
||||
> tonic::server::UnaryService<
|
||||
super::GetColumnAliasRenameHistoryRequest,
|
||||
> for GetColumnAliasRenameHistorySvc<T> {
|
||||
type Response = super::GetColumnAliasRenameHistoryResponse;
|
||||
type Future = BoxFuture<
|
||||
tonic::Response<Self::Response>,
|
||||
tonic::Status,
|
||||
>;
|
||||
fn call(
|
||||
&mut self,
|
||||
request: tonic::Request<
|
||||
super::GetColumnAliasRenameHistoryRequest,
|
||||
>,
|
||||
) -> Self::Future {
|
||||
let inner = Arc::clone(&self.0);
|
||||
let fut = async move {
|
||||
<T as TableDefinition>::get_column_alias_rename_history(
|
||||
&inner,
|
||||
request,
|
||||
)
|
||||
.await
|
||||
};
|
||||
Box::pin(fut)
|
||||
}
|
||||
}
|
||||
let accept_compression_encodings = self.accept_compression_encodings;
|
||||
let send_compression_encodings = self.send_compression_encodings;
|
||||
let max_decoding_message_size = self.max_decoding_message_size;
|
||||
let max_encoding_message_size = self.max_encoding_message_size;
|
||||
let inner = self.inner.clone();
|
||||
let fut = async move {
|
||||
let method = GetColumnAliasRenameHistorySvc(inner);
|
||||
let codec = tonic::codec::ProstCodec::default();
|
||||
let mut grpc = tonic::server::Grpc::new(codec)
|
||||
.apply_compression_config(
|
||||
accept_compression_encodings,
|
||||
send_compression_encodings,
|
||||
)
|
||||
.apply_max_message_size_config(
|
||||
max_decoding_message_size,
|
||||
max_encoding_message_size,
|
||||
);
|
||||
let res = grpc.unary(method, req).await;
|
||||
Ok(res)
|
||||
};
|
||||
Box::pin(fut)
|
||||
}
|
||||
"/komp_ac.table_definition.TableDefinition/RenameColumnAlias" => {
|
||||
#[allow(non_camel_case_types)]
|
||||
struct RenameColumnAliasSvc<T: TableDefinition>(pub Arc<T>);
|
||||
impl<
|
||||
T: TableDefinition,
|
||||
> tonic::server::UnaryService<super::RenameColumnAliasRequest>
|
||||
for RenameColumnAliasSvc<T> {
|
||||
type Response = super::RenameColumnAliasResponse;
|
||||
type Future = BoxFuture<
|
||||
tonic::Response<Self::Response>,
|
||||
tonic::Status,
|
||||
>;
|
||||
fn call(
|
||||
&mut self,
|
||||
request: tonic::Request<super::RenameColumnAliasRequest>,
|
||||
) -> Self::Future {
|
||||
let inner = Arc::clone(&self.0);
|
||||
let fut = async move {
|
||||
<T as TableDefinition>::rename_column_alias(&inner, request)
|
||||
.await
|
||||
};
|
||||
Box::pin(fut)
|
||||
}
|
||||
}
|
||||
let accept_compression_encodings = self.accept_compression_encodings;
|
||||
let send_compression_encodings = self.send_compression_encodings;
|
||||
let max_decoding_message_size = self.max_decoding_message_size;
|
||||
let max_encoding_message_size = self.max_encoding_message_size;
|
||||
let inner = self.inner.clone();
|
||||
let fut = async move {
|
||||
let method = RenameColumnAliasSvc(inner);
|
||||
let codec = tonic::codec::ProstCodec::default();
|
||||
let mut grpc = tonic::server::Grpc::new(codec)
|
||||
.apply_compression_config(
|
||||
accept_compression_encodings,
|
||||
send_compression_encodings,
|
||||
)
|
||||
.apply_max_message_size_config(
|
||||
max_decoding_message_size,
|
||||
max_encoding_message_size,
|
||||
);
|
||||
let res = grpc.unary(method, req).await;
|
||||
Ok(res)
|
||||
};
|
||||
Box::pin(fut)
|
||||
}
|
||||
"/komp_ac.table_definition.TableDefinition/DeleteTable" => {
|
||||
#[allow(non_camel_case_types)]
|
||||
struct DeleteTableSvc<T: TableDefinition>(pub Arc<T>);
|
||||
|
||||
@@ -1,17 +1,27 @@
|
||||
// This file is @generated by prost-build.
|
||||
/// Request identifying the profile (schema) and table to inspect.
|
||||
/// Request identifying the profile (schema) and tables to inspect.
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct GetTableStructureRequest {
|
||||
/// Required. Profile (PostgreSQL schema) name. Must exist in `schemas`.
|
||||
#[prost(string, tag = "1")]
|
||||
pub profile_name: ::prost::alloc::string::String,
|
||||
/// Required. Table name within the profile. Must exist in `table_definitions`
|
||||
/// for the given profile. The physical table is then introspected via
|
||||
/// information_schema.
|
||||
#[prost(string, tag = "2")]
|
||||
pub table_name: ::prost::alloc::string::String,
|
||||
/// Required. Table names within the profile. Each must exist in
|
||||
/// `table_definitions` for the given profile. The physical tables are then
|
||||
/// introspected via information_schema.
|
||||
#[prost(string, repeated, tag = "2")]
|
||||
pub table_names: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
|
||||
}
|
||||
/// Response with the ordered list of columns (by ordinal position).
|
||||
/// Batched response keyed by table name.
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct GetTableStructureResponse {
|
||||
/// Per-table physical column lists keyed by requested table name.
|
||||
#[prost(map = "string, message", tag = "1")]
|
||||
pub table_structures: ::std::collections::HashMap<
|
||||
::prost::alloc::string::String,
|
||||
TableStructureResponse,
|
||||
>,
|
||||
}
|
||||
/// Response with the ordered list of columns (by ordinal position) for one table.
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct TableStructureResponse {
|
||||
/// Columns of the physical table, including system columns (id, deleted,
|
||||
@@ -55,14 +65,13 @@ pub mod table_structure_service_client {
|
||||
)]
|
||||
use tonic::codegen::*;
|
||||
use tonic::codegen::http::Uri;
|
||||
/// Introspects the physical PostgreSQL table for a given logical table
|
||||
/// (defined in table_definitions) and returns its column structure.
|
||||
/// Introspects the physical PostgreSQL tables for one or more logical tables
|
||||
/// (defined in table_definitions) and returns their column structures.
|
||||
/// The server validates that:
|
||||
/// - The profile (schema) exists in `schemas`
|
||||
/// - The table is defined for that profile in `table_definitions`
|
||||
/// It then queries information_schema for the physical table and returns
|
||||
/// normalized column metadata. If the physical table is missing despite
|
||||
/// a definition, the response may contain an empty `columns` list.
|
||||
/// - Every table is defined for that profile in `table_definitions`
|
||||
/// It then queries information_schema for the physical tables and returns
|
||||
/// normalized column metadata.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TableStructureServiceClient<T> {
|
||||
inner: tonic::client::Grpc<T>,
|
||||
@@ -144,20 +153,20 @@ pub mod table_structure_service_client {
|
||||
self
|
||||
}
|
||||
/// Return the physical column list (name, normalized data_type,
|
||||
/// nullability, primary key flag) for a table in a profile.
|
||||
/// nullability, primary key flag) for one or more tables in a profile.
|
||||
///
|
||||
/// Behavior:
|
||||
/// - NOT_FOUND if profile doesn't exist in `schemas`
|
||||
/// - NOT_FOUND if table not defined for that profile in `table_definitions`
|
||||
/// - NOT_FOUND if any table is not defined for that profile in `table_definitions`
|
||||
/// - Queries information_schema.columns ordered by ordinal position
|
||||
/// - Normalizes data_type text (details under TableColumn.data_type)
|
||||
/// - Returns an empty list if the table is validated but has no visible
|
||||
/// columns in information_schema (e.g., physical table missing)
|
||||
/// - Returns an error if any validated table has no visible columns in
|
||||
/// information_schema (e.g., physical table missing)
|
||||
pub async fn get_table_structure(
|
||||
&mut self,
|
||||
request: impl tonic::IntoRequest<super::GetTableStructureRequest>,
|
||||
) -> std::result::Result<
|
||||
tonic::Response<super::TableStructureResponse>,
|
||||
tonic::Response<super::GetTableStructureResponse>,
|
||||
tonic::Status,
|
||||
> {
|
||||
self.inner
|
||||
@@ -198,31 +207,30 @@ pub mod table_structure_service_server {
|
||||
#[async_trait]
|
||||
pub trait TableStructureService: std::marker::Send + std::marker::Sync + 'static {
|
||||
/// Return the physical column list (name, normalized data_type,
|
||||
/// nullability, primary key flag) for a table in a profile.
|
||||
/// nullability, primary key flag) for one or more tables in a profile.
|
||||
///
|
||||
/// Behavior:
|
||||
/// - NOT_FOUND if profile doesn't exist in `schemas`
|
||||
/// - NOT_FOUND if table not defined for that profile in `table_definitions`
|
||||
/// - NOT_FOUND if any table is not defined for that profile in `table_definitions`
|
||||
/// - Queries information_schema.columns ordered by ordinal position
|
||||
/// - Normalizes data_type text (details under TableColumn.data_type)
|
||||
/// - Returns an empty list if the table is validated but has no visible
|
||||
/// columns in information_schema (e.g., physical table missing)
|
||||
/// - Returns an error if any validated table has no visible columns in
|
||||
/// information_schema (e.g., physical table missing)
|
||||
async fn get_table_structure(
|
||||
&self,
|
||||
request: tonic::Request<super::GetTableStructureRequest>,
|
||||
) -> std::result::Result<
|
||||
tonic::Response<super::TableStructureResponse>,
|
||||
tonic::Response<super::GetTableStructureResponse>,
|
||||
tonic::Status,
|
||||
>;
|
||||
}
|
||||
/// Introspects the physical PostgreSQL table for a given logical table
|
||||
/// (defined in table_definitions) and returns its column structure.
|
||||
/// Introspects the physical PostgreSQL tables for one or more logical tables
|
||||
/// (defined in table_definitions) and returns their column structures.
|
||||
/// The server validates that:
|
||||
/// - The profile (schema) exists in `schemas`
|
||||
/// - The table is defined for that profile in `table_definitions`
|
||||
/// It then queries information_schema for the physical table and returns
|
||||
/// normalized column metadata. If the physical table is missing despite
|
||||
/// a definition, the response may contain an empty `columns` list.
|
||||
/// - Every table is defined for that profile in `table_definitions`
|
||||
/// It then queries information_schema for the physical tables and returns
|
||||
/// normalized column metadata.
|
||||
#[derive(Debug)]
|
||||
pub struct TableStructureServiceServer<T> {
|
||||
inner: Arc<T>,
|
||||
@@ -307,7 +315,7 @@ pub mod table_structure_service_server {
|
||||
T: TableStructureService,
|
||||
> tonic::server::UnaryService<super::GetTableStructureRequest>
|
||||
for GetTableStructureSvc<T> {
|
||||
type Response = super::TableStructureResponse;
|
||||
type Response = super::GetTableStructureResponse;
|
||||
type Future = BoxFuture<
|
||||
tonic::Response<Self::Response>,
|
||||
tonic::Status,
|
||||
|
||||
@@ -1,75 +1,182 @@
|
||||
// common/src/search.rs
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use tantivy::schema::*;
|
||||
use tantivy::tokenizer::*;
|
||||
use tantivy::schema::{
|
||||
Field, IndexRecordOption, JsonObjectOptions, Schema, TextFieldIndexing, Term, INDEXED,
|
||||
STORED, STRING,
|
||||
};
|
||||
use tantivy::tokenizer::{
|
||||
AsciiFoldingFilter, LowerCaser, NgramTokenizer, RawTokenizer, RemoveLongFilter,
|
||||
SimpleTokenizer, TextAnalyzer, TokenStream,
|
||||
};
|
||||
use tantivy::Index;
|
||||
|
||||
/// Creates a hybrid Slovak search schema with optimized prefix fields.
|
||||
pub const F_PG_ID: &str = "pg_id";
|
||||
pub const F_TABLE_NAME: &str = "table_name";
|
||||
pub const F_ROW_KEY: &str = "row_key";
|
||||
pub const F_DATA_WORD: &str = "data_word";
|
||||
pub const F_DATA_NGRAM: &str = "data_ngram";
|
||||
pub const F_DATA_EXACT: &str = "data_exact";
|
||||
|
||||
pub const TOK_WORD: &str = "kw_word";
|
||||
pub const TOK_NGRAM: &str = "kw_ngram";
|
||||
pub const TOK_EXACT: &str = "kw_exact";
|
||||
|
||||
/// Returns the on-disk path for a profile search index.
|
||||
pub fn search_index_path(root: &Path, profile_name: &str) -> PathBuf {
|
||||
root.join(profile_name)
|
||||
}
|
||||
|
||||
/// Returns the unique index key for one table row inside a profile index.
|
||||
pub fn search_row_key(table_name: &str, row_id: i64) -> String {
|
||||
format!("{}:{}", table_name, row_id)
|
||||
}
|
||||
|
||||
/// Normalizes user-entered values for exact-mode terms.
|
||||
pub fn normalize_exact(input: &str) -> String {
|
||||
let trimmed = input.trim();
|
||||
if trimmed.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
|
||||
let mut analyzer = exact_analyzer();
|
||||
let mut stream = analyzer.token_stream(trimmed);
|
||||
let mut out = String::with_capacity(trimmed.len());
|
||||
while let Some(token) = stream.next() {
|
||||
out.push_str(&token.text);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Normalizes a column name to the JSON-key form used at index time.
|
||||
pub fn normalize_column_name(column: &str) -> String {
|
||||
column.to_ascii_lowercase()
|
||||
}
|
||||
|
||||
/// Creates the column-aware search schema.
|
||||
pub fn create_search_schema() -> Schema {
|
||||
let mut schema_builder = Schema::builder();
|
||||
|
||||
schema_builder.add_u64_field("pg_id", INDEXED | STORED);
|
||||
schema_builder.add_u64_field(F_PG_ID, INDEXED | STORED);
|
||||
schema_builder.add_text_field(F_TABLE_NAME, STRING | STORED);
|
||||
schema_builder.add_text_field(F_ROW_KEY, STRING | STORED);
|
||||
|
||||
// FIELD 1: For prefixes (1-4 chars).
|
||||
let short_prefix_indexing = TextFieldIndexing::default()
|
||||
.set_tokenizer("slovak_prefix_edge")
|
||||
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
|
||||
let short_prefix_options = TextOptions::default()
|
||||
.set_indexing_options(short_prefix_indexing)
|
||||
.set_stored();
|
||||
schema_builder.add_text_field("prefix_edge", short_prefix_options);
|
||||
|
||||
// FIELD 2: For the full word.
|
||||
let full_word_indexing = TextFieldIndexing::default()
|
||||
.set_tokenizer("slovak_prefix_full")
|
||||
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
|
||||
let full_word_options = TextOptions::default()
|
||||
.set_indexing_options(full_word_indexing)
|
||||
.set_stored();
|
||||
schema_builder.add_text_field("prefix_full", full_word_options);
|
||||
|
||||
// NGRAM FIELD: For substring matching.
|
||||
let ngram_field_indexing = TextFieldIndexing::default()
|
||||
.set_tokenizer("slovak_ngram")
|
||||
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
|
||||
let ngram_options = TextOptions::default()
|
||||
.set_indexing_options(ngram_field_indexing)
|
||||
.set_stored();
|
||||
schema_builder.add_text_field("text_ngram", ngram_options);
|
||||
schema_builder.add_json_field(F_DATA_WORD, json_options(TOK_WORD, true, false));
|
||||
schema_builder.add_json_field(F_DATA_NGRAM, json_options(TOK_NGRAM, true, false));
|
||||
schema_builder.add_json_field(F_DATA_EXACT, json_options(TOK_EXACT, false, false));
|
||||
|
||||
schema_builder.build()
|
||||
}
|
||||
|
||||
/// Registers all necessary Slovak tokenizers with the index.
|
||||
///
|
||||
/// This must be called by ANY process that opens the index
|
||||
/// to ensure the tokenizers are loaded into memory.
|
||||
pub fn register_slovak_tokenizers(index: &Index) -> tantivy::Result<()> {
|
||||
fn json_options(
|
||||
tokenizer_name: &str,
|
||||
with_positions: bool,
|
||||
stored: bool,
|
||||
) -> JsonObjectOptions {
|
||||
let index_option = if with_positions {
|
||||
IndexRecordOption::WithFreqsAndPositions
|
||||
} else {
|
||||
IndexRecordOption::Basic
|
||||
};
|
||||
|
||||
let indexing = TextFieldIndexing::default()
|
||||
.set_tokenizer(tokenizer_name)
|
||||
.set_index_option(index_option);
|
||||
|
||||
let mut options = JsonObjectOptions::default().set_indexing_options(indexing);
|
||||
if stored {
|
||||
options = options.set_stored();
|
||||
}
|
||||
options
|
||||
}
|
||||
|
||||
/// Registers all required tokenizers with the index.
|
||||
pub fn register_tokenizers(index: &Index) -> tantivy::Result<()> {
|
||||
let tokenizer_manager = index.tokenizers();
|
||||
|
||||
// TOKENIZER for `prefix_edge`: Edge N-gram (1-4 chars)
|
||||
let edge_tokenizer = TextAnalyzer::builder(NgramTokenizer::new(1, 4, true)?)
|
||||
.filter(RemoveLongFilter::limit(40))
|
||||
.filter(LowerCaser)
|
||||
.filter(AsciiFoldingFilter)
|
||||
.build();
|
||||
tokenizer_manager.register("slovak_prefix_edge", edge_tokenizer);
|
||||
|
||||
// TOKENIZER for `prefix_full`: Simple word tokenizer
|
||||
let full_tokenizer = TextAnalyzer::builder(SimpleTokenizer::default())
|
||||
.filter(RemoveLongFilter::limit(40))
|
||||
.filter(LowerCaser)
|
||||
.filter(AsciiFoldingFilter)
|
||||
.build();
|
||||
tokenizer_manager.register("slovak_prefix_full", full_tokenizer);
|
||||
|
||||
// NGRAM TOKENIZER: For substring matching.
|
||||
let ngram_tokenizer = TextAnalyzer::builder(NgramTokenizer::new(3, 3, false)?)
|
||||
.filter(RemoveLongFilter::limit(40))
|
||||
.filter(LowerCaser)
|
||||
.filter(AsciiFoldingFilter)
|
||||
.build();
|
||||
tokenizer_manager.register("slovak_ngram", ngram_tokenizer);
|
||||
tokenizer_manager.register(TOK_WORD, word_analyzer());
|
||||
tokenizer_manager.register(TOK_NGRAM, ngram_analyzer()?);
|
||||
tokenizer_manager.register(TOK_EXACT, exact_analyzer());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn word_analyzer() -> TextAnalyzer {
|
||||
TextAnalyzer::builder(SimpleTokenizer::default())
|
||||
.filter(RemoveLongFilter::limit(80))
|
||||
.filter(LowerCaser)
|
||||
.filter(AsciiFoldingFilter)
|
||||
.build()
|
||||
}
|
||||
|
||||
fn ngram_analyzer() -> tantivy::Result<TextAnalyzer> {
|
||||
Ok(TextAnalyzer::builder(NgramTokenizer::new(3, 3, false)?)
|
||||
.filter(RemoveLongFilter::limit(80))
|
||||
.filter(LowerCaser)
|
||||
.filter(AsciiFoldingFilter)
|
||||
.build())
|
||||
}
|
||||
|
||||
fn exact_analyzer() -> TextAnalyzer {
|
||||
TextAnalyzer::builder(RawTokenizer::default())
|
||||
.filter(LowerCaser)
|
||||
.filter(AsciiFoldingFilter)
|
||||
.build()
|
||||
}
|
||||
|
||||
/// Tokenizes text the same way `data_word` is indexed.
|
||||
pub fn tokenize_word(text: &str) -> Vec<String> {
|
||||
tokenize_with(word_analyzer(), text)
|
||||
}
|
||||
|
||||
/// Tokenizes text the same way `data_ngram` is indexed.
|
||||
pub fn tokenize_ngram(text: &str) -> Vec<String> {
|
||||
match ngram_analyzer() {
|
||||
Ok(analyzer) => tokenize_with(analyzer, text),
|
||||
Err(_) => Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn tokenize_with(mut analyzer: TextAnalyzer, text: &str) -> Vec<String> {
|
||||
let mut stream = analyzer.token_stream(text);
|
||||
let mut out = Vec::new();
|
||||
while let Some(token) = stream.next() {
|
||||
out.push(token.text.clone());
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Builds a term scoped to a specific JSON path within a JSON field.
|
||||
pub fn json_path_term(field: Field, column: &str, text: &str) -> Term {
|
||||
let mut term = Term::from_field_json_path(field, column, false);
|
||||
term.append_type_and_str(text);
|
||||
term
|
||||
}
|
||||
|
||||
/// Returns all required schema fields or fails loudly on mismatch.
|
||||
pub struct SchemaFields {
|
||||
pub pg_id: Field,
|
||||
pub table_name: Field,
|
||||
pub row_key: Field,
|
||||
pub data_word: Field,
|
||||
pub data_ngram: Field,
|
||||
pub data_exact: Field,
|
||||
}
|
||||
|
||||
impl SchemaFields {
|
||||
pub fn from(schema: &Schema) -> tantivy::Result<Self> {
|
||||
Ok(Self {
|
||||
pg_id: get_field(schema, F_PG_ID)?,
|
||||
table_name: get_field(schema, F_TABLE_NAME)?,
|
||||
row_key: get_field(schema, F_ROW_KEY)?,
|
||||
data_word: get_field(schema, F_DATA_WORD)?,
|
||||
data_ngram: get_field(schema, F_DATA_NGRAM)?,
|
||||
data_exact: get_field(schema, F_DATA_EXACT)?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn get_field(schema: &Schema, name: &str) -> tantivy::Result<Field> {
|
||||
schema.get_field(name).map_err(|e| {
|
||||
tantivy::TantivyError::SchemaError(format!("schema is missing field '{name}': {e}"))
|
||||
})
|
||||
}
|
||||
|
||||
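
The helpers above (search_index_path, register_tokenizers, SchemaFields) are intended to be used together by any process that opens a profile index. A minimal sketch of that flow (not from this diff; it assumes the index directory already exists):

// Hypothetical open-and-register sketch built on the helpers above; the root
// directory and profile name are placeholders.
use std::path::Path;
use tantivy::Index;
use common::search::{register_tokenizers, search_index_path, SchemaFields};

fn open_profile_index(root: &Path, profile: &str) -> tantivy::Result<(Index, SchemaFields)> {
    let path = search_index_path(root, profile);
    let index = Index::open_in_dir(&path)?;
    // Every process that opens the index must register the tokenizers,
    // otherwise queries against the kw_word/kw_ngram/kw_exact fields fail.
    register_tokenizers(&index)?;
    let fields = SchemaFields::from(&index.schema())?;
    Ok((index, fields))
}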
search/.gitignore (vendored, new file): 1 change
@@ -0,0 +1 @@
+.codex
@@ -1,279 +1,426 @@
|
||||
// src/lib.rs
|
||||
mod query_builder;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use tantivy::collector::TopDocs;
|
||||
use tantivy::query::{
|
||||
BooleanQuery, BoostQuery, FuzzyTermQuery, Occur, Query, QueryParser, TermQuery,
|
||||
};
|
||||
use tantivy::schema::{IndexRecordOption, Value};
|
||||
use tantivy::{Index, TantivyDocument, Term};
|
||||
use tonic::{Request, Response, Status};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use common::proto::komp_ac::search::searcher_server::Searcher;
|
||||
pub use common::proto::komp_ac::search::searcher_server::SearcherServer;
|
||||
use common::proto::komp_ac::search::{search_response::Hit, SearchRequest, SearchResponse};
|
||||
use common::search::register_slovak_tokenizers;
|
||||
pub use common::proto::komp_ac::search::searcher_server::SearcherServer;
|
||||
use common::search::{register_tokenizers, search_index_path, SchemaFields};
|
||||
use query_builder::{build_master_query, ConstraintMode, SearchConstraint};
|
||||
use sqlx::{PgPool, Row};
|
||||
use tantivy::collector::TopDocs;
|
||||
use tantivy::schema::Value;
|
||||
use tantivy::{Index, IndexReader, ReloadPolicy, TantivyDocument};
|
||||
use tonic::{Request, Response, Status};
|
||||
use tracing::info;
|
||||
|
||||
// We need to hold the database pool in our service struct.
|
||||
const INDEX_ROOT: &str = "./tantivy_indexes";
|
||||
const DEFAULT_RESULT_LIMIT: usize = 25;
|
||||
const HARD_RESULT_LIMIT: usize = 200;
|
||||
const DEFAULT_LIST_LIMIT: usize = 5;
|
||||
|
||||
pub struct SearcherService {
|
||||
pub pool: PgPool,
|
||||
profiles: Mutex<HashMap<String, Arc<ProfileIndex>>>,
|
||||
}
|
||||
|
||||
// normalize_slovak_text function remains unchanged...
|
||||
fn normalize_slovak_text(text: &str) -> String {
|
||||
// ... function content is unchanged ...
|
||||
text.chars()
|
||||
.map(|c| match c {
|
||||
'á' | 'à' | 'â' | 'ä' | 'ă' | 'ā' => 'a',
|
||||
'Á' | 'À' | 'Â' | 'Ä' | 'Ă' | 'Ā' => 'A',
|
||||
'é' | 'è' | 'ê' | 'ë' | 'ě' | 'ē' => 'e',
|
||||
'É' | 'È' | 'Ê' | 'Ë' | 'Ě' | 'Ē' => 'E',
|
||||
'í' | 'ì' | 'î' | 'ï' | 'ī' => 'i',
|
||||
'Í' | 'Ì' | 'Î' | 'Ï' | 'Ī' => 'I',
|
||||
'ó' | 'ò' | 'ô' | 'ö' | 'ō' | 'ő' => 'o',
|
||||
'Ó' | 'Ò' | 'Ô' | 'Ö' | 'Ō' | 'Ő' => 'O',
|
||||
'ú' | 'ù' | 'û' | 'ü' | 'ū' | 'ű' => 'u',
|
||||
'Ú' | 'Ù' | 'Û' | 'Ü' | 'Ū' | 'Ű' => 'U',
|
||||
'ý' | 'ỳ' | 'ŷ' | 'ÿ' => 'y',
|
||||
'Ý' | 'Ỳ' | 'Ŷ' | 'Ÿ' => 'Y',
|
||||
'č' => 'c',
|
||||
'Č' => 'C',
|
||||
'ď' => 'd',
|
||||
'Ď' => 'D',
|
||||
'ľ' => 'l',
|
||||
'Ľ' => 'L',
|
||||
'ň' => 'n',
|
||||
'Ň' => 'N',
|
||||
'ř' => 'r',
|
||||
'Ř' => 'R',
|
||||
'š' => 's',
|
||||
'Š' => 'S',
|
||||
'ť' => 't',
|
||||
'Ť' => 'T',
|
||||
'ž' => 'z',
|
||||
'Ž' => 'Z',
|
||||
_ => c,
|
||||
})
|
||||
.collect()
|
||||
impl SearcherService {
|
||||
pub fn new(pool: PgPool) -> Self {
|
||||
Self {
|
||||
pool,
|
||||
profiles: Mutex::new(HashMap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
#[tonic::async_trait]
|
||||
impl Searcher for SearcherService {
|
||||
async fn search_table(
|
||||
&self,
|
||||
request: Request<SearchRequest>,
|
||||
) -> Result<Response<SearchResponse>, Status> {
async fn run_rpc(&self, request: Request<SearchRequest>) -> Result<Response<SearchResponse>, Status> {
    let req = request.into_inner();
    let table_name = req.table_name;
    let query_str = req.query;
    let normalized = normalize_request(req)?;

    if !profile_exists(&self.pool, &normalized.profile_name).await? {
        return Err(Status::not_found(format!(
            "Profile '{}' was not found",
            normalized.profile_name
        )));
    }

    if let Some(table_name) = normalized.table_name.as_deref() {
        if !table_exists(&self.pool, &normalized.profile_name, table_name).await? {
            return Err(Status::not_found(format!(
                "Table '{}' was not found in profile '{}'",
                table_name, normalized.profile_name
            )));
        }
    }

    if !normalized.has_input() {
        let Some(table_name) = normalized.table_name.as_deref() else {
            return Err(Status::invalid_argument(
                "table_name is required when query is empty",
            ));
        };

        let hits = fetch_latest_rows(
            &self.pool,
            &normalized.profile_name,
            table_name,
            normalized.limit.unwrap_or(DEFAULT_LIST_LIMIT),
        )
        .await?;
        return Ok(Response::new(SearchResponse { hits }));
    }

    let index_path = search_index_path(Path::new(INDEX_ROOT), &normalized.profile_name);
    if !index_path.exists() {
        return Err(Status::not_found(format!(
            "No search index found for profile '{}'",
            normalized.profile_name
        )));
    }

    let profile = profile_index(&self.profiles, &normalized.profile_name, &index_path)?;
    let mut hits = run_search(
        &self.pool,
        &profile,
        &normalized.profile_name,
        normalized.table_name.as_deref(),
        &normalized.free_query,
        &normalized.must,
        normalized.limit.unwrap_or(DEFAULT_RESULT_LIMIT),
    )
    .await?;

    hits.sort_by(|left, right| right.score.total_cmp(&left.score));
    if let Some(limit) = normalized.limit {
        if hits.len() > limit {
            hits.truncate(limit);
        }
    }

    // --- MODIFIED LOGIC ---
    // If the query is empty, fetch the 5 most recent records.
    if query_str.trim().is_empty() {
        info!(
            "Empty query for table '{}'. Fetching default results.",
            table_name
        );
        let qualified_table = format!("gen.\"{}\"", table_name);
        let sql = format!(
            "SELECT id, to_jsonb(t) AS data FROM {} t ORDER BY id DESC LIMIT 5",
            qualified_table
        "search: profile={} table={:?} free='{}' constraints={} hits={}",
        normalized.profile_name,
        normalized.table_name,
        normalized.free_query,
        normalized.must.len(),
        hits.len()
    );

        let rows = sqlx::query(&sql).fetch_all(&self.pool).await.map_err(|e| {
            Status::internal(format!("DB query for default results failed: {}", e))
    Ok(Response::new(SearchResponse { hits }))
}
}

struct ProfileIndex {
    index: Index,
    reader: IndexReader,
    fields: SchemaFields,
}

impl ProfileIndex {
    fn open(path: &Path) -> Result<Self, Status> {
        let index = Index::open_in_dir(path)
            .map_err(|e| Status::internal(format!("Failed to open index: {}", e)))?;
        register_tokenizers(&index)
            .map_err(|e| Status::internal(format!("Failed to register tokenizers: {}", e)))?;
        let reader = index
            .reader_builder()
            .reload_policy(ReloadPolicy::OnCommitWithDelay)
            .try_into()
            .map_err(|e| Status::internal(format!("Failed to build index reader: {}", e)))?;
        let fields = SchemaFields::from(&index.schema()).map_err(|e| {
            Status::internal(format!(
                "Search index schema mismatch. Reindex required: {}",
                e
            ))
        })?;

        let hits: Vec<Hit> = rows
        Ok(Self {
            index,
            reader,
            fields,
        })
    }
}

#[derive(Debug)]
struct NormalizedSearchRequest {
    profile_name: String,
    table_name: Option<String>,
    free_query: String,
    must: Vec<SearchConstraint>,
    limit: Option<usize>,
}

impl NormalizedSearchRequest {
    fn has_input(&self) -> bool {
        !self.free_query.is_empty() || !self.must.is_empty()
    }
}

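// Returns the cached ProfileIndex for a profile, opening and caching it on first use.
// The lock is released while the index is opened, then the cache is re-checked before
// inserting, so an entry created by a concurrent opener is reused rather than replaced.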
fn profile_index(
    cache: &Mutex<HashMap<String, Arc<ProfileIndex>>>,
    profile_name: &str,
    path: &Path,
) -> Result<Arc<ProfileIndex>, Status> {
    {
        let cache_guard = cache
            .lock()
            .map_err(|_| Status::internal("Profile index cache lock poisoned"))?;
        if let Some(index) = cache_guard.get(profile_name) {
            return Ok(index.clone());
        }
    }

    let opened = Arc::new(ProfileIndex::open(path)?);
    let mut cache_guard = cache
        .lock()
        .map_err(|_| Status::internal("Profile index cache lock poisoned"))?;
    if let Some(index) = cache_guard.get(profile_name) {
        return Ok(index.clone());
    }
    cache_guard.insert(profile_name.to_string(), opened.clone());
    Ok(opened)
}

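// Profile, table, and column names are interpolated into SQL and index paths, so they
// are restricted to ASCII identifier characters ([A-Za-z_] followed by [A-Za-z0-9_]*).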
fn validate_identifier(value: &str, field_name: &str) -> Result<(), Status> {
    let mut chars = value.chars();
    let Some(first) = chars.next() else {
        return Err(Status::invalid_argument(format!(
            "{field_name} must not be empty"
        )));
    };

    if !(first.is_ascii_alphabetic() || first == '_')
        || !chars.all(|ch| ch.is_ascii_alphanumeric() || ch == '_')
    {
        return Err(Status::invalid_argument(format!(
            "{field_name} contains invalid characters"
        )));
    }

    Ok(())
}

fn qualify_profile_table(profile_name: &str, table_name: &str) -> String {
    format!("\"{}\".\"{}\"", profile_name, table_name)
}

async fn profile_exists(pool: &PgPool, profile_name: &str) -> Result<bool, Status> {
    let exists =
        sqlx::query_scalar::<_, bool>("SELECT EXISTS(SELECT 1 FROM schemas WHERE name = $1)")
            .bind(profile_name)
            .fetch_one(pool)
            .await
            .map_err(|e| Status::internal(format!("Profile lookup failed: {}", e)))?;
    Ok(exists)
}

async fn table_exists(pool: &PgPool, profile_name: &str, table_name: &str) -> Result<bool, Status> {
    let exists = sqlx::query_scalar::<_, bool>(
        r#"
        SELECT EXISTS(
            SELECT 1
            FROM table_definitions td
            JOIN schemas s ON td.schema_id = s.id
            WHERE s.name = $1 AND td.table_name = $2
        )
        "#,
    )
    .bind(profile_name)
    .bind(table_name)
    .fetch_one(pool)
    .await
    .map_err(|e| Status::internal(format!("Table lookup failed: {}", e)))?;
    Ok(exists)
}

fn normalize_request(req: SearchRequest) -> Result<NormalizedSearchRequest, Status> {
    let profile_name = req.profile_name.trim();
    if profile_name.is_empty() {
        return Err(Status::invalid_argument("profile_name is required"));
    }
    validate_identifier(profile_name, "profile_name")?;

    let table_name = match req.table_name.as_deref().map(str::trim) {
        Some(table_name) if !table_name.is_empty() => {
            validate_identifier(table_name, "table_name")?;
            Some(table_name.to_string())
        }
        _ => None,
    };

    let free_query = req.free_query.trim().to_string();
    let mut must = Vec::new();

    for constraint in req.must {
        let column = constraint.column.trim();
        if column.is_empty() {
            return Err(Status::invalid_argument(
                "constraint.column must not be empty",
            ));
        }
        validate_identifier(column, "constraint.column")?;

        let query = constraint.query.trim();
        if query.is_empty() {
            return Err(Status::invalid_argument(
                "constraint.query must not be empty",
            ));
        }

        must.push(SearchConstraint {
            column: column.to_string(),
            query: query.to_string(),
            mode: constraint_mode_from_proto(constraint.mode),
        });
    }

    let limit = req.limit.map(|value| (value as usize).min(HARD_RESULT_LIMIT));

    Ok(NormalizedSearchRequest {
        profile_name: profile_name.to_string(),
        table_name,
        free_query,
        must,
        limit,
    })
}

fn constraint_mode_from_proto(raw_mode: i32) -> ConstraintMode {
    match raw_mode {
        2 => ConstraintMode::Exact,
        _ => ConstraintMode::Fuzzy,
    }
}

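// Browse path for empty queries: returns the newest non-deleted rows for a table,
// serialized to JSON, with score 0.0 since no relevance ranking applies.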
async fn fetch_latest_rows(
    pool: &PgPool,
    profile_name: &str,
    table_name: &str,
    limit: usize,
) -> Result<Vec<Hit>, Status> {
    let sql = format!(
        "SELECT id, to_jsonb(t) AS data FROM {} t WHERE deleted = FALSE ORDER BY id DESC LIMIT $1",
        qualify_profile_table(profile_name, table_name)
    );

    let rows = sqlx::query(&sql)
        .bind(limit as i64)
        .fetch_all(pool)
        .await
        .map_err(|e| Status::internal(format!("DB query for default results failed: {}", e)))?;

    Ok(rows
        .into_iter()
        .map(|row| {
            let id: i64 = row.try_get("id").unwrap_or_default();
            let json_data: serde_json::Value = row.try_get("data").unwrap_or_default();
            Hit {
                id,
                // Score is 0.0 as this is not a relevance-ranked search
                score: 0.0,
                content_json: json_data.to_string(),
                table_name: table_name.to_string(),
            }
        })
        .collect();

        info!(
            "--- SERVER: Successfully processed empty query. Returning {} default hits. ---",
            hits.len()
        );
        return Ok(Response::new(SearchResponse { hits }));
    }
    // --- END OF MODIFIED LOGIC ---

    let index_path = Path::new("./tantivy_indexes").join(&table_name);
    if !index_path.exists() {
        return Err(Status::not_found(format!(
            "No search index found for table '{}'",
            table_name
        )));
        .collect())
}

    let index = Index::open_in_dir(&index_path)
        .map_err(|e| Status::internal(format!("Failed to open index: {}", e)))?;

    register_slovak_tokenizers(&index).map_err(|e| {
        Status::internal(format!("Failed to register Slovak tokenizers: {}", e))
    })?;

    let reader = index
        .reader()
        .map_err(|e| Status::internal(format!("Failed to create index reader: {}", e)))?;
    let searcher = reader.searcher();
    let schema = index.schema();

    let pg_id_field = schema
        .get_field("pg_id")
        .map_err(|_| Status::internal("Schema is missing the 'pg_id' field."))?;

    // --- Query Building Logic (no changes here) ---
    let prefix_edge_field = schema.get_field("prefix_edge").unwrap();
    let prefix_full_field = schema.get_field("prefix_full").unwrap();
    let text_ngram_field = schema.get_field("text_ngram").unwrap();
    let normalized_query = normalize_slovak_text(&query_str);
    let words: Vec<&str> = normalized_query.split_whitespace().collect();
    if words.is_empty() {
        return Ok(Response::new(SearchResponse { hits: vec![] }));
    }
    let mut query_layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();
    // ... all your query building layers remain exactly the same ...
    // ===============================
    // LAYER 1: PREFIX MATCHING (HIGHEST PRIORITY, Boost: 4.0)
    // ===============================
    {
        let mut must_clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();
        for word in &words {
            let edge_term = Term::from_field_text(prefix_edge_field, word);
            let full_term = Term::from_field_text(prefix_full_field, word);

            let per_word_query = BooleanQuery::new(vec![
                (
                    Occur::Should,
                    Box::new(TermQuery::new(edge_term, IndexRecordOption::Basic)),
                ),
                (
                    Occur::Should,
                    Box::new(TermQuery::new(full_term, IndexRecordOption::Basic)),
                ),
            ]);
            must_clauses.push((Occur::Must, Box::new(per_word_query) as Box<dyn Query>));
        }

        if !must_clauses.is_empty() {
            let prefix_query = BooleanQuery::new(must_clauses);
            let boosted_query = BoostQuery::new(Box::new(prefix_query), 4.0);
            query_layers.push((Occur::Should, Box::new(boosted_query)));
        }
    }

    // ===============================
    // LAYER 2: FUZZY MATCHING (HIGH PRIORITY, Boost: 3.0)
    // ===============================
    {
        let last_word = words.last().unwrap();
        let fuzzy_term = Term::from_field_text(prefix_full_field, last_word);
        let fuzzy_query = FuzzyTermQuery::new(fuzzy_term, 2, true);
        let boosted_query = BoostQuery::new(Box::new(fuzzy_query), 3.0);
        query_layers.push((Occur::Should, Box::new(boosted_query)));
    }

    // ===============================
    // LAYER 3: PHRASE MATCHING WITH SLOP (MEDIUM PRIORITY, Boost: 2.0)
    // ===============================
    if words.len() > 1 {
        let slop_parser = QueryParser::for_index(&index, vec![prefix_full_field]);
        let slop_query_str = format!("\"{}\"~3", normalized_query);
        if let Ok(slop_query) = slop_parser.parse_query(&slop_query_str) {
            let boosted_query = BoostQuery::new(slop_query, 2.0);
            query_layers.push((Occur::Should, Box::new(boosted_query)));
        }
    }

    // ===============================
    // LAYER 4: NGRAM SUBSTRING MATCHING (LOWEST PRIORITY, Boost: 1.0)
    // ===============================
    {
        let ngram_parser = QueryParser::for_index(&index, vec![text_ngram_field]);
        if let Ok(ngram_query) = ngram_parser.parse_query(&normalized_query) {
            let boosted_query = BoostQuery::new(ngram_query, 1.0);
            query_layers.push((Occur::Should, Box::new(boosted_query)));
        }
    }
    let master_query = BooleanQuery::new(query_layers);
    // --- End of Query Building Logic ---
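// Executes the Tantivy query for a profile, then hydrates the matching rows from
// Postgres (grouped per table) and pairs each row's JSON with its Tantivy score.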
async fn run_search(
    pool: &PgPool,
    profile: &ProfileIndex,
    profile_name: &str,
    table_filter: Option<&str>,
    free_query: &str,
    must: &[SearchConstraint],
    limit: usize,
) -> Result<Vec<Hit>, Status> {
    let master_query =
        build_master_query(&profile.index, &profile.fields, free_query, must, table_filter)?;

    let searcher = profile.reader.searcher();
    let top_docs = searcher
        .search(&master_query, &TopDocs::with_limit(100))
        .search(&*master_query, &TopDocs::with_limit(limit))
        .map_err(|e| Status::internal(format!("Search failed: {}", e)))?;

    if top_docs.is_empty() {
        return Ok(Response::new(SearchResponse { hits: vec![] }));
        return Ok(vec![]);
    }

    // --- NEW LOGIC: Fetch from DB and combine results ---

    // Step 1: Extract (score, pg_id) from Tantivy results.
    let mut scored_ids: Vec<(f32, u64)> = Vec::new();
    let mut candidates: Vec<(f32, i64, String)> = Vec::with_capacity(top_docs.len());
    for (score, doc_address) in top_docs {
        let doc: TantivyDocument = searcher
            .doc(doc_address)
            .map_err(|e| Status::internal(format!("Failed to retrieve document: {}", e)))?;
        if let Some(pg_id_value) = doc.get_first(pg_id_field) {
            if let Some(pg_id) = pg_id_value.as_u64() {
                scored_ids.push((score, pg_id));
            }
        }
        let Some(pg_id) = doc
            .get_first(profile.fields.pg_id)
            .and_then(|value| value.as_u64())
        else {
            continue;
        };
        let Some(table_name) = doc
            .get_first(profile.fields.table_name)
            .and_then(|value| value.as_str())
        else {
            continue;
        };
        candidates.push((score, pg_id as i64, table_name.to_string()));
    }

    // Step 2: Fetch all corresponding rows from Postgres in a single query.
    let pg_ids: Vec<i64> = scored_ids.iter().map(|(_, id)| *id as i64).collect();
    let qualified_table = format!("gen.\"{}\"", table_name);
    let query_str = format!(
        "SELECT id, to_jsonb(t) AS data FROM {} t WHERE id = ANY($1)",
        qualified_table
    if candidates.is_empty() {
        return Ok(vec![]);
    }

    let mut ids_by_table: HashMap<String, Vec<i64>> = HashMap::new();
    for (_, pg_id, table_name) in &candidates {
        ids_by_table
            .entry(table_name.clone())
            .or_default()
            .push(*pg_id);
    }

    let mut content_map: HashMap<(String, i64), String> = HashMap::new();
    for (table_name, pg_ids) in ids_by_table {
        validate_identifier(&table_name, "table_name")?;
        let sql = format!(
            "SELECT id, to_jsonb(t) AS data FROM {} t WHERE deleted = FALSE AND id = ANY($1)",
            qualify_profile_table(profile_name, &table_name)
        );

        let rows = sqlx::query(&query_str)
        let rows = sqlx::query(&sql)
            .bind(&pg_ids)
            .fetch_all(&self.pool)
            .fetch_all(pool)
            .await
            .map_err(|e| Status::internal(format!("Database query failed: {}", e)))?;

        // Step 3: Map the database results by ID for quick lookup.
        let mut content_map: HashMap<i64, String> = HashMap::new();
        for row in rows {
            let id: i64 = row.try_get("id").unwrap_or(0);
            let json_data: serde_json::Value =
                row.try_get("data").unwrap_or(serde_json::Value::Null);
            content_map.insert(id, json_data.to_string());
            let id: i64 = row.try_get("id").unwrap_or_default();
            let json_data: serde_json::Value = row.try_get("data").unwrap_or_default();
            content_map.insert((table_name.clone(), id), json_data.to_string());
        }
    }

    // Step 4: Build the final response, combining Tantivy scores with PG content.
    let hits: Vec<Hit> = scored_ids
    Ok(candidates
        .into_iter()
        .filter_map(|(score, pg_id)| {
            content_map.get(&(pg_id as i64)).map(|content_json| Hit {
                id: pg_id as i64,
        .filter_map(|(score, pg_id, table_name)| {
            content_map
                .get(&(table_name.clone(), pg_id))
                .map(|content_json| Hit {
                    id: pg_id,
                    score,
                    content_json: content_json.clone(),
                    table_name,
                })
        })
        .collect();
        .collect())
}

    info!(
        "--- SERVER: Successfully processed search. Returning {} hits. ---",
        hits.len()
    );

    let response = SearchResponse { hits };
    Ok(Response::new(response))
#[tonic::async_trait]
impl Searcher for SearcherService {
    async fn search(
        &self,
        request: Request<SearchRequest>,
    ) -> Result<Response<SearchResponse>, Status> {
        self.run_rpc(request).await
    }
}

234
search/src/query_builder.rs
Normal file
@@ -0,0 +1,234 @@
use common::search::{
    json_path_term, normalize_exact, tokenize_ngram, tokenize_word, SchemaFields,
};
use tantivy::query::{
    BooleanQuery, BoostQuery, EmptyQuery, FuzzyTermQuery, Occur, PhraseQuery, Query, QueryParser,
    TermQuery,
};
use tantivy::schema::{IndexRecordOption, Term};
use tantivy::Index;
use tonic::Status;

#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ConstraintMode {
    Fuzzy,
    Exact,
}

#[derive(Clone, Debug)]
pub struct SearchConstraint {
    pub column: String,
    pub query: String,
    pub mode: ConstraintMode,
}

pub fn build_master_query(
    index: &Index,
    fields: &SchemaFields,
    free_query: &str,
    must: &[SearchConstraint],
    table_filter: Option<&str>,
) -> Result<Box<dyn Query>, Status> {
    let mut clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();
    let mut has_search_clause = false;

    for constraint in must {
        let predicate = match constraint.mode {
            ConstraintMode::Exact => exact_predicate(fields, &constraint.column, &constraint.query)?,
            ConstraintMode::Fuzzy => {
                fuzzy_predicate_scoped(fields, &constraint.column, &constraint.query)?
            }
        };
        clauses.push((Occur::Must, predicate));
        has_search_clause = true;
    }

    let free_words = tokenize_word(free_query);
    if !free_words.is_empty() {
        let predicate = fuzzy_predicate_unscoped(index, fields, &free_words)?;
        clauses.push((Occur::Should, predicate));
        has_search_clause = true;
    }

    if let Some(table_name) = table_filter {
        let term = Term::from_field_text(fields.table_name, table_name);
        clauses.push((
            Occur::Must,
            Box::new(TermQuery::new(term, IndexRecordOption::Basic)),
        ));
    }

    if !has_search_clause {
        return Ok(Box::new(EmptyQuery));
    }

    Ok(Box::new(BooleanQuery::new(clauses)))
}

fn exact_predicate(
    fields: &SchemaFields,
    column: &str,
    query: &str,
) -> Result<Box<dyn Query>, Status> {
    let normalized_value = normalize_exact(query);
    if normalized_value.is_empty() {
        return Err(Status::invalid_argument(
            "exact query is empty after normalization",
        ));
    }

    let term = json_path_term(fields.data_exact, column, &normalized_value);
    Ok(Box::new(TermQuery::new(term, IndexRecordOption::Basic)))
}

fn fuzzy_predicate_scoped(
    fields: &SchemaFields,
    column: &str,
    query: &str,
) -> Result<Box<dyn Query>, Status> {
    let words = tokenize_word(query);
    if words.is_empty() {
        return Err(Status::invalid_argument(
            "fuzzy query has no searchable tokens",
        ));
    }

    let mut layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();

    let mut per_word_clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();
    for word in &words {
        let term = json_path_term(fields.data_word, column, word);
        let mut alternates: Vec<(Occur, Box<dyn Query>)> = Vec::new();

        alternates.push((
            Occur::Should,
            Box::new(BoostQuery::new(
                Box::new(TermQuery::new(term.clone(), IndexRecordOption::WithFreqs)),
                4.0,
            )),
        ));

        alternates.push((
            Occur::Should,
            Box::new(BoostQuery::new(
                Box::new(FuzzyTermQuery::new_prefix(term.clone(), 0, false)),
                3.0,
            )),
        ));

        if let Some(distance) = fuzzy_distance(word.chars().count()) {
            alternates.push((
                Occur::Should,
                Box::new(BoostQuery::new(
                    Box::new(FuzzyTermQuery::new(term.clone(), distance, true)),
                    2.0,
                )),
            ));
        }

        per_word_clauses.push((Occur::Must, Box::new(BooleanQuery::new(alternates))));
    }
    layers.push((Occur::Should, Box::new(BooleanQuery::new(per_word_clauses))));

    if words.len() > 1 {
        let phrase_terms: Vec<(usize, Term)> = words
            .iter()
            .enumerate()
            .map(|(offset, word)| (offset, json_path_term(fields.data_word, column, word)))
            .collect();
        let phrase = PhraseQuery::new_with_offset_and_slop(phrase_terms, 3);
        layers.push((
            Occur::Should,
            Box::new(BoostQuery::new(Box::new(phrase), 2.0)),
        ));
    }

    let ngrams = tokenize_ngram(query);
    if !ngrams.is_empty() {
        let ngram_clauses: Vec<(Occur, Box<dyn Query>)> = ngrams
            .into_iter()
            .map(|gram| {
                let term = json_path_term(fields.data_ngram, column, &gram);
                (
                    Occur::Must,
                    Box::new(TermQuery::new(term, IndexRecordOption::Basic)) as Box<dyn Query>,
                )
            })
            .collect();
        layers.push((
            Occur::Should,
            Box::new(BoostQuery::new(Box::new(BooleanQuery::new(ngram_clauses)), 1.0)),
        ));
    }

    Ok(Box::new(BooleanQuery::new(layers)))
}

fn fuzzy_predicate_unscoped(
    index: &Index,
    fields: &SchemaFields,
    words: &[String],
) -> Result<Box<dyn Query>, Status> {
    let mut layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();

    {
        let parser = QueryParser::for_index(index, vec![fields.data_word]);
        let query_string = words
            .iter()
            .map(|word| format!("+{}*", word))
            .collect::<Vec<_>>()
            .join(" ");
        if let Ok(query) = parser.parse_query(&query_string) {
            layers.push((Occur::Should, Box::new(BoostQuery::new(query, 4.0))));
        }
    }

    {
        let parser = QueryParser::for_index(index, vec![fields.data_word]);
        let query_string = words
            .iter()
            .map(|word| match fuzzy_distance(word.chars().count()) {
                Some(distance) => format!("+{}~{}", word, distance),
                None => format!("+{}", word),
            })
            .collect::<Vec<_>>()
            .join(" ");
        if let Ok(query) = parser.parse_query(&query_string) {
            layers.push((Occur::Should, Box::new(BoostQuery::new(query, 2.0))));
        }
    }

    if words.len() > 1 {
        let parser = QueryParser::for_index(index, vec![fields.data_word]);
        let query_string = format!("\"{}\"~3", words.join(" "));
        if let Ok(query) = parser.parse_query(&query_string) {
            layers.push((Occur::Should, Box::new(BoostQuery::new(query, 2.0))));
        }
    }

    {
        let parser = QueryParser::for_index(index, vec![fields.data_ngram]);
        let query_string = words
            .iter()
            .map(|word| format!("+{}", word))
            .collect::<Vec<_>>()
            .join(" ");
        if let Ok(query) = parser.parse_query(&query_string) {
            layers.push((Occur::Should, Box::new(BoostQuery::new(query, 1.0))));
        }
    }

    if layers.is_empty() {
        return Ok(Box::new(EmptyQuery));
    }

    Ok(Box::new(BooleanQuery::new(layers)))
}

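// Levenshtein distance budget scaled by word length: short words must match exactly,
// medium-length words tolerate one edit, longer words tolerate two.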
fn fuzzy_distance(word_len: usize) -> Option<u8> {
    match word_len {
        0..=3 => None,
        4..=6 => Some(1),
        _ => Some(2),
    }
}
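For orientation (not part of the diff): a minimal sketch of how a caller might drive the new query builder, assuming an already-open tantivy Index, its SchemaFields, and an IndexReader are in hand; the column "city", the table "customers", and the query strings are placeholder values.

use tantivy::collector::TopDocs;

fn example_search(
    index: &tantivy::Index,
    fields: &common::search::SchemaFields,
    reader: &tantivy::IndexReader,
) -> Result<(), tonic::Status> {
    // One exact column constraint plus free text, restricted to a single table.
    let must = vec![SearchConstraint {
        column: "city".to_string(),
        query: "Bratislava".to_string(),
        mode: ConstraintMode::Exact,
    }];
    let query = build_master_query(index, fields, "novak jan", &must, Some("customers"))?;

    let searcher = reader.searcher();
    let top_docs = searcher
        .search(&*query, &TopDocs::with_limit(20))
        .map_err(|e| tonic::Status::internal(format!("Search failed: {}", e)))?;
    // Each (score, DocAddress) pair would then be hydrated from Postgres, as run_search does.
    let _ = top_docs;
    Ok(())
}

Constraints are combined with Occur::Must while the free query contributes Occur::Should layers, so rows have to satisfy every constraint but are ranked by how well they match the free text.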
2
server
Submodule server updated: 6b0c3e63b4...8dbe9cc14c