Compare commits

..

6 Commits

Author SHA1 Message Date
Priec
f11c6060ea tui pages is not prod ready yet 2026-05-26 21:52:53 +02:00
Priec
6d8fa0de63 cant change logic once data are in that column 2026-05-19 14:30:56 +02:00
Priec
dc273506b7 working v12 2026-05-17 13:10:44 +02:00
Priec
6a87750329 v0.6.9 nice 2026-05-10 17:14:11 +02:00
Priec
819058ad5c rule page in the validation client2 2026-05-10 09:24:04 +02:00
Priec
def75c00b4 rule page in the validation client 2026-05-10 09:23:33 +02:00
20 changed files with 517 additions and 91 deletions

1
.gitignore vendored
View File

@@ -7,3 +7,4 @@ steel_decimal/tests/property_tests.proptest-regressions
canvas/*.toml
.aider*
.codex
TODO.md

3
.gitmodules vendored
View File

@@ -7,3 +7,6 @@
[submodule "server"]
path = server
url = git@gitlab.com:filipriec/komp_ac_server.git
[submodule "tui-pages"]
path = tui-pages
url = git@gitlab.com:filipriec/tui-pages.git

12
Cargo.lock generated
View File

@@ -493,7 +493,7 @@ checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
[[package]]
name = "canvas"
version = "0.6.7"
version = "0.6.14"
dependencies = [
"anyhow",
"async-trait",
@@ -586,7 +586,7 @@ dependencies = [
[[package]]
name = "client"
version = "0.6.7"
version = "0.6.14"
dependencies = [
"anyhow",
"async-trait",
@@ -642,7 +642,7 @@ dependencies = [
[[package]]
name = "common"
version = "0.6.7"
version = "0.6.14"
dependencies = [
"prost 0.13.5",
"prost-build 0.14.1",
@@ -3117,7 +3117,7 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"
[[package]]
name = "search"
version = "0.6.7"
version = "0.6.14"
dependencies = [
"anyhow",
"common",
@@ -3216,7 +3216,7 @@ dependencies = [
[[package]]
name = "server"
version = "0.6.7"
version = "0.6.14"
dependencies = [
"anyhow",
"bcrypt",
@@ -4549,7 +4549,7 @@ dependencies = [
[[package]]
name = "validation-core"
version = "0.6.7"
version = "0.6.14"
dependencies = [
"regex",
"serde",

View File

@@ -5,7 +5,7 @@ resolver = "2"
[workspace.package]
# TODO: idk how to do the name, fix later
# name = "komp_ac"
version = "0.6.7"
version = "0.6.15"
edition = "2021"
license = "GPL-3.0-or-later"
authors = ["Filip Priečinský <filippriec@gmail.com>"]

2
client

Submodule client updated: 25a901ff5e...426f85d6cf

View File

@@ -8,6 +8,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
".komp_ac.table_validation.FieldValidation",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.field_attribute(
".komp_ac.table_validation.FieldValidation.locked",
"#[serde(default)]",
)
.type_attribute(
".komp_ac.table_validation.CharacterLimits",
"#[derive(serde::Serialize, serde::Deserialize)]",
@@ -68,6 +72,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
".komp_ac.table_validation.ValidationRuleDefinition",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ValidationSetRuleItem",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ValidationSetRuleItem.Source",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ValidationSetDefinition",
"#[derive(serde::Serialize, serde::Deserialize)]",
@@ -128,6 +140,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
".komp_ac.table_validation.ApplyValidationSetResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.LockFieldValidationRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.LockFieldValidationResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
// Enum -> readable strings in JSON ("BYTES", "DISPLAY_WIDTH")
.type_attribute(
".komp_ac.table_validation.CountMode",
@@ -153,6 +173,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
".komp_ac.table_definition.PostTableDefinitionRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_definition.AddTableColumnsRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_definition.TableDefinitionResponse",
"#[derive(serde::Serialize, serde::Deserialize)]"

View File

@@ -14,6 +14,10 @@ service TableDefinition {
// Also inserts metadata and default validation rules. Entirely transactional.
rpc PostTableDefinition(PostTableDefinitionRequest) returns (TableDefinitionResponse);
// Appends new user-defined columns to an existing table.
// Existing columns, links, and table logic are never changed by this call.
rpc AddTableColumns(AddTableColumnsRequest) returns (TableDefinitionResponse);
// Lists all profiles (schemas) and their tables with declared dependencies.
// This provides a tree-like overview of table relationships.
rpc GetProfileTree(komp_ac.common.Empty) returns (ProfileTreeResponse);
@@ -72,6 +76,21 @@ message PostTableDefinitionRequest {
string profile_name = 5;
}
// Defines append-only column additions for an existing table.
message AddTableColumnsRequest {
// Existing profile/schema name.
string profile_name = 1;
// Existing table name in the profile.
string table_name = 2;
// New user-defined columns only. Existing columns cannot be changed here.
repeated ColumnDefinition columns = 3;
// Optional indexes for the new columns only.
repeated string indexes = 4;
}
// Describes one user-defined column for a table.
message ColumnDefinition {
// Column name that follows the same validation rules as table_name.

View File

@@ -52,6 +52,9 @@ message FieldValidation {
// Field must be provided / treated as required by clients and server enforcement layers.
bool required = 4;
// Once locked, this field's validation config cannot be changed.
bool locked = 15;
}
// Character length counting mode
@@ -191,6 +194,9 @@ service TableValidationService {
// Snapshot a reusable set onto a concrete table field.
rpc ApplyValidationSet(ApplyValidationSetRequest) returns (ApplyValidationSetResponse);
// Permanently lock one field's validation config.
rpc LockFieldValidation(LockFieldValidationRequest) returns (LockFieldValidationResponse);
}
message UpdateFieldValidationRequest {
@@ -219,6 +225,7 @@ message ReplaceTableValidationResponse {
}
message ValidationRuleDefinition {
optional int64 id = 4;
string name = 1;
optional string description = 2;
@@ -226,12 +233,28 @@ message ValidationRuleDefinition {
FieldValidation validation = 3;
}
// One entry of a validation set's `ruleItems` list.
message ValidationSetRuleItem {
// NOTE(review): presumably the item's index within the set's ordering — confirm against the server.
int32 position = 1;
// Optional name for this item.
optional string name = 2;
// Optional free-text description for this item.
optional string description = 3;
// Where the item's validation comes from. As a protobuf `oneof`, at most one
// of these fields is set on any given message.
oneof source {
// Refers to a global rule by its name.
string global_rule_name = 10;
// Carries the validation definition directly inside the set item.
FieldValidation inline_validation = 11;
// Refers to a global rule by numeric id.
// NOTE(review): presumably matches ValidationRuleDefinition.id — confirm.
int64 global_rule_id = 12;
}
}
message ValidationSetDefinition {
reserved 3;
string name = 1;
optional string description = 2;
repeated string ruleNames = 3;
// Server-resolved snapshot of all rules in ruleNames order.
// Ordered set items.
repeated ValidationSetRuleItem ruleItems = 5;
// Server-resolved snapshot of all set items in order.
FieldValidation resolvedValidation = 4;
}
@@ -303,3 +326,14 @@ message ApplyValidationSetResponse {
string message = 2;
FieldValidation validation = 3;
}
// Request for the LockFieldValidation RPC, which permanently locks one
// field's validation config.
message LockFieldValidationRequest {
// Profile name used to locate the field.
string profileName = 1;
// Table name used to locate the field.
string tableName = 2;
// NOTE(review): presumably the key of the field whose validation is locked — confirm.
string dataKey = 3;
}
// Response for the LockFieldValidation RPC.
message LockFieldValidationResponse {
// Outcome flag for the lock request.
bool success = 1;
// Accompanying message text.
// NOTE(review): presumably human-readable detail about the outcome — confirm.
string message = 2;
}

Binary file not shown.

View File

@@ -43,6 +43,23 @@ pub struct PostTableDefinitionRequest {
#[prost(string, tag = "5")]
pub profile_name: ::prost::alloc::string::String,
}
/// Defines append-only column additions for an existing table.
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct AddTableColumnsRequest {
/// Existing profile/schema name.
#[prost(string, tag = "1")]
pub profile_name: ::prost::alloc::string::String,
/// Existing table name in the profile.
#[prost(string, tag = "2")]
pub table_name: ::prost::alloc::string::String,
/// New user-defined columns only. Existing columns cannot be changed here.
#[prost(message, repeated, tag = "3")]
pub columns: ::prost::alloc::vec::Vec<ColumnDefinition>,
/// Optional indexes for the new columns only.
#[prost(string, repeated, tag = "4")]
pub indexes: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
}
/// Describes one user-defined column for a table.
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Message)]
@@ -359,6 +376,37 @@ pub mod table_definition_client {
);
self.inner.unary(req, path, codec).await
}
/// Appends new user-defined columns to an existing table.
/// Existing columns, links, and table logic are never changed by this call.
pub async fn add_table_columns(
&mut self,
request: impl tonic::IntoRequest<super::AddTableColumnsRequest>,
) -> std::result::Result<
tonic::Response<super::TableDefinitionResponse>,
tonic::Status,
> {
self.inner
.ready()
.await
.map_err(|e| {
tonic::Status::unknown(
format!("Service was not ready: {}", e.into()),
)
})?;
let codec = tonic::codec::ProstCodec::default();
let path = http::uri::PathAndQuery::from_static(
"/komp_ac.table_definition.TableDefinition/AddTableColumns",
);
let mut req = request.into_request();
req.extensions_mut()
.insert(
GrpcMethod::new(
"komp_ac.table_definition.TableDefinition",
"AddTableColumns",
),
);
self.inner.unary(req, path, codec).await
}
/// Lists all profiles (schemas) and their tables with declared dependencies.
/// This provides a tree-like overview of table relationships.
pub async fn get_profile_tree(
@@ -536,6 +584,15 @@ pub mod table_definition_server {
tonic::Response<super::TableDefinitionResponse>,
tonic::Status,
>;
/// Appends new user-defined columns to an existing table.
/// Existing columns, links, and table logic are never changed by this call.
async fn add_table_columns(
&self,
request: tonic::Request<super::AddTableColumnsRequest>,
) -> std::result::Result<
tonic::Response<super::TableDefinitionResponse>,
tonic::Status,
>;
/// Lists all profiles (schemas) and their tables with declared dependencies.
/// This provides a tree-like overview of table relationships.
async fn get_profile_tree(
@@ -708,6 +765,52 @@ pub mod table_definition_server {
};
Box::pin(fut)
}
"/komp_ac.table_definition.TableDefinition/AddTableColumns" => {
#[allow(non_camel_case_types)]
struct AddTableColumnsSvc<T: TableDefinition>(pub Arc<T>);
impl<
T: TableDefinition,
> tonic::server::UnaryService<super::AddTableColumnsRequest>
for AddTableColumnsSvc<T> {
type Response = super::TableDefinitionResponse;
type Future = BoxFuture<
tonic::Response<Self::Response>,
tonic::Status,
>;
fn call(
&mut self,
request: tonic::Request<super::AddTableColumnsRequest>,
) -> Self::Future {
let inner = Arc::clone(&self.0);
let fut = async move {
<T as TableDefinition>::add_table_columns(&inner, request)
.await
};
Box::pin(fut)
}
}
let accept_compression_encodings = self.accept_compression_encodings;
let send_compression_encodings = self.send_compression_encodings;
let max_decoding_message_size = self.max_decoding_message_size;
let max_encoding_message_size = self.max_encoding_message_size;
let inner = self.inner.clone();
let fut = async move {
let method = AddTableColumnsSvc(inner);
let codec = tonic::codec::ProstCodec::default();
let mut grpc = tonic::server::Grpc::new(codec)
.apply_compression_config(
accept_compression_encodings,
send_compression_encodings,
)
.apply_max_message_size_config(
max_decoding_message_size,
max_encoding_message_size,
);
let res = grpc.unary(method, req).await;
Ok(res)
};
Box::pin(fut)
}
"/komp_ac.table_definition.TableDefinition/GetProfileTree" => {
#[allow(non_camel_case_types)]
struct GetProfileTreeSvc<T: TableDefinition>(pub Arc<T>);

View File

@@ -43,6 +43,10 @@ pub struct FieldValidation {
/// Field must be provided / treated as required by clients and server enforcement layers.
#[prost(bool, tag = "4")]
pub required: bool,
/// Once locked, this field's validation config cannot be changed.
#[prost(bool, tag = "15")]
#[serde(default)]
pub locked: bool,
}
/// Character limit validation (Validation 1).
/// These rules map directly to canvas CharacterLimits.
@@ -209,6 +213,8 @@ pub struct ReplaceTableValidationResponse {
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ValidationRuleDefinition {
#[prost(int64, optional, tag = "4")]
pub id: ::core::option::Option<i64>,
#[prost(string, tag = "1")]
pub name: ::prost::alloc::string::String,
#[prost(string, optional, tag = "2")]
@@ -219,14 +225,40 @@ pub struct ValidationRuleDefinition {
}
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ValidationSetRuleItem {
#[prost(int32, tag = "1")]
pub position: i32,
#[prost(string, optional, tag = "2")]
pub name: ::core::option::Option<::prost::alloc::string::String>,
#[prost(string, optional, tag = "3")]
pub description: ::core::option::Option<::prost::alloc::string::String>,
#[prost(oneof = "validation_set_rule_item::Source", tags = "10, 11, 12")]
pub source: ::core::option::Option<validation_set_rule_item::Source>,
}
/// Nested message and enum types in `ValidationSetRuleItem`.
pub mod validation_set_rule_item {
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Oneof)]
pub enum Source {
#[prost(string, tag = "10")]
GlobalRuleName(::prost::alloc::string::String),
#[prost(message, tag = "11")]
InlineValidation(super::FieldValidation),
#[prost(int64, tag = "12")]
GlobalRuleId(i64),
}
}
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ValidationSetDefinition {
#[prost(string, tag = "1")]
pub name: ::prost::alloc::string::String,
#[prost(string, optional, tag = "2")]
pub description: ::core::option::Option<::prost::alloc::string::String>,
#[prost(string, repeated, tag = "3")]
pub rule_names: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
/// Server-resolved snapshot of all rules in ruleNames order.
/// Ordered set items.
#[prost(message, repeated, tag = "5")]
pub rule_items: ::prost::alloc::vec::Vec<ValidationSetRuleItem>,
/// Server-resolved snapshot of all set items in order.
#[prost(message, optional, tag = "4")]
pub resolved_validation: ::core::option::Option<FieldValidation>,
}
@@ -340,6 +372,24 @@ pub struct ApplyValidationSetResponse {
#[prost(message, optional, tag = "3")]
pub validation: ::core::option::Option<FieldValidation>,
}
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct LockFieldValidationRequest {
#[prost(string, tag = "1")]
pub profile_name: ::prost::alloc::string::String,
#[prost(string, tag = "2")]
pub table_name: ::prost::alloc::string::String,
#[prost(string, tag = "3")]
pub data_key: ::prost::alloc::string::String,
}
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct LockFieldValidationResponse {
#[prost(bool, tag = "1")]
pub success: bool,
#[prost(string, tag = "2")]
pub message: ::prost::alloc::string::String,
}
/// Character length counting mode
#[derive(serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
@@ -849,6 +899,36 @@ pub mod table_validation_service_client {
);
self.inner.unary(req, path, codec).await
}
/// Permanently lock one field's validation config.
pub async fn lock_field_validation(
&mut self,
request: impl tonic::IntoRequest<super::LockFieldValidationRequest>,
) -> std::result::Result<
tonic::Response<super::LockFieldValidationResponse>,
tonic::Status,
> {
self.inner
.ready()
.await
.map_err(|e| {
tonic::Status::unknown(
format!("Service was not ready: {}", e.into()),
)
})?;
let codec = tonic::codec::ProstCodec::default();
let path = http::uri::PathAndQuery::from_static(
"/komp_ac.table_validation.TableValidationService/LockFieldValidation",
);
let mut req = request.into_request();
req.extensions_mut()
.insert(
GrpcMethod::new(
"komp_ac.table_validation.TableValidationService",
"LockFieldValidation",
),
);
self.inner.unary(req, path, codec).await
}
}
}
/// Generated server implementations.
@@ -939,6 +1019,14 @@ pub mod table_validation_service_server {
tonic::Response<super::ApplyValidationSetResponse>,
tonic::Status,
>;
/// Permanently lock one field's validation config.
async fn lock_field_validation(
&self,
request: tonic::Request<super::LockFieldValidationRequest>,
) -> std::result::Result<
tonic::Response<super::LockFieldValidationResponse>,
tonic::Status,
>;
}
/// Service for storing and fetching field-validation definitions.
#[derive(Debug)]
@@ -1516,6 +1604,55 @@ pub mod table_validation_service_server {
};
Box::pin(fut)
}
"/komp_ac.table_validation.TableValidationService/LockFieldValidation" => {
#[allow(non_camel_case_types)]
struct LockFieldValidationSvc<T: TableValidationService>(pub Arc<T>);
impl<
T: TableValidationService,
> tonic::server::UnaryService<super::LockFieldValidationRequest>
for LockFieldValidationSvc<T> {
type Response = super::LockFieldValidationResponse;
type Future = BoxFuture<
tonic::Response<Self::Response>,
tonic::Status,
>;
fn call(
&mut self,
request: tonic::Request<super::LockFieldValidationRequest>,
) -> Self::Future {
let inner = Arc::clone(&self.0);
let fut = async move {
<T as TableValidationService>::lock_field_validation(
&inner,
request,
)
.await
};
Box::pin(fut)
}
}
let accept_compression_encodings = self.accept_compression_encodings;
let send_compression_encodings = self.send_compression_encodings;
let max_decoding_message_size = self.max_decoding_message_size;
let max_encoding_message_size = self.max_encoding_message_size;
let inner = self.inner.clone();
let fut = async move {
let method = LockFieldValidationSvc(inner);
let codec = tonic::codec::ProstCodec::default();
let mut grpc = tonic::server::Grpc::new(codec)
.apply_compression_config(
accept_compression_encodings,
send_compression_encodings,
)
.apply_max_message_size_config(
max_decoding_message_size,
max_encoding_message_size,
);
let res = grpc.unary(method, req).await;
Ok(res)
};
Box::pin(fut)
}
_ => {
Box::pin(async move {
let mut response = http::Response::new(

View File

@@ -1,8 +1,8 @@
use std::path::{Path, PathBuf};
use tantivy::schema::{
Field, IndexRecordOption, JsonObjectOptions, Schema, Term, TextFieldIndexing, INDEXED, STORED,
STRING,
Field, IndexRecordOption, JsonObjectOptions, Schema, Term, TextFieldIndexing, TextOptions,
INDEXED, STORED, STRING,
};
use tantivy::tokenizer::{
AsciiFoldingFilter, LowerCaser, NgramTokenizer, RawTokenizer, RemoveLongFilter,
@@ -13,6 +13,7 @@ use tantivy::Index;
pub const F_PG_ID: &str = "pg_id";
pub const F_TABLE_NAME: &str = "table_name";
pub const F_ROW_KEY: &str = "row_key";
pub const F_ALL_TEXT: &str = "all_text";
pub const F_DATA_WORD: &str = "data_word";
pub const F_DATA_NGRAM: &str = "data_ngram";
pub const F_DATA_EXACT: &str = "data_exact";
@@ -59,6 +60,7 @@ pub fn create_search_schema() -> Schema {
schema_builder.add_u64_field(F_PG_ID, INDEXED | STORED);
schema_builder.add_text_field(F_TABLE_NAME, STRING | STORED);
schema_builder.add_text_field(F_ROW_KEY, STRING | STORED);
schema_builder.add_text_field(F_ALL_TEXT, text_options(TOK_WORD));
schema_builder.add_json_field(F_DATA_WORD, json_options(TOK_WORD, true, false));
schema_builder.add_json_field(F_DATA_NGRAM, json_options(TOK_NGRAM, true, false));
@@ -67,6 +69,14 @@ pub fn create_search_schema() -> Schema {
schema_builder.build()
}
/// Builds the options for an indexed plain-text field.
///
/// The field is tokenized with `tokenizer_name` and indexed with term
/// frequencies and positions. Every other setting is left at
/// `TextOptions::default()`.
fn text_options(tokenizer_name: &str) -> TextOptions {
    TextOptions::default().set_indexing_options(
        TextFieldIndexing::default()
            .set_tokenizer(tokenizer_name)
            .set_index_option(IndexRecordOption::WithFreqsAndPositions),
    )
}
fn json_options(tokenizer_name: &str, with_positions: bool, stored: bool) -> JsonObjectOptions {
let index_option = if with_positions {
IndexRecordOption::WithFreqsAndPositions
@@ -153,6 +163,7 @@ pub struct SchemaFields {
pub pg_id: Field,
pub table_name: Field,
pub row_key: Field,
pub all_text: Field,
pub data_word: Field,
pub data_ngram: Field,
pub data_exact: Field,
@@ -164,6 +175,7 @@ impl SchemaFields {
pg_id: get_field(schema, F_PG_ID)?,
table_name: get_field(schema, F_TABLE_NAME)?,
row_key: get_field(schema, F_ROW_KEY)?,
all_text: get_field(schema, F_ALL_TEXT)?,
data_word: get_field(schema, F_DATA_WORD)?,
data_ngram: get_field(schema, F_DATA_NGRAM)?,
data_exact: get_field(schema, F_DATA_EXACT)?,

View File

@@ -112,6 +112,7 @@ impl SearcherService {
Ok(Response::new(SearchResponse { hits }))
}
}
struct ProfileIndex {
@@ -133,7 +134,7 @@ impl ProfileIndex {
.map_err(|e| Status::internal(format!("Failed to build index reader: {}", e)))?;
let fields = SchemaFields::from(&index.schema()).map_err(|e| {
Status::internal(format!(
"Search index schema mismatch. Reindex required: {}",
"Search index schema mismatch. Delete the stale index and create it again: {}",
e
))
})?;
@@ -205,6 +206,22 @@ fn validate_identifier(value: &str, field_name: &str) -> Result<(), Status> {
Ok(())
}
/// Checks that a search constraint's column name is usable.
///
/// # Errors
///
/// Returns `Status::invalid_argument` when `value` is empty or contains any
/// control character.
fn validate_search_column(value: &str) -> Result<(), Status> {
    // Pick the rejection message first; a valid name returns early.
    // NUL is itself a control character, so `is_control` already covers it.
    let rejection = if value.is_empty() {
        "constraint.column must not be empty"
    } else if value.chars().any(char::is_control) {
        "constraint.column contains invalid characters"
    } else {
        return Ok(());
    };

    Err(Status::invalid_argument(rejection))
}
/// Joins a profile name and a table name into `"profile"."table"`.
///
/// Each part is wrapped in double quotes and the two are separated by a dot.
/// The inputs are inserted verbatim; no escaping is applied here.
fn qualify_profile_table(profile_name: &str, table_name: &str) -> String {
    format!("\"{profile_name}\".\"{table_name}\"")
}
@@ -258,12 +275,7 @@ fn normalize_request(req: SearchRequest) -> Result<NormalizedSearchRequest, Stat
for constraint in req.must {
let column = constraint.column.trim();
if column.is_empty() {
return Err(Status::invalid_argument(
"constraint.column must not be empty",
));
}
validate_identifier(column, "constraint.column")?;
validate_search_column(column)?;
let query = constraint.query.trim();
if query.is_empty() {

View File

@@ -1,5 +1,6 @@
use common::search::{
json_path_term, normalize_exact, tokenize_ngram, tokenize_word, SchemaFields,
json_path_term, normalize_column_name, normalize_exact, tokenize_ngram, tokenize_word,
SchemaFields,
};
use tantivy::query::{
BooleanQuery, BoostQuery, EmptyQuery, FuzzyTermQuery, Occur, PhraseQuery, Query, QueryParser,
@@ -48,7 +49,7 @@ pub fn build_master_query(
let free_words = tokenize_word(free_query);
if !free_words.is_empty() {
let predicate = fuzzy_predicate_unscoped(index, fields, &free_words)?;
clauses.push((Occur::Should, predicate));
clauses.push((Occur::Must, predicate));
has_search_clause = true;
}
@@ -79,7 +80,8 @@ fn exact_predicate(
));
}
let term = json_path_term(fields.data_exact, column, &normalized_value);
let column = normalize_column_name(column);
let term = json_path_term(fields.data_exact, &column, &normalized_value);
Ok(Box::new(TermQuery::new(term, IndexRecordOption::Basic)))
}
@@ -95,11 +97,13 @@ fn fuzzy_predicate_scoped(
));
}
let column = normalize_column_name(column);
let mut layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();
let mut per_word_clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();
for word in &words {
let term = json_path_term(fields.data_word, column, word);
let term = json_path_term(fields.data_word, &column, word);
let mut alternates: Vec<(Occur, Box<dyn Query>)> = Vec::new();
alternates.push((
@@ -136,7 +140,7 @@ fn fuzzy_predicate_scoped(
let phrase_terms: Vec<(usize, Term)> = words
.iter()
.enumerate()
.map(|(offset, word)| (offset, json_path_term(fields.data_word, column, word)))
.map(|(offset, word)| (offset, json_path_term(fields.data_word, &column, word)))
.collect();
let phrase = PhraseQuery::new_with_offset_and_slop(phrase_terms, 3);
layers.push((
@@ -150,7 +154,7 @@ fn fuzzy_predicate_scoped(
let ngram_clauses: Vec<(Occur, Box<dyn Query>)> = ngrams
.into_iter()
.map(|gram| {
let term = json_path_term(fields.data_ngram, column, &gram);
let term = json_path_term(fields.data_ngram, &column, &gram);
(
Occur::Must,
Box::new(TermQuery::new(term, IndexRecordOption::Basic)) as Box<dyn Query>,
@@ -176,35 +180,43 @@ fn fuzzy_predicate_unscoped(
) -> Result<Box<dyn Query>, Status> {
let mut layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();
{
let parser = QueryParser::for_index(index, vec![fields.data_word]);
let query_string = words
.iter()
.map(|word| format!("+{}*", word))
.collect::<Vec<_>>()
.join(" ");
if let Ok(query) = parser.parse_query(&query_string) {
layers.push((Occur::Should, Box::new(BoostQuery::new(query, 4.0))));
}
}
let mut per_word_clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();
for word in words {
let term = Term::from_field_text(fields.all_text, word);
let mut alternates: Vec<(Occur, Box<dyn Query>)> = Vec::new();
{
let parser = QueryParser::for_index(index, vec![fields.data_word]);
let query_string = words
.iter()
.map(|word| match fuzzy_distance(word.chars().count()) {
Some(distance) => format!("+{}~{}", word, distance),
None => format!("+{}", word),
})
.collect::<Vec<_>>()
.join(" ");
if let Ok(query) = parser.parse_query(&query_string) {
layers.push((Occur::Should, Box::new(BoostQuery::new(query, 2.0))));
alternates.push((
Occur::Should,
Box::new(BoostQuery::new(
Box::new(TermQuery::new(term.clone(), IndexRecordOption::WithFreqs)),
4.0,
)),
));
alternates.push((
Occur::Should,
Box::new(BoostQuery::new(
Box::new(FuzzyTermQuery::new_prefix(term.clone(), 0, false)),
3.0,
)),
));
if let Some(distance) = fuzzy_distance(word.chars().count()) {
alternates.push((
Occur::Should,
Box::new(BoostQuery::new(
Box::new(FuzzyTermQuery::new(term, distance, true)),
2.0,
)),
));
}
per_word_clauses.push((Occur::Must, Box::new(BooleanQuery::new(alternates))));
}
layers.push((Occur::Should, Box::new(BooleanQuery::new(per_word_clauses))));
if words.len() > 1 {
let parser = QueryParser::for_index(index, vec![fields.data_word]);
let parser = QueryParser::for_index(index, vec![fields.all_text]);
let query_string = format!("\"{}\"~3", words.join(" "));
if let Ok(query) = parser.parse_query(&query_string) {
layers.push((Occur::Should, Box::new(BoostQuery::new(query, 2.0))));
@@ -212,10 +224,10 @@ fn fuzzy_predicate_unscoped(
}
{
let parser = QueryParser::for_index(index, vec![fields.data_ngram]);
let parser = QueryParser::for_index(index, vec![fields.all_text]);
let query_string = words
.iter()
.map(|word| format!("+{}", word))
.map(|word| format!("+{}*", word))
.collect::<Vec<_>>()
.join(" ");
if let Ok(query) = parser.parse_query(&query_string) {

2
server

Submodule server updated: b178fce273...aa0f9a3108

34
tantivy_todo.md Normal file
View File

@@ -0,0 +1,34 @@
1. Add explicit reindex/backfill tooling.
Right now, only future PostTableData / PutTableData calls index rows. There should be an admin/dev command like:
ReindexProfile(profile_name)
ReindexTable(profile_name, table_name)
ReindexRow(profile_name, table_name, id)
This is the biggest missing piece.
2. Stop using relative ./tantivy_indexes.
Both writer and reader depend on the process working directory. Make it config/env-driven, e.g.
TANTIVY_INDEX_DIR.
3. Add index schema/version metadata.
If you change tokenizers/schema later, old indexes should fail with a clear “index version mismatch, reindex
required” instead of behaving strangely.
4. Batch index commits.
Current code opens a writer and commits per row. Fine for dev, not great for many inserts. A long-lived writer
task batching commits every N docs or every short interval would be more reliable and faster.
5. Make the indexing queue durable.
The current mpsc queue is in-memory. If the server crashes after DB insert but before indexing, search is stale.
For serious use, store pending index jobs in Postgres, process them, mark done.
6. Index only live rows intentionally.
handle_add_or_update currently fetches the row by id without checking deleted = false, and search filters deleted
rows later. I'd either skip indexing deleted rows or make the delete/update semantics explicit.
7. Add typed fields for numbers/dates if you need range queries.
Right now numbers are converted to strings. Good for text search, bad for real numeric filtering/sorting. Tantivy
can do numeric/date fields, but JSON text fields are not enough for robust range search.
8. Decide column-name strategy.
Indexing lowercases the raw DB JSON keys. If the UI uses display names/aliases, column constraints can miss unless the
frontend sends exactly what the index expects. I'd centralize the display-name to physical-name mapping before
search.
9. Add delete hooks for table/profile deletion.
When a table or profile is deleted, the matching Tantivy docs/index directory should be cleaned by code, not
manually.

1
tui-pages Submodule

Submodule tui-pages added at 981d0763bd

View File

@@ -136,15 +136,15 @@ profileName: string
set:
name: string
description: optional string
ruleNames: repeated string
ruleItems: repeated ValidationSetRuleItem
```
Frontend rules:
- `set.name` is required and unique inside a profile.
- `ruleNames` must contain at least one rule.
- `ruleNames` are ordered.
- Every rule name must already exist.
- `ruleItems` must contain at least one item.
- `ruleItems` are ordered.
- Every global rule reference must already exist.
- Duplicate rule names in the same set are rejected.
- Conflicting singleton fragments are rejected.
@@ -362,7 +362,7 @@ Recommended UI:
```text
name
description
ordered rule picker
ordered global/inline rule item picker
resolved preview
```
@@ -430,11 +430,11 @@ validation:
Create set `phone`:
```text
ruleNames:
- required
- phone-length
- digits-only
- phone-mask
ruleItems:
- globalRuleName: required
- globalRuleName: phone-length
- globalRuleName: digits-only
- globalRuleName: phone-mask
```
Apply set:

View File

@@ -11,4 +11,6 @@ pub use rules::{
count_text, CharacterFilter, CharacterLimits, CountMode, DisplayMask, LimitCheckResult,
MaskDisplayMode, PatternFilters, PositionFilter, PositionRange,
};
pub use set::{AppliedValidation, ValidationRule, ValidationSet};
pub use set::{
AppliedValidation, ValidationRule, ValidationSet, ValidationSetItem, ValidationSetResolveError,
};

View File

@@ -1,5 +1,6 @@
use crate::{ValidationConfig, ValidationMergeError, ValidationSettings};
use serde::{Deserialize, Serialize};
use thiserror::Error;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationRule {
@@ -18,19 +19,52 @@ impl ValidationRule {
pub struct ValidationSet {
pub name: String,
pub description: Option<String>,
pub rules: Vec<ValidationRule>,
pub items: Vec<ValidationSetItem>,
}
/// One entry of a [`ValidationSet`]: either a reference to a rule defined
/// elsewhere or settings carried inline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ValidationSetItem {
/// Name of a global rule. During resolution the name is passed to the
/// caller-supplied lookup; a lookup miss yields
/// `ValidationSetResolveError::MissingGlobalRule`.
GlobalRuleRef(String),
/// Settings embedded directly in the set; no lookup is needed.
InlineRule {
/// Optional label for the inline rule. Resolution does not read it.
name: Option<String>,
/// Settings contributed to the merged result.
validation: ValidationSettings,
},
}
impl ValidationSet {
pub fn resolve_settings(&self) -> Result<ValidationSettings, ValidationMergeError> {
ValidationSettings::merge_rules(self.rules.iter().map(|rule| &rule.settings))
pub fn resolve_settings_with_rules<'a>(
&'a self,
rules: impl Fn(&str) -> Option<&'a ValidationRule>,
) -> Result<ValidationSettings, ValidationSetResolveError> {
let settings = self.items.iter().map(|item| match item {
ValidationSetItem::GlobalRuleRef(name) => {
rules(name).map(|rule| &rule.settings).ok_or_else(|| {
ValidationSetResolveError::MissingGlobalRule { name: name.clone() }
})
}
ValidationSetItem::InlineRule { validation, .. } => Ok(validation),
});
let settings = settings.collect::<Result<Vec<_>, _>>()?;
Ok(ValidationSettings::merge_rules(settings)?)
}
pub fn resolve(&self) -> Result<ValidationConfig, ValidationMergeError> {
Ok(self.resolve_settings()?.resolve())
pub fn resolve_with_rules<'a>(
&'a self,
rules: impl Fn(&str) -> Option<&'a ValidationRule>,
) -> Result<ValidationConfig, ValidationSetResolveError> {
Ok(self.resolve_settings_with_rules(rules)?.resolve())
}
}
/// Errors produced while resolving a `ValidationSet` into merged settings.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum ValidationSetResolveError {
/// A `GlobalRuleRef` item named a rule that the lookup did not return.
#[error("validation set references missing global rule '{name}'")]
MissingGlobalRule { name: String },
/// Merging the collected settings failed. Wraps the underlying
/// `ValidationMergeError` and forwards its message unchanged.
#[error(transparent)]
Merge(#[from] ValidationMergeError),
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AppliedValidation {
pub set_name: Option<String>,
@@ -56,19 +90,17 @@ mod tests {
let set = ValidationSet {
name: "phone".to_string(),
description: None,
rules: vec![
ValidationRule {
name: "phone-length".to_string(),
description: None,
settings: ValidationSettings {
items: vec![
ValidationSetItem::InlineRule {
name: Some("phone-length".to_string()),
validation: ValidationSettings {
character_limits: Some(CharacterLimits::new_range(10, 15)),
..ValidationSettings::default()
},
},
ValidationRule {
name: "digits-only".to_string(),
description: None,
settings: ValidationSettings {
ValidationSetItem::InlineRule {
name: Some("digits-only".to_string()),
validation: ValidationSettings {
pattern: Some(PatternSettings {
filters: vec![PositionFilterSettings {
positions: PositionRange::From(0),
@@ -82,7 +114,9 @@ mod tests {
],
};
let settings = set.resolve_settings().expect("set should resolve");
let settings = set
.resolve_settings_with_rules(|_| None)
.expect("set should resolve");
assert!(settings.character_limits.is_some());
assert_eq!(settings.pattern.expect("pattern").filters.len(), 1);
@@ -93,19 +127,17 @@ mod tests {
let set = ValidationSet {
name: "conflict".to_string(),
description: None,
rules: vec![
ValidationRule {
name: "short".to_string(),
description: None,
settings: ValidationSettings {
items: vec![
ValidationSetItem::InlineRule {
name: Some("short".to_string()),
validation: ValidationSettings {
character_limits: Some(CharacterLimits::new(10)),
..ValidationSettings::default()
},
},
ValidationRule {
name: "long".to_string(),
description: None,
settings: ValidationSettings {
ValidationSetItem::InlineRule {
name: Some("long".to_string()),
validation: ValidationSettings {
character_limits: Some(CharacterLimits::new(20)),
..ValidationSettings::default()
},
@@ -113,6 +145,6 @@ mod tests {
],
};
assert!(set.resolve_settings().is_err());
assert!(set.resolve_settings_with_rules(|_| None).is_err());
}
}