Compare commits

...

20 Commits

Author SHA1 Message Date
Priec
6d8fa0de63 cant change logic once data are in that column 2026-05-19 14:30:56 +02:00
Priec
dc273506b7 working v12 2026-05-17 13:10:44 +02:00
Priec
6a87750329 v0.6.9 nice 2026-05-10 17:14:11 +02:00
Priec
819058ad5c rule page in the validation client2 2026-05-10 09:24:04 +02:00
Priec
def75c00b4 rule page in the validation client 2026-05-10 09:23:33 +02:00
Priec
17a13569d8 cargo fmt 2026-05-06 20:33:53 +02:00
Priec
14f88e6a40 validation docs 2026-05-06 20:29:05 +02:00
Priec
3373e00dfc validation core as a dependency2 2026-05-06 19:50:09 +02:00
Priec
f094346e1b validation core as a dependency 2026-05-06 19:03:26 +02:00
Priec
3b0133640f more advancements 2026-05-03 23:34:03 +02:00
Priec
0600d3deaa table validation for the client from the server 2026-05-03 10:34:59 +02:00
Priec
90f8aedc3b better new functionality of column aliases 2026-05-02 13:56:45 +02:00
Priec
2a811b1f8c rename the column aliases 2026-05-02 00:38:54 +02:00
Priec
1f9c29411e multiple requests to the structure of a tables at once(batching) 2026-04-30 11:48:03 +02:00
Priec
b928004c76 search with multiquery redesigned 2026-04-29 19:56:17 +02:00
Priec
fb4769301c column name indexing 2026-04-29 01:33:48 +02:00
Priec
036e12f345 indexing done by the profile and not table 2026-04-29 01:08:59 +02:00
Priec
1ceab57f3b exact search endpoint 2026-04-29 00:40:36 +02:00
Priec
5de1cd7623 refactoring search based on the profile 2026-04-29 00:38:42 +02:00
Priec
1867de513d get profile details with scripts and tables columns is now working 2026-04-27 22:01:17 +02:00
30 changed files with 5296 additions and 432 deletions

2
.gitignore vendored
View File

@@ -6,3 +6,5 @@ steel_decimal/tests/property_tests.proptest-regressions
.direnv/
canvas/*.toml
.aider*
.codex
TODO.md

45
Cargo.lock generated
View File

@@ -493,7 +493,7 @@ checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
[[package]]
name = "canvas"
version = "0.6.0"
version = "0.6.12"
dependencies = [
"anyhow",
"async-trait",
@@ -512,6 +512,7 @@ dependencies = [
"tracing",
"tracing-subscriber",
"unicode-width 0.2.0",
"validation-core",
]
[[package]]
@@ -585,7 +586,7 @@ dependencies = [
[[package]]
name = "client"
version = "0.6.0"
version = "0.6.12"
dependencies = [
"anyhow",
"async-trait",
@@ -596,6 +597,7 @@ dependencies = [
"dotenvy",
"futures",
"lazy_static",
"nucleo",
"prost 0.13.5",
"prost-types 0.13.5",
"ratatui",
@@ -640,7 +642,7 @@ dependencies = [
[[package]]
name = "common"
version = "0.6.0"
version = "0.6.12"
dependencies = [
"prost 0.13.5",
"prost-build 0.14.1",
@@ -2103,6 +2105,27 @@ dependencies = [
"windows-sys 0.60.2",
]
[[package]]
name = "nucleo"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5262af4c94921c2646c5ac6ff7900c2af9cbb08dc26a797e18130a7019c039d4"
dependencies = [
"nucleo-matcher",
"parking_lot",
"rayon",
]
[[package]]
name = "nucleo-matcher"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf33f538733d1a5a3494b836ba913207f14d9d4a1d3cd67030c5061bdd2cac85"
dependencies = [
"memchr",
"unicode-segmentation",
]
[[package]]
name = "num-bigint"
version = "0.4.6"
@@ -3094,7 +3117,7 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"
[[package]]
name = "search"
version = "0.6.0"
version = "0.6.12"
dependencies = [
"anyhow",
"common",
@@ -3193,7 +3216,7 @@ dependencies = [
[[package]]
name = "server"
version = "0.6.0"
version = "0.6.12"
dependencies = [
"anyhow",
"bcrypt",
@@ -3230,7 +3253,9 @@ dependencies = [
"tonic-reflection",
"tracing",
"tracing-subscriber",
"unicode-width 0.2.0",
"uuid",
"validation-core",
"validator",
]
@@ -4522,6 +4547,16 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "validation-core"
version = "0.6.12"
dependencies = [
"regex",
"serde",
"thiserror 2.0.12",
"unicode-width 0.2.0",
]
[[package]]
name = "validator"
version = "0.20.0"

View File

@@ -1,11 +1,11 @@
[workspace]
members = ["client", "server", "common", "search", "canvas"]
members = ["client", "server", "common", "search", "canvas", "validation-core"]
resolver = "2"
[workspace.package]
# TODO: idk how to do the name, fix later
# name = "komp_ac"
version = "0.6.2"
version = "0.6.12"
edition = "2021"
license = "GPL-3.0-or-later"
authors = ["Filip Priečinský <filippriec@gmail.com>"]
@@ -53,3 +53,4 @@ toml = "0.8.20"
unicode-width = "0.2.0"
common = { path = "./common" }
validation-core = { path = "./validation-core" }

2
canvas

Submodule canvas updated: 812ac2a428...e6c942dd41

2
client

Submodule client updated: 2494066140...3badee28b1

View File

@@ -8,6 +8,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
".komp_ac.table_validation.FieldValidation",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.field_attribute(
".komp_ac.table_validation.FieldValidation.locked",
"#[serde(default)]",
)
.type_attribute(
".komp_ac.table_validation.CharacterLimits",
"#[derive(serde::Serialize, serde::Deserialize)]",
@@ -24,6 +28,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
".komp_ac.table_validation.PatternRule",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.PatternPosition",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.CharacterConstraint",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.PatternRules",
"#[derive(serde::Serialize, serde::Deserialize)]",
@@ -32,6 +44,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
".komp_ac.table_validation.CustomFormatter",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.FormatterOption",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.AllowedValues",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.UpdateFieldValidationRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
@@ -40,11 +60,107 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
".komp_ac.table_validation.UpdateFieldValidationResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ReplaceTableValidationRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ReplaceTableValidationResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ValidationRuleDefinition",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ValidationSetRuleItem",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ValidationSetRuleItem.Source",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ValidationSetDefinition",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.UpsertValidationRuleRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.UpsertValidationRuleResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ListValidationRulesRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ListValidationRulesResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.DeleteValidationRuleRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.DeleteValidationRuleResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.UpsertValidationSetRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.UpsertValidationSetResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ListValidationSetsRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ListValidationSetsResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.DeleteValidationSetRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.DeleteValidationSetResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ApplyValidationSetRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ApplyValidationSetResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.LockFieldValidationRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.LockFieldValidationResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
// Enum -> readable strings in JSON ("BYTES", "DISPLAY_WIDTH")
.type_attribute(
".komp_ac.table_validation.CountMode",
"#[derive(serde::Serialize, serde::Deserialize)] #[serde(rename_all = \"SCREAMING_SNAKE_CASE\")]",
)
.type_attribute(
".komp_ac.table_validation.PatternPositionKind",
"#[derive(serde::Serialize, serde::Deserialize)] #[serde(rename_all = \"SCREAMING_SNAKE_CASE\")]",
)
.type_attribute(
".komp_ac.table_validation.CharacterConstraintKind",
"#[derive(serde::Serialize, serde::Deserialize)] #[serde(rename_all = \"SCREAMING_SNAKE_CASE\")]",
)
.type_attribute(
".komp_ac.table_definition.ColumnDefinition",
"#[derive(serde::Serialize, serde::Deserialize)]",
@@ -57,10 +173,34 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
".komp_ac.table_definition.PostTableDefinitionRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_definition.AddTableColumnsRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_definition.TableDefinitionResponse",
"#[derive(serde::Serialize, serde::Deserialize)]"
)
.type_attribute(
".komp_ac.table_definition.GetColumnAliasRenameHistoryRequest",
"#[derive(serde::Serialize, serde::Deserialize)]"
)
.type_attribute(
".komp_ac.table_definition.ColumnAliasRenameHistoryEntry",
"#[derive(serde::Serialize, serde::Deserialize)]"
)
.type_attribute(
".komp_ac.table_definition.GetColumnAliasRenameHistoryResponse",
"#[derive(serde::Serialize, serde::Deserialize)]"
)
.type_attribute(
".komp_ac.table_definition.RenameColumnAliasRequest",
"#[derive(serde::Serialize, serde::Deserialize)]"
)
.type_attribute(
".komp_ac.table_definition.RenameColumnAliasResponse",
"#[derive(serde::Serialize, serde::Deserialize)]"
)
.type_attribute(
".komp_ac.table_script.PostTableScriptRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",

View File

@@ -3,18 +3,34 @@ syntax = "proto3";
package komp_ac.search;
service Searcher {
rpc SearchTable(SearchRequest) returns (SearchResponse);
rpc Search(SearchRequest) returns (SearchResponse);
}
// Matching strategy carried by ColumnConstraint in its `mode` field.
enum MatchMode {
// Zero value, i.e. what a proto3 reader sees when no mode was set.
// NOTE(review): how the server treats UNSPECIFIED is not visible here — confirm.
MATCH_MODE_UNSPECIFIED = 0;
MATCH_MODE_FUZZY = 1;
MATCH_MODE_EXACT = 2;
}
// A query bound to a single column. SearchRequest carries a repeated list of
// these in its `must` field.
message ColumnConstraint {
// Name of the column this constraint targets.
string column = 1;
// Query text for that column.
string query = 2;
// How `query` is matched; see MatchMode.
MatchMode mode = 3;
}
message SearchRequest {
string table_name = 1;
string query = 2;
string profile_name = 1;
optional string table_name = 2;
string free_query = 3;
repeated ColumnConstraint must = 4;
optional uint32 limit = 5;
}
message SearchResponse {
message Hit {
int64 id = 1; // PostgreSQL row ID
float score = 2;
string content_json = 3;
string table_name = 4;
}
repeated Hit hits = 1;
}

View File

@@ -14,10 +14,24 @@ service TableDefinition {
// Also inserts metadata and default validation rules. Entirely transactional.
rpc PostTableDefinition(PostTableDefinitionRequest) returns (TableDefinitionResponse);
// Appends new user-defined columns to an existing table.
// Existing columns, links, and table logic are never changed by this call.
rpc AddTableColumns(AddTableColumnsRequest) returns (TableDefinitionResponse);
// Lists all profiles (schemas) and their tables with declared dependencies.
// This provides a tree-like overview of table relationships.
rpc GetProfileTree(komp_ac.common.Empty) returns (ProfileTreeResponse);
// Fetches all tables with their columns and scripts for a specific profile.
// Pure data retrieval - no business logic.
rpc GetProfileDetails(GetProfileDetailsRequest) returns (GetProfileDetailsResponse);
// Returns the stored rename history for column aliases in one profile.
rpc GetColumnAliasRenameHistory(GetColumnAliasRenameHistoryRequest) returns (GetColumnAliasRenameHistoryResponse);
// Renames a user-visible column alias while keeping the physical column unchanged.
rpc RenameColumnAlias(RenameColumnAliasRequest) returns (RenameColumnAliasResponse);
// Drops a table and its metadata, then deletes the profile if it becomes empty.
rpc DeleteTable(DeleteTableRequest) returns (DeleteTableResponse);
}
@@ -62,6 +76,21 @@ message PostTableDefinitionRequest {
string profile_name = 5;
}
// Defines append-only column additions for an existing table.
message AddTableColumnsRequest {
// Existing profile/schema name.
string profile_name = 1;
// Existing table name in the profile.
string table_name = 2;
// New user-defined columns only. Existing columns cannot be changed here.
repeated ColumnDefinition columns = 3;
// Optional indexes for the new columns only.
repeated string indexes = 4;
}
// Describes one user-defined column for a table.
message ColumnDefinition {
// Column name that follows the same validation rules as table_name.
@@ -119,6 +148,74 @@ message ProfileTreeResponse {
repeated Profile profiles = 1;
}
// Request to fetch all tables, columns and scripts for a profile.
message GetProfileDetailsRequest {
// Profile (schema) name to fetch details for.
string profile_name = 1;
}
// Response with all tables, columns and scripts for a profile.
message GetProfileDetailsResponse {
string profile_name = 1;
repeated TableDetail tables = 2;
}
// Request to fetch recorded column alias rename history for one profile.
message GetColumnAliasRenameHistoryRequest {
string profile_name = 1;
// Optional filter. When omitted, returns all tables in the profile.
optional int64 table_definition_id = 2;
}
// One recorded column alias rename.
message ColumnAliasRenameHistoryEntry {
int64 id = 1;
string profile_name = 2;
int64 table_definition_id = 3;
string table_name = 4;
string old_column_name = 5;
string new_column_name = 6;
string created_at = 7;
}
// Response with stored column alias rename history rows.
message GetColumnAliasRenameHistoryResponse {
string profile_name = 1;
repeated ColumnAliasRenameHistoryEntry entries = 2;
}
// Describes a table with its columns and associated scripts.
message TableDetail {
string name = 1;
int64 id = 2;
repeated ColumnDefinition columns = 3;
repeated ScriptInfo scripts = 4;
}
// A script that targets a specific column in a table.
message ScriptInfo {
int64 script_id = 1;
string target_column = 2;
string target_column_type = 3;
string script = 4;
string description = 5;
}
// Request to rename one user-visible column alias in a table.
message RenameColumnAliasRequest {
string profile_name = 1;
string table_name = 2;
string old_column_name = 3;
string new_column_name = 4;
}
// Response after renaming one column alias.
message RenameColumnAliasResponse {
bool success = 1;
string message = 2;
}
// Request to delete one table definition entirely.
message DeleteTableRequest {
// Profile (schema) name owning the table (must exist).

View File

@@ -4,40 +4,45 @@ package komp_ac.table_structure;
import "common.proto";
// Introspects the physical PostgreSQL table for a given logical table
// (defined in table_definitions) and returns its column structure.
// Introspects the physical PostgreSQL tables for one or more logical tables
// (defined in table_definitions) and returns their column structures.
// The server validates that:
// - The profile (schema) exists in `schemas`
// - The table is defined for that profile in `table_definitions`
// It then queries information_schema for the physical table and returns
// normalized column metadata. If the physical table is missing despite
// a definition, the response may contain an empty `columns` list.
// - Every table is defined for that profile in `table_definitions`
// It then queries information_schema for the physical tables and returns
// normalized column metadata.
service TableStructureService {
// Return the physical column list (name, normalized data_type,
// nullability, primary key flag) for a table in a profile.
// nullability, primary key flag) for one or more tables in a profile.
//
// Behavior:
// - NOT_FOUND if profile doesn't exist in `schemas`
// - NOT_FOUND if table not defined for that profile in `table_definitions`
// - NOT_FOUND if any table is not defined for that profile in `table_definitions`
// - Queries information_schema.columns ordered by ordinal position
// - Normalizes data_type text (details under TableColumn.data_type)
// - Returns an empty list if the table is validated but has no visible
// columns in information_schema (e.g., physical table missing)
rpc GetTableStructure(GetTableStructureRequest) returns (TableStructureResponse);
// - Returns an error if any validated table has no visible columns in
// information_schema (e.g., physical table missing)
rpc GetTableStructure(GetTableStructureRequest) returns (GetTableStructureResponse);
}
// Request identifying the profile (schema) and table to inspect.
// Request identifying the profile (schema) and tables to inspect.
message GetTableStructureRequest {
// Required. Profile (PostgreSQL schema) name. Must exist in `schemas`.
string profile_name = 1;
// Required. Table name within the profile. Must exist in `table_definitions`
// for the given profile. The physical table is then introspected via
// information_schema.
string table_name = 2;
// Required. Table names within the profile. Each must exist in
// `table_definitions` for the given profile. The physical tables are then
// introspected via information_schema.
repeated string table_names = 2;
}
// Response with the ordered list of columns (by ordinal position).
// Batched response keyed by table name.
message GetTableStructureResponse {
// Per-table physical column lists keyed by requested table name.
map<string, TableStructureResponse> table_structures = 1;
}
// Response with the ordered list of columns (by ordinal position) for one table.
message TableStructureResponse {
// Columns of the physical table, including system columns (id, deleted,
// created_at), user-defined columns, and any foreign-key columns such as

View File

@@ -2,32 +2,59 @@
syntax = "proto3";
package komp_ac.table_validation;
// This proto is the canonical server-side storage and distribution contract for
// client validation configuration.
//
// Design goals:
// - The server stores the entire field validation definition in one structured payload.
// - Clients fetch the validation rules for a table in one batch and map them to
// their local validation/runtime system (for example canvas).
// - Common validation must be represented as typed data, not as string mini-languages.
//
// Important split:
// - limits / pattern / allowed_values / required are validation rules.
// - mask / formatter are presentation and input-shaping metadata for clients.
// Request validation rules for a table
message GetTableValidationRequest {
string profileName = 1;
string tableName = 2;
}
// Response with field-level validations; if a field is omitted,
// no validation is applied (default unspecified).
// Response with field-level validations for the whole table.
// If a field is omitted, no validation configuration exists for that field.
message TableValidationResponse {
repeated FieldValidation fields = 1;
}
// Field-level validation (extensible for future kinds)
// Field-level validation definition stored on the server and distributed to clients.
message FieldValidation {
// MUST match your frontend FormState.dataKey for the column
string dataKey = 1;
// Current: only CharacterLimits. More rules can be added later.
// Validation 1: length and counting rules.
CharacterLimits limits = 10;
// Future expansion:
PatternRules pattern = 11; // Validation 2
optional CustomFormatter formatter = 14; // Validation 4 custom formatting logic
// Validation 2: position-based character constraints.
PatternRules pattern = 11;
// Exact-value whitelist.
AllowedValues allowed_values = 12;
// Client-side hint that this field participates in external/asynchronous validation UI.
bool external_validation_enabled = 13;
// Client-side formatter metadata. This is intentionally data-only, not executable code.
optional CustomFormatter formatter = 14;
// Client-side display mask metadata. The server stores raw data without mask literals.
DisplayMask mask = 3;
// ExternalValidation external = 13;
// CustomFormatter formatter = 14;
// Field must be provided / treated as required by clients and server enforcement layers.
bool required = 4;
// Once locked, this field's validation config cannot be changed.
bool locked = 15;
}
// Character length counting mode
@@ -38,7 +65,8 @@ enum CountMode {
DISPLAY_WIDTH = 3;
}
// Character limit validation (Validation 1)
// Character limit validation (Validation 1).
// These rules map directly to canvas CharacterLimits.
message CharacterLimits {
// When zero, the field is considered "not set". If both min/max are zero,
// the server should avoid sending this FieldValidation (no validation).
@@ -51,39 +79,91 @@ message CharacterLimits {
CountMode countMode = 4; // defaults to CHARS if unspecified
}
// Mask for pretty display
// Mask for pretty display only.
//
// This is not a validation rule by itself. It exists so clients can render and
// navigate masked input while still storing raw values server-side.
message DisplayMask {
string pattern = 1; // e.g., "(###) ###-####" or "####-##-##"
string input_char = 2; // e.g., "#"
optional string template_char = 3; // e.g., "_"
}
// One position-based validation rule, similar to CharacterFilter + PositionRange
message PatternRule {
// Range descriptor: how far the rule applies
// Examples:
// - "0" → Single position 0
// - "0-3" → Range 0..3 inclusive
// - "from:5" → From position 5 onward
// - "0,2,5" → Multiple discrete positions
string range = 1;
// Character filter type, case-insensitive keywords:
// "ALPHABETIC", "NUMERIC", "ALPHANUMERIC",
// "ONEOF(<chars>)", "EXACT(:)", "CUSTOM(<name>)"
string filter = 2;
// Which positions a pattern rule applies to.
// This exists instead of a string syntax like "0-3" so the server can validate
// the structure directly and clients do not need to parse a DSL.
message PatternPosition {
PatternPositionKind kind = 1;
uint32 single = 2;
uint32 start = 3;
uint32 end = 4;
repeated uint32 positions = 5;
}
enum PatternPositionKind {
PATTERN_POSITION_KIND_UNSPECIFIED = 0;
PATTERN_POSITION_SINGLE = 1;
PATTERN_POSITION_RANGE = 2;
PATTERN_POSITION_FROM = 3;
PATTERN_POSITION_MULTIPLE = 4;
}
// What type of character constraint a pattern rule applies.
// This mirrors the typed character filters used by canvas.
message CharacterConstraint {
CharacterConstraintKind kind = 1;
// Used when kind == CHARACTER_CONSTRAINT_EXACT.
optional string exact = 2;
// Used when kind == CHARACTER_CONSTRAINT_ONE_OF.
repeated string one_of = 3;
// Used when kind == CHARACTER_CONSTRAINT_REGEX.
optional string regex = 4;
}
enum CharacterConstraintKind {
CHARACTER_CONSTRAINT_KIND_UNSPECIFIED = 0;
CHARACTER_CONSTRAINT_ALPHABETIC = 1;
CHARACTER_CONSTRAINT_NUMERIC = 2;
CHARACTER_CONSTRAINT_ALPHANUMERIC = 3;
CHARACTER_CONSTRAINT_EXACT = 4;
CHARACTER_CONSTRAINT_ONE_OF = 5;
CHARACTER_CONSTRAINT_REGEX = 6;
}
// One position-based validation rule, similar to canvas PositionFilter.
message PatternRule {
PatternPosition position = 1;
CharacterConstraint constraint = 2;
}
// Client-side formatter metadata.
// The formatter "type" is intended to be resolved by a client-side formatter registry.
message CustomFormatter {
// Formatter type identifier; handled client-side.
// Examples: "PSCFormatter", "PhoneFormatter", "CreditCardFormatter", "DateFormatter"
string type = 1;
// Optional free-text note or parameters (e.g. locale, pattern)
optional string description = 2;
repeated FormatterOption options = 2;
optional string description = 3;
}
// Collection of pattern rules for one field
message FormatterOption {
string key = 1;
string value = 2;
}
// Exact-value whitelist configuration.
// This maps to canvas AllowedValues semantics.
message AllowedValues {
repeated string values = 1;
bool allow_empty = 2;
bool case_insensitive = 3;
}
// Collection of pattern rules for one field.
message PatternRules {
// All rules that make up the validation logic
repeated PatternRule rules = 1;
@@ -92,11 +172,31 @@ message PatternRules {
optional string description = 2;
}
// Service to fetch validations for a table
// Service for storing and fetching field-validation definitions.
service TableValidationService {
rpc GetTableValidation(GetTableValidationRequest) returns (TableValidationResponse);
// Upsert a single field validation definition.
rpc UpdateFieldValidation(UpdateFieldValidationRequest) returns (UpdateFieldValidationResponse);
// Replace the full validation definition set for a table in one transaction.
rpc ReplaceTableValidation(ReplaceTableValidationRequest) returns (ReplaceTableValidationResponse);
// Reusable named rule fragments.
rpc UpsertValidationRule(UpsertValidationRuleRequest) returns (UpsertValidationRuleResponse);
rpc ListValidationRules(ListValidationRulesRequest) returns (ListValidationRulesResponse);
rpc DeleteValidationRule(DeleteValidationRuleRequest) returns (DeleteValidationRuleResponse);
// Reusable named sets composed from rules.
rpc UpsertValidationSet(UpsertValidationSetRequest) returns (UpsertValidationSetResponse);
rpc ListValidationSets(ListValidationSetsRequest) returns (ListValidationSetsResponse);
rpc DeleteValidationSet(DeleteValidationSetRequest) returns (DeleteValidationSetResponse);
// Snapshot a reusable set onto a concrete table field.
rpc ApplyValidationSet(ApplyValidationSetRequest) returns (ApplyValidationSetResponse);
// Permanently lock one field's validation config.
rpc LockFieldValidation(LockFieldValidationRequest) returns (LockFieldValidationResponse);
}
message UpdateFieldValidationRequest {
@@ -110,3 +210,130 @@ message UpdateFieldValidationResponse {
bool success = 1;
string message = 2;
}
message ReplaceTableValidationRequest {
string profileName = 1;
string tableName = 2;
// Full replacement set. Fields omitted here are removed from the stored config.
repeated FieldValidation fields = 3;
}
message ReplaceTableValidationResponse {
bool success = 1;
string message = 2;
}
message ValidationRuleDefinition {
optional int64 id = 4;
string name = 1;
optional string description = 2;
// Reusable rule fragment. dataKey is ignored by the server for reusable rules.
FieldValidation validation = 3;
}
// One entry of a ValidationSetDefinition's ordered `ruleItems` list.
message ValidationSetRuleItem {
// NOTE(review): presumably the item's ordinal within the set — confirm against the server.
int32 position = 1;
optional string name = 2;
optional string description = 3;
// Origin of this item's validation. Being a oneof, at most one variant is set.
oneof source {
// Presumably refers to a ValidationRuleDefinition by its `name` — confirm.
string global_rule_name = 10;
// Validation definition embedded directly in the item.
FieldValidation inline_validation = 11;
// Presumably refers to a ValidationRuleDefinition by its `id` — confirm.
int64 global_rule_id = 12;
}
}
message ValidationSetDefinition {
reserved 3;
string name = 1;
optional string description = 2;
// Ordered set items.
repeated ValidationSetRuleItem ruleItems = 5;
// Server-resolved snapshot of all set items in order.
FieldValidation resolvedValidation = 4;
}
message UpsertValidationRuleRequest {
string profileName = 1;
ValidationRuleDefinition rule = 2;
}
message UpsertValidationRuleResponse {
bool success = 1;
string message = 2;
}
message ListValidationRulesRequest {
string profileName = 1;
}
message ListValidationRulesResponse {
repeated ValidationRuleDefinition rules = 1;
}
message DeleteValidationRuleRequest {
string profileName = 1;
string name = 2;
}
message DeleteValidationRuleResponse {
bool success = 1;
string message = 2;
}
message UpsertValidationSetRequest {
string profileName = 1;
ValidationSetDefinition set = 2;
}
message UpsertValidationSetResponse {
bool success = 1;
string message = 2;
}
message ListValidationSetsRequest {
string profileName = 1;
}
message ListValidationSetsResponse {
repeated ValidationSetDefinition sets = 1;
}
message DeleteValidationSetRequest {
string profileName = 1;
string name = 2;
}
message DeleteValidationSetResponse {
bool success = 1;
string message = 2;
}
// Request for the ApplyValidationSet RPC, which the service describes as
// snapshotting a reusable set onto a concrete table field.
message ApplyValidationSetRequest {
string profileName = 1;
string tableName = 2;
// Key of the target field. NOTE(review): presumably matches FieldValidation.dataKey — confirm.
string dataKey = 3;
// Name of the set to apply. NOTE(review): presumably ValidationSetDefinition.name — confirm.
string setName = 4;
}
message ApplyValidationSetResponse {
bool success = 1;
string message = 2;
FieldValidation validation = 3;
}
// Request for the LockFieldValidation RPC, which the service describes as
// permanently locking one field's validation config.
message LockFieldValidationRequest {
string profileName = 1;
string tableName = 2;
// Key of the field to lock. NOTE(review): presumably matches FieldValidation.dataKey — confirm.
string dataKey = 3;
}
message LockFieldValidationResponse {
bool success = 1;
string message = 2;
}

Binary file not shown.

View File

@@ -1,10 +1,25 @@
// This file is @generated by prost-build.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ColumnConstraint {
#[prost(string, tag = "1")]
pub column: ::prost::alloc::string::String,
#[prost(string, tag = "2")]
pub query: ::prost::alloc::string::String,
#[prost(enumeration = "MatchMode", tag = "3")]
pub mode: i32,
}
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct SearchRequest {
#[prost(string, tag = "1")]
pub table_name: ::prost::alloc::string::String,
#[prost(string, tag = "2")]
pub query: ::prost::alloc::string::String,
pub profile_name: ::prost::alloc::string::String,
#[prost(string, optional, tag = "2")]
pub table_name: ::core::option::Option<::prost::alloc::string::String>,
#[prost(string, tag = "3")]
pub free_query: ::prost::alloc::string::String,
#[prost(message, repeated, tag = "4")]
pub must: ::prost::alloc::vec::Vec<ColumnConstraint>,
#[prost(uint32, optional, tag = "5")]
pub limit: ::core::option::Option<u32>,
}
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct SearchResponse {
@@ -22,6 +37,37 @@ pub mod search_response {
pub score: f32,
#[prost(string, tag = "3")]
pub content_json: ::prost::alloc::string::String,
#[prost(string, tag = "4")]
pub table_name: ::prost::alloc::string::String,
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
#[repr(i32)]
pub enum MatchMode {
Unspecified = 0,
Fuzzy = 1,
Exact = 2,
}
impl MatchMode {
/// String value of the enum field names used in the ProtoBuf definition.
///
/// The values are not transformed in any way and thus are considered stable
/// (if the ProtoBuf definition does not change) and safe for programmatic use.
pub fn as_str_name(&self) -> &'static str {
match self {
Self::Unspecified => "MATCH_MODE_UNSPECIFIED",
Self::Fuzzy => "MATCH_MODE_FUZZY",
Self::Exact => "MATCH_MODE_EXACT",
}
}
/// Creates an enum from field names used in the ProtoBuf definition.
pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
match value {
"MATCH_MODE_UNSPECIFIED" => Some(Self::Unspecified),
"MATCH_MODE_FUZZY" => Some(Self::Fuzzy),
"MATCH_MODE_EXACT" => Some(Self::Exact),
_ => None,
}
}
}
/// Generated client implementations.
@@ -115,7 +161,7 @@ pub mod searcher_client {
self.inner = self.inner.max_encoding_message_size(limit);
self
}
pub async fn search_table(
pub async fn search(
&mut self,
request: impl tonic::IntoRequest<super::SearchRequest>,
) -> std::result::Result<tonic::Response<super::SearchResponse>, tonic::Status> {
@@ -129,11 +175,11 @@ pub mod searcher_client {
})?;
let codec = tonic::codec::ProstCodec::default();
let path = http::uri::PathAndQuery::from_static(
"/komp_ac.search.Searcher/SearchTable",
"/komp_ac.search.Searcher/Search",
);
let mut req = request.into_request();
req.extensions_mut()
.insert(GrpcMethod::new("komp_ac.search.Searcher", "SearchTable"));
.insert(GrpcMethod::new("komp_ac.search.Searcher", "Search"));
self.inner.unary(req, path, codec).await
}
}
@@ -151,7 +197,7 @@ pub mod searcher_server {
/// Generated trait containing gRPC methods that should be implemented for use with SearcherServer.
#[async_trait]
pub trait Searcher: std::marker::Send + std::marker::Sync + 'static {
async fn search_table(
async fn search(
&self,
request: tonic::Request<super::SearchRequest>,
) -> std::result::Result<tonic::Response<super::SearchResponse>, tonic::Status>;
@@ -232,11 +278,11 @@ pub mod searcher_server {
}
fn call(&mut self, req: http::Request<B>) -> Self::Future {
match req.uri().path() {
"/komp_ac.search.Searcher/SearchTable" => {
"/komp_ac.search.Searcher/Search" => {
#[allow(non_camel_case_types)]
struct SearchTableSvc<T: Searcher>(pub Arc<T>);
struct SearchSvc<T: Searcher>(pub Arc<T>);
impl<T: Searcher> tonic::server::UnaryService<super::SearchRequest>
for SearchTableSvc<T> {
for SearchSvc<T> {
type Response = super::SearchResponse;
type Future = BoxFuture<
tonic::Response<Self::Response>,
@@ -248,7 +294,7 @@ pub mod searcher_server {
) -> Self::Future {
let inner = Arc::clone(&self.0);
let fut = async move {
<T as Searcher>::search_table(&inner, request).await
<T as Searcher>::search(&inner, request).await
};
Box::pin(fut)
}
@@ -259,7 +305,7 @@ pub mod searcher_server {
let max_encoding_message_size = self.max_encoding_message_size;
let inner = self.inner.clone();
let fut = async move {
let method = SearchTableSvc(inner);
let method = SearchSvc(inner);
let codec = tonic::codec::ProstCodec::default();
let mut grpc = tonic::server::Grpc::new(codec)
.apply_compression_config(

View File

@@ -43,6 +43,23 @@ pub struct PostTableDefinitionRequest {
#[prost(string, tag = "5")]
pub profile_name: ::prost::alloc::string::String,
}
/// Defines append-only column additions for an existing table.
///
/// In addition to the prost `Message` derive, this type derives serde
/// `Serialize` and `Deserialize`.
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct AddTableColumnsRequest {
/// Existing profile/schema name.
#[prost(string, tag = "1")]
pub profile_name: ::prost::alloc::string::String,
/// Existing table name in the profile.
#[prost(string, tag = "2")]
pub table_name: ::prost::alloc::string::String,
/// New user-defined columns only. Existing columns cannot be changed here.
#[prost(message, repeated, tag = "3")]
pub columns: ::prost::alloc::vec::Vec<ColumnDefinition>,
/// Optional indexes for the new columns only.
/// NOTE(review): entries are plain strings, presumably column names — confirm
/// against the server implementation.
#[prost(string, repeated, tag = "4")]
pub indexes: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
}
/// Describes one user-defined column for a table.
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Message)]
@@ -110,6 +127,107 @@ pub mod profile_tree_response {
pub tables: ::prost::alloc::vec::Vec<Table>,
}
}
/// Request to fetch all tables, columns and scripts for a profile.
///
/// Derives only the prost `Message` traits (no serde derives).
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GetProfileDetailsRequest {
/// Profile (schema) name to fetch details for.
#[prost(string, tag = "1")]
pub profile_name: ::prost::alloc::string::String,
}
/// Response with all tables, columns and scripts for a profile.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GetProfileDetailsResponse {
/// Profile name this response belongs to.
/// NOTE(review): presumably echoes the requested profile name — confirm.
#[prost(string, tag = "1")]
pub profile_name: ::prost::alloc::string::String,
/// One `TableDetail` per table in the profile.
#[prost(message, repeated, tag = "2")]
pub tables: ::prost::alloc::vec::Vec<TableDetail>,
}
/// Request to fetch recorded column alias rename history for one profile.
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GetColumnAliasRenameHistoryRequest {
/// Name of the profile whose rename history is requested.
#[prost(string, tag = "1")]
pub profile_name: ::prost::alloc::string::String,
/// Optional filter. When omitted, returns all tables in the profile.
/// Modelled as `Option<i64>`, so `None` means "no filter".
#[prost(int64, optional, tag = "2")]
pub table_definition_id: ::core::option::Option<i64>,
}
/// One recorded column alias rename.
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ColumnAliasRenameHistoryEntry {
/// Identifier of this history entry.
/// NOTE(review): presumably the database row id — confirm.
#[prost(int64, tag = "1")]
pub id: i64,
/// Profile the renamed column belongs to.
#[prost(string, tag = "2")]
pub profile_name: ::prost::alloc::string::String,
/// Identifier of the table definition the rename applies to.
#[prost(int64, tag = "3")]
pub table_definition_id: i64,
/// Name of the table the rename applies to.
#[prost(string, tag = "4")]
pub table_name: ::prost::alloc::string::String,
/// Column alias before the rename.
#[prost(string, tag = "5")]
pub old_column_name: ::prost::alloc::string::String,
/// Column alias after the rename.
#[prost(string, tag = "6")]
pub new_column_name: ::prost::alloc::string::String,
/// Creation time of the entry, transported as a string.
/// NOTE(review): the exact timestamp format is not visible here — confirm.
#[prost(string, tag = "7")]
pub created_at: ::prost::alloc::string::String,
}
/// Response with stored column alias rename history rows.
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GetColumnAliasRenameHistoryResponse {
/// Profile the returned entries belong to.
#[prost(string, tag = "1")]
pub profile_name: ::prost::alloc::string::String,
/// The recorded renames.
/// NOTE(review): ordering of the entries is not specified here — confirm.
#[prost(message, repeated, tag = "2")]
pub entries: ::prost::alloc::vec::Vec<ColumnAliasRenameHistoryEntry>,
}
/// Describes a table with its columns and associated scripts.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct TableDetail {
/// Table name.
#[prost(string, tag = "1")]
pub name: ::prost::alloc::string::String,
/// Table identifier.
/// NOTE(review): presumably the `table_definitions` id — confirm.
#[prost(int64, tag = "2")]
pub id: i64,
/// Column definitions of the table.
#[prost(message, repeated, tag = "3")]
pub columns: ::prost::alloc::vec::Vec<ColumnDefinition>,
/// Scripts associated with the table.
#[prost(message, repeated, tag = "4")]
pub scripts: ::prost::alloc::vec::Vec<ScriptInfo>,
}
/// A script that targets a specific column in a table.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ScriptInfo {
/// Identifier of the script.
#[prost(int64, tag = "1")]
pub script_id: i64,
/// Name of the column the script targets.
#[prost(string, tag = "2")]
pub target_column: ::prost::alloc::string::String,
/// Type of the target column, transported as a string.
/// NOTE(review): the type vocabulary is not visible here — confirm.
#[prost(string, tag = "3")]
pub target_column_type: ::prost::alloc::string::String,
/// The script itself, as a string.
/// NOTE(review): presumably the script source text — confirm.
#[prost(string, tag = "4")]
pub script: ::prost::alloc::string::String,
/// Description of the script.
#[prost(string, tag = "5")]
pub description: ::prost::alloc::string::String,
}
/// Request to rename one user-visible column alias in a table.
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct RenameColumnAliasRequest {
/// Profile that contains the table.
#[prost(string, tag = "1")]
pub profile_name: ::prost::alloc::string::String,
/// Table that contains the column alias.
#[prost(string, tag = "2")]
pub table_name: ::prost::alloc::string::String,
/// Current column alias to rename.
#[prost(string, tag = "3")]
pub old_column_name: ::prost::alloc::string::String,
/// Column alias to use after the rename.
#[prost(string, tag = "4")]
pub new_column_name: ::prost::alloc::string::String,
}
/// Response after renaming one column alias.
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct RenameColumnAliasResponse {
/// Success flag reported by the server.
#[prost(bool, tag = "1")]
pub success: bool,
/// Message accompanying the result.
/// NOTE(review): presumably human-readable — confirm what the server puts here.
#[prost(string, tag = "2")]
pub message: ::prost::alloc::string::String,
}
/// Request to delete one table definition entirely.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct DeleteTableRequest {
@@ -258,6 +376,37 @@ pub mod table_definition_client {
);
self.inner.unary(req, path, codec).await
}
/// Appends new user-defined columns to an existing table.
/// Existing columns, links, and table logic are never changed by this call.
///
/// Sends a unary request to
/// `/komp_ac.table_definition.TableDefinition/AddTableColumns`.
///
/// # Errors
/// Returns `tonic::Status::unknown` when the underlying service is not ready;
/// otherwise returns whatever status the unary call yields.
pub async fn add_table_columns(
&mut self,
request: impl tonic::IntoRequest<super::AddTableColumnsRequest>,
) -> std::result::Result<
tonic::Response<super::TableDefinitionResponse>,
tonic::Status,
> {
// Wait for the inner service to become ready before sending anything.
self.inner
.ready()
.await
.map_err(|e| {
tonic::Status::unknown(
format!("Service was not ready: {}", e.into()),
)
})?;
let codec = tonic::codec::ProstCodec::default();
let path = http::uri::PathAndQuery::from_static(
"/komp_ac.table_definition.TableDefinition/AddTableColumns",
);
let mut req = request.into_request();
// Record the service and method names in the request extensions.
req.extensions_mut()
.insert(
GrpcMethod::new(
"komp_ac.table_definition.TableDefinition",
"AddTableColumns",
),
);
self.inner.unary(req, path, codec).await
}
/// Lists all profiles (schemas) and their tables with declared dependencies.
/// This provides a tree-like overview of table relationships.
pub async fn get_profile_tree(
@@ -289,6 +438,97 @@ pub mod table_definition_client {
);
self.inner.unary(req, path, codec).await
}
/// Fetches all tables with their columns and scripts for a specific profile.
/// Pure data retrieval - no business logic.
///
/// Sends a unary request to
/// `/komp_ac.table_definition.TableDefinition/GetProfileDetails`.
///
/// # Errors
/// Returns `tonic::Status::unknown` when the underlying service is not ready;
/// otherwise returns whatever status the unary call yields.
pub async fn get_profile_details(
&mut self,
request: impl tonic::IntoRequest<super::GetProfileDetailsRequest>,
) -> std::result::Result<
tonic::Response<super::GetProfileDetailsResponse>,
tonic::Status,
> {
// Wait for the inner service to become ready before sending anything.
self.inner
.ready()
.await
.map_err(|e| {
tonic::Status::unknown(
format!("Service was not ready: {}", e.into()),
)
})?;
let codec = tonic::codec::ProstCodec::default();
let path = http::uri::PathAndQuery::from_static(
"/komp_ac.table_definition.TableDefinition/GetProfileDetails",
);
let mut req = request.into_request();
// Record the service and method names in the request extensions.
req.extensions_mut()
.insert(
GrpcMethod::new(
"komp_ac.table_definition.TableDefinition",
"GetProfileDetails",
),
);
self.inner.unary(req, path, codec).await
}
/// Returns the stored rename history for column aliases in one profile.
///
/// Sends a unary request to
/// `/komp_ac.table_definition.TableDefinition/GetColumnAliasRenameHistory`.
///
/// # Errors
/// Returns `tonic::Status::unknown` when the underlying service is not ready;
/// otherwise returns whatever status the unary call yields.
pub async fn get_column_alias_rename_history(
&mut self,
request: impl tonic::IntoRequest<super::GetColumnAliasRenameHistoryRequest>,
) -> std::result::Result<
tonic::Response<super::GetColumnAliasRenameHistoryResponse>,
tonic::Status,
> {
// Wait for the inner service to become ready before sending anything.
self.inner
.ready()
.await
.map_err(|e| {
tonic::Status::unknown(
format!("Service was not ready: {}", e.into()),
)
})?;
let codec = tonic::codec::ProstCodec::default();
let path = http::uri::PathAndQuery::from_static(
"/komp_ac.table_definition.TableDefinition/GetColumnAliasRenameHistory",
);
let mut req = request.into_request();
// Record the service and method names in the request extensions.
req.extensions_mut()
.insert(
GrpcMethod::new(
"komp_ac.table_definition.TableDefinition",
"GetColumnAliasRenameHistory",
),
);
self.inner.unary(req, path, codec).await
}
/// Renames a user-visible column alias while keeping the physical column unchanged.
///
/// Sends a unary request to
/// `/komp_ac.table_definition.TableDefinition/RenameColumnAlias`.
///
/// # Errors
/// Returns `tonic::Status::unknown` when the underlying service is not ready;
/// otherwise returns whatever status the unary call yields.
pub async fn rename_column_alias(
&mut self,
request: impl tonic::IntoRequest<super::RenameColumnAliasRequest>,
) -> std::result::Result<
tonic::Response<super::RenameColumnAliasResponse>,
tonic::Status,
> {
// Wait for the inner service to become ready before sending anything.
self.inner
.ready()
.await
.map_err(|e| {
tonic::Status::unknown(
format!("Service was not ready: {}", e.into()),
)
})?;
let codec = tonic::codec::ProstCodec::default();
let path = http::uri::PathAndQuery::from_static(
"/komp_ac.table_definition.TableDefinition/RenameColumnAlias",
);
let mut req = request.into_request();
// Record the service and method names in the request extensions.
req.extensions_mut()
.insert(
GrpcMethod::new(
"komp_ac.table_definition.TableDefinition",
"RenameColumnAlias",
),
);
self.inner.unary(req, path, codec).await
}
/// Drops a table and its metadata, then deletes the profile if it becomes empty.
pub async fn delete_table(
&mut self,
@@ -344,6 +584,15 @@ pub mod table_definition_server {
tonic::Response<super::TableDefinitionResponse>,
tonic::Status,
>;
/// Appends new user-defined columns to an existing table.
/// Existing columns, links, and table logic are never changed by this call.
///
/// Implementors receive the request wrapped in `tonic::Request` and return
/// either the response wrapped in `tonic::Response` or a `tonic::Status`.
async fn add_table_columns(
&self,
request: tonic::Request<super::AddTableColumnsRequest>,
) -> std::result::Result<
tonic::Response<super::TableDefinitionResponse>,
tonic::Status,
>;
/// Lists all profiles (schemas) and their tables with declared dependencies.
/// This provides a tree-like overview of table relationships.
async fn get_profile_tree(
@@ -353,6 +602,31 @@ pub mod table_definition_server {
tonic::Response<super::ProfileTreeResponse>,
tonic::Status,
>;
/// Fetches all tables with their columns and scripts for a specific profile.
/// Pure data retrieval - no business logic.
///
/// Implementors receive the request wrapped in `tonic::Request` and return
/// either the response wrapped in `tonic::Response` or a `tonic::Status`.
async fn get_profile_details(
&self,
request: tonic::Request<super::GetProfileDetailsRequest>,
) -> std::result::Result<
tonic::Response<super::GetProfileDetailsResponse>,
tonic::Status,
>;
/// Returns the stored rename history for column aliases in one profile.
///
/// Implementors receive the request wrapped in `tonic::Request` and return
/// either the response wrapped in `tonic::Response` or a `tonic::Status`.
async fn get_column_alias_rename_history(
&self,
request: tonic::Request<super::GetColumnAliasRenameHistoryRequest>,
) -> std::result::Result<
tonic::Response<super::GetColumnAliasRenameHistoryResponse>,
tonic::Status,
>;
/// Renames a user-visible column alias while keeping the physical column unchanged.
///
/// Implementors receive the request wrapped in `tonic::Request` and return
/// either the response wrapped in `tonic::Response` or a `tonic::Status`.
async fn rename_column_alias(
&self,
request: tonic::Request<super::RenameColumnAliasRequest>,
) -> std::result::Result<
tonic::Response<super::RenameColumnAliasResponse>,
tonic::Status,
>;
/// Drops a table and its metadata, then deletes the profile if it becomes empty.
async fn delete_table(
&self,
@@ -491,6 +765,52 @@ pub mod table_definition_server {
};
Box::pin(fut)
}
"/komp_ac.table_definition.TableDefinition/AddTableColumns" => {
#[allow(non_camel_case_types)]
struct AddTableColumnsSvc<T: TableDefinition>(pub Arc<T>);
impl<
T: TableDefinition,
> tonic::server::UnaryService<super::AddTableColumnsRequest>
for AddTableColumnsSvc<T> {
type Response = super::TableDefinitionResponse;
type Future = BoxFuture<
tonic::Response<Self::Response>,
tonic::Status,
>;
fn call(
&mut self,
request: tonic::Request<super::AddTableColumnsRequest>,
) -> Self::Future {
let inner = Arc::clone(&self.0);
let fut = async move {
<T as TableDefinition>::add_table_columns(&inner, request)
.await
};
Box::pin(fut)
}
}
let accept_compression_encodings = self.accept_compression_encodings;
let send_compression_encodings = self.send_compression_encodings;
let max_decoding_message_size = self.max_decoding_message_size;
let max_encoding_message_size = self.max_encoding_message_size;
let inner = self.inner.clone();
let fut = async move {
let method = AddTableColumnsSvc(inner);
let codec = tonic::codec::ProstCodec::default();
let mut grpc = tonic::server::Grpc::new(codec)
.apply_compression_config(
accept_compression_encodings,
send_compression_encodings,
)
.apply_max_message_size_config(
max_decoding_message_size,
max_encoding_message_size,
);
let res = grpc.unary(method, req).await;
Ok(res)
};
Box::pin(fut)
}
"/komp_ac.table_definition.TableDefinition/GetProfileTree" => {
#[allow(non_camel_case_types)]
struct GetProfileTreeSvc<T: TableDefinition>(pub Arc<T>);
@@ -537,6 +857,152 @@ pub mod table_definition_server {
};
Box::pin(fut)
}
"/komp_ac.table_definition.TableDefinition/GetProfileDetails" => {
#[allow(non_camel_case_types)]
struct GetProfileDetailsSvc<T: TableDefinition>(pub Arc<T>);
impl<
T: TableDefinition,
> tonic::server::UnaryService<super::GetProfileDetailsRequest>
for GetProfileDetailsSvc<T> {
type Response = super::GetProfileDetailsResponse;
type Future = BoxFuture<
tonic::Response<Self::Response>,
tonic::Status,
>;
fn call(
&mut self,
request: tonic::Request<super::GetProfileDetailsRequest>,
) -> Self::Future {
let inner = Arc::clone(&self.0);
let fut = async move {
<T as TableDefinition>::get_profile_details(&inner, request)
.await
};
Box::pin(fut)
}
}
let accept_compression_encodings = self.accept_compression_encodings;
let send_compression_encodings = self.send_compression_encodings;
let max_decoding_message_size = self.max_decoding_message_size;
let max_encoding_message_size = self.max_encoding_message_size;
let inner = self.inner.clone();
let fut = async move {
let method = GetProfileDetailsSvc(inner);
let codec = tonic::codec::ProstCodec::default();
let mut grpc = tonic::server::Grpc::new(codec)
.apply_compression_config(
accept_compression_encodings,
send_compression_encodings,
)
.apply_max_message_size_config(
max_decoding_message_size,
max_encoding_message_size,
);
let res = grpc.unary(method, req).await;
Ok(res)
};
Box::pin(fut)
}
"/komp_ac.table_definition.TableDefinition/GetColumnAliasRenameHistory" => {
#[allow(non_camel_case_types)]
struct GetColumnAliasRenameHistorySvc<T: TableDefinition>(
pub Arc<T>,
);
impl<
T: TableDefinition,
> tonic::server::UnaryService<
super::GetColumnAliasRenameHistoryRequest,
> for GetColumnAliasRenameHistorySvc<T> {
type Response = super::GetColumnAliasRenameHistoryResponse;
type Future = BoxFuture<
tonic::Response<Self::Response>,
tonic::Status,
>;
fn call(
&mut self,
request: tonic::Request<
super::GetColumnAliasRenameHistoryRequest,
>,
) -> Self::Future {
let inner = Arc::clone(&self.0);
let fut = async move {
<T as TableDefinition>::get_column_alias_rename_history(
&inner,
request,
)
.await
};
Box::pin(fut)
}
}
let accept_compression_encodings = self.accept_compression_encodings;
let send_compression_encodings = self.send_compression_encodings;
let max_decoding_message_size = self.max_decoding_message_size;
let max_encoding_message_size = self.max_encoding_message_size;
let inner = self.inner.clone();
let fut = async move {
let method = GetColumnAliasRenameHistorySvc(inner);
let codec = tonic::codec::ProstCodec::default();
let mut grpc = tonic::server::Grpc::new(codec)
.apply_compression_config(
accept_compression_encodings,
send_compression_encodings,
)
.apply_max_message_size_config(
max_decoding_message_size,
max_encoding_message_size,
);
let res = grpc.unary(method, req).await;
Ok(res)
};
Box::pin(fut)
}
"/komp_ac.table_definition.TableDefinition/RenameColumnAlias" => {
#[allow(non_camel_case_types)]
struct RenameColumnAliasSvc<T: TableDefinition>(pub Arc<T>);
impl<
T: TableDefinition,
> tonic::server::UnaryService<super::RenameColumnAliasRequest>
for RenameColumnAliasSvc<T> {
type Response = super::RenameColumnAliasResponse;
type Future = BoxFuture<
tonic::Response<Self::Response>,
tonic::Status,
>;
fn call(
&mut self,
request: tonic::Request<super::RenameColumnAliasRequest>,
) -> Self::Future {
let inner = Arc::clone(&self.0);
let fut = async move {
<T as TableDefinition>::rename_column_alias(&inner, request)
.await
};
Box::pin(fut)
}
}
let accept_compression_encodings = self.accept_compression_encodings;
let send_compression_encodings = self.send_compression_encodings;
let max_decoding_message_size = self.max_decoding_message_size;
let max_encoding_message_size = self.max_encoding_message_size;
let inner = self.inner.clone();
let fut = async move {
let method = RenameColumnAliasSvc(inner);
let codec = tonic::codec::ProstCodec::default();
let mut grpc = tonic::server::Grpc::new(codec)
.apply_compression_config(
accept_compression_encodings,
send_compression_encodings,
)
.apply_max_message_size_config(
max_decoding_message_size,
max_encoding_message_size,
);
let res = grpc.unary(method, req).await;
Ok(res)
};
Box::pin(fut)
}
"/komp_ac.table_definition.TableDefinition/DeleteTable" => {
#[allow(non_camel_case_types)]
struct DeleteTableSvc<T: TableDefinition>(pub Arc<T>);

View File

@@ -1,17 +1,27 @@
// This file is @generated by prost-build.
/// Request identifying the profile (schema) and table to inspect.
/// Request identifying the profile (schema) and tables to inspect.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GetTableStructureRequest {
/// Required. Profile (PostgreSQL schema) name. Must exist in `schemas`.
#[prost(string, tag = "1")]
pub profile_name: ::prost::alloc::string::String,
/// Required. Table name within the profile. Must exist in `table_definitions`
/// for the given profile. The physical table is then introspected via
/// information_schema.
#[prost(string, tag = "2")]
pub table_name: ::prost::alloc::string::String,
/// Required. Table names within the profile. Each must exist in
/// `table_definitions` for the given profile. The physical tables are then
/// introspected via information_schema.
#[prost(string, repeated, tag = "2")]
pub table_names: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
}
/// Response with the ordered list of columns (by ordinal position).
/// Batched response keyed by table name.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GetTableStructureResponse {
/// Per-table physical column lists keyed by requested table name.
/// Stored in a `HashMap`, so iteration order is unspecified; look tables up
/// by name rather than relying on order.
#[prost(map = "string, message", tag = "1")]
pub table_structures: ::std::collections::HashMap<
::prost::alloc::string::String,
TableStructureResponse,
>,
}
/// Response with the ordered list of columns (by ordinal position) for one table.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct TableStructureResponse {
/// Columns of the physical table, including system columns (id, deleted,
@@ -55,14 +65,13 @@ pub mod table_structure_service_client {
)]
use tonic::codegen::*;
use tonic::codegen::http::Uri;
/// Introspects the physical PostgreSQL table for a given logical table
/// (defined in table_definitions) and returns its column structure.
/// Introspects the physical PostgreSQL tables for one or more logical tables
/// (defined in table_definitions) and returns their column structures.
/// The server validates that:
/// - The profile (schema) exists in `schemas`
/// - The table is defined for that profile in `table_definitions`
/// It then queries information_schema for the physical table and returns
/// normalized column metadata. If the physical table is missing despite
/// a definition, the response may contain an empty `columns` list.
/// - Every table is defined for that profile in `table_definitions`
/// It then queries information_schema for the physical tables and returns
/// normalized column metadata.
#[derive(Debug, Clone)]
pub struct TableStructureServiceClient<T> {
inner: tonic::client::Grpc<T>,
@@ -144,20 +153,20 @@ pub mod table_structure_service_client {
self
}
/// Return the physical column list (name, normalized data_type,
/// nullability, primary key flag) for a table in a profile.
/// nullability, primary key flag) for one or more tables in a profile.
///
/// Behavior:
/// - NOT_FOUND if profile doesn't exist in `schemas`
/// - NOT_FOUND if table not defined for that profile in `table_definitions`
/// - NOT_FOUND if any table is not defined for that profile in `table_definitions`
/// - Queries information_schema.columns ordered by ordinal position
/// - Normalizes data_type text (details under TableColumn.data_type)
/// - Returns an empty list if the table is validated but has no visible
/// columns in information_schema (e.g., physical table missing)
/// - Returns an error if any validated table has no visible columns in
/// information_schema (e.g., physical table missing)
pub async fn get_table_structure(
&mut self,
request: impl tonic::IntoRequest<super::GetTableStructureRequest>,
) -> std::result::Result<
tonic::Response<super::TableStructureResponse>,
tonic::Response<super::GetTableStructureResponse>,
tonic::Status,
> {
self.inner
@@ -198,31 +207,30 @@ pub mod table_structure_service_server {
#[async_trait]
pub trait TableStructureService: std::marker::Send + std::marker::Sync + 'static {
/// Return the physical column list (name, normalized data_type,
/// nullability, primary key flag) for a table in a profile.
/// nullability, primary key flag) for one or more tables in a profile.
///
/// Behavior:
/// - NOT_FOUND if profile doesn't exist in `schemas`
/// - NOT_FOUND if table not defined for that profile in `table_definitions`
/// - NOT_FOUND if any table is not defined for that profile in `table_definitions`
/// - Queries information_schema.columns ordered by ordinal position
/// - Normalizes data_type text (details under TableColumn.data_type)
/// - Returns an empty list if the table is validated but has no visible
/// columns in information_schema (e.g., physical table missing)
/// - Returns an error if any validated table has no visible columns in
/// information_schema (e.g., physical table missing)
async fn get_table_structure(
&self,
request: tonic::Request<super::GetTableStructureRequest>,
) -> std::result::Result<
tonic::Response<super::TableStructureResponse>,
tonic::Response<super::GetTableStructureResponse>,
tonic::Status,
>;
}
/// Introspects the physical PostgreSQL table for a given logical table
/// (defined in table_definitions) and returns its column structure.
/// Introspects the physical PostgreSQL tables for one or more logical tables
/// (defined in table_definitions) and returns their column structures.
/// The server validates that:
/// - The profile (schema) exists in `schemas`
/// - The table is defined for that profile in `table_definitions`
/// It then queries information_schema for the physical table and returns
/// normalized column metadata. If the physical table is missing despite
/// a definition, the response may contain an empty `columns` list.
/// - Every table is defined for that profile in `table_definitions`
/// It then queries information_schema for the physical tables and returns
/// normalized column metadata.
#[derive(Debug)]
pub struct TableStructureServiceServer<T> {
inner: Arc<T>,
@@ -307,7 +315,7 @@ pub mod table_structure_service_server {
T: TableStructureService,
> tonic::server::UnaryService<super::GetTableStructureRequest>
for GetTableStructureSvc<T> {
type Response = super::TableStructureResponse;
type Response = super::GetTableStructureResponse;
type Future = BoxFuture<
tonic::Response<Self::Response>,
tonic::Status,

File diff suppressed because it is too large Load Diff

View File

@@ -1,75 +1,190 @@
// common/src/search.rs
use std::path::{Path, PathBuf};
use tantivy::schema::*;
use tantivy::tokenizer::*;
use tantivy::schema::{
Field, IndexRecordOption, JsonObjectOptions, Schema, Term, TextFieldIndexing, TextOptions,
INDEXED, STORED, STRING,
};
use tantivy::tokenizer::{
AsciiFoldingFilter, LowerCaser, NgramTokenizer, RawTokenizer, RemoveLongFilter,
SimpleTokenizer, TextAnalyzer, TokenStream,
};
use tantivy::Index;
/// Creates a hybrid Slovak search schema with optimized prefix fields.
// Schema field names. `create_search_schema` adds one field per name and
// `SchemaFields::from` resolves them again by the same names.
/// Name of the `u64` field added as `INDEXED | STORED`.
pub const F_PG_ID: &str = "pg_id";
/// Name of the `STRING | STORED` text field holding the table name.
pub const F_TABLE_NAME: &str = "table_name";
/// Name of the `STRING | STORED` text field holding the row key
/// (see `search_row_key` for the key format).
pub const F_ROW_KEY: &str = "row_key";
/// Name of the text field indexed with the `TOK_WORD` tokenizer.
pub const F_ALL_TEXT: &str = "all_text";
/// Name of the JSON field indexed with the `TOK_WORD` tokenizer.
pub const F_DATA_WORD: &str = "data_word";
/// Name of the JSON field indexed with the `TOK_NGRAM` tokenizer.
pub const F_DATA_NGRAM: &str = "data_ngram";
/// Name of the JSON field indexed with the `TOK_EXACT` tokenizer.
pub const F_DATA_EXACT: &str = "data_exact";
// Tokenizer names. `register_tokenizers` registers one analyzer under each.
/// Registered name of the word analyzer.
pub const TOK_WORD: &str = "kw_word";
/// Registered name of the n-gram analyzer.
pub const TOK_NGRAM: &str = "kw_ngram";
/// Registered name of the exact-match analyzer.
pub const TOK_EXACT: &str = "kw_exact";
/// Builds the directory path of the search index that belongs to a profile.
///
/// The result is `root` with `profile_name` appended as a path component.
///
/// NOTE(review): `profile_name` is appended as-is. Under the standard path
/// joining rules an absolute `profile_name` replaces `root`, and `..`
/// components are kept, so callers should pass validated profile names only.
pub fn search_index_path(root: &Path, profile_name: &str) -> PathBuf {
    let mut index_dir = root.to_path_buf();
    index_dir.push(profile_name);
    index_dir
}
/// Builds the key that identifies one table row inside a profile index.
///
/// The key is `<table_name>:<row_id>`. The table name is used verbatim, so a
/// `:` inside `table_name` is not escaped.
pub fn search_row_key(table_name: &str, row_id: i64) -> String {
    let id_text = row_id.to_string();
    let mut key = String::with_capacity(table_name.len() + 1 + id_text.len());
    key.push_str(table_name);
    key.push(':');
    key.push_str(&id_text);
    key
}
/// Normalizes a user-entered value into the form used for exact-mode terms.
///
/// Surrounding whitespace is trimmed first. A value that is empty after
/// trimming yields an empty string. Otherwise the value is run through
/// `exact_analyzer()` and the text of every emitted token is concatenated.
pub fn normalize_exact(input: &str) -> String {
    let value = input.trim();
    let mut normalized = String::with_capacity(value.len());
    if !value.is_empty() {
        let mut analyzer = exact_analyzer();
        let mut tokens = analyzer.token_stream(value);
        // Drive the stream manually: `advance` moves to the next token and
        // `token` exposes the current one.
        while tokens.advance() {
            normalized.push_str(&tokens.token().text);
        }
    }
    normalized
}
/// Normalizes a column name to the JSON-key form used at index time.
///
/// Only ASCII letters are lowercased; every other character, including
/// non-ASCII uppercase letters, is left unchanged.
pub fn normalize_column_name(column: &str) -> String {
    column.chars().map(|ch| ch.to_ascii_lowercase()).collect()
}
/// Creates the column-aware search schema.
pub fn create_search_schema() -> Schema {
let mut schema_builder = Schema::builder();
schema_builder.add_u64_field("pg_id", INDEXED | STORED);
schema_builder.add_u64_field(F_PG_ID, INDEXED | STORED);
schema_builder.add_text_field(F_TABLE_NAME, STRING | STORED);
schema_builder.add_text_field(F_ROW_KEY, STRING | STORED);
schema_builder.add_text_field(F_ALL_TEXT, text_options(TOK_WORD));
// FIELD 1: For prefixes (1-4 chars).
let short_prefix_indexing = TextFieldIndexing::default()
.set_tokenizer("slovak_prefix_edge")
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
let short_prefix_options = TextOptions::default()
.set_indexing_options(short_prefix_indexing)
.set_stored();
schema_builder.add_text_field("prefix_edge", short_prefix_options);
// FIELD 2: For the full word.
let full_word_indexing = TextFieldIndexing::default()
.set_tokenizer("slovak_prefix_full")
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
let full_word_options = TextOptions::default()
.set_indexing_options(full_word_indexing)
.set_stored();
schema_builder.add_text_field("prefix_full", full_word_options);
// NGRAM FIELD: For substring matching.
let ngram_field_indexing = TextFieldIndexing::default()
.set_tokenizer("slovak_ngram")
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
let ngram_options = TextOptions::default()
.set_indexing_options(ngram_field_indexing)
.set_stored();
schema_builder.add_text_field("text_ngram", ngram_options);
schema_builder.add_json_field(F_DATA_WORD, json_options(TOK_WORD, true, false));
schema_builder.add_json_field(F_DATA_NGRAM, json_options(TOK_NGRAM, true, false));
schema_builder.add_json_field(F_DATA_EXACT, json_options(TOK_EXACT, false, false));
schema_builder.build()
}
/// Registers all necessary Slovak tokenizers with the index.
///
/// This must be called by ANY process that opens the index
/// to ensure the tokenizers are loaded into memory.
pub fn register_slovak_tokenizers(index: &Index) -> tantivy::Result<()> {
/// Builds options for a text field indexed with the named tokenizer.
///
/// Frequencies and positions are recorded. The field is not marked as stored.
fn text_options(tokenizer_name: &str) -> TextOptions {
    TextOptions::default().set_indexing_options(
        TextFieldIndexing::default()
            .set_tokenizer(tokenizer_name)
            .set_index_option(IndexRecordOption::WithFreqsAndPositions),
    )
}
/// Builds options for a JSON field indexed with the named tokenizer.
///
/// * `with_positions` - record frequencies and positions when `true`,
///   otherwise use the basic record option.
/// * `stored` - additionally mark the field as stored when `true`.
fn json_options(tokenizer_name: &str, with_positions: bool, stored: bool) -> JsonObjectOptions {
    let record = match with_positions {
        true => IndexRecordOption::WithFreqsAndPositions,
        false => IndexRecordOption::Basic,
    };
    let indexed = JsonObjectOptions::default().set_indexing_options(
        TextFieldIndexing::default()
            .set_tokenizer(tokenizer_name)
            .set_index_option(record),
    );
    if stored {
        indexed.set_stored()
    } else {
        indexed
    }
}
/// Registers all required tokenizers with the index.
pub fn register_tokenizers(index: &Index) -> tantivy::Result<()> {
let tokenizer_manager = index.tokenizers();
// TOKENIZER for `prefix_edge`: Edge N-gram (1-4 chars)
let edge_tokenizer = TextAnalyzer::builder(NgramTokenizer::new(1, 4, true)?)
.filter(RemoveLongFilter::limit(40))
.filter(LowerCaser)
.filter(AsciiFoldingFilter)
.build();
tokenizer_manager.register("slovak_prefix_edge", edge_tokenizer);
// TOKENIZER for `prefix_full`: Simple word tokenizer
let full_tokenizer = TextAnalyzer::builder(SimpleTokenizer::default())
.filter(RemoveLongFilter::limit(40))
.filter(LowerCaser)
.filter(AsciiFoldingFilter)
.build();
tokenizer_manager.register("slovak_prefix_full", full_tokenizer);
// NGRAM TOKENIZER: For substring matching.
let ngram_tokenizer = TextAnalyzer::builder(NgramTokenizer::new(3, 3, false)?)
.filter(RemoveLongFilter::limit(40))
.filter(LowerCaser)
.filter(AsciiFoldingFilter)
.build();
tokenizer_manager.register("slovak_ngram", ngram_tokenizer);
tokenizer_manager.register(TOK_WORD, word_analyzer());
tokenizer_manager.register(TOK_NGRAM, ngram_analyzer()?);
tokenizer_manager.register(TOK_EXACT, exact_analyzer());
Ok(())
}
/// Builds the word-level analyzer.
///
/// Pipeline, in order: `SimpleTokenizer`, `RemoveLongFilter` with a limit of
/// 80, `LowerCaser`, `AsciiFoldingFilter`.
fn word_analyzer() -> TextAnalyzer {
    let tokenizer = SimpleTokenizer::default();
    let pipeline = TextAnalyzer::builder(tokenizer)
        .filter(RemoveLongFilter::limit(80))
        .filter(LowerCaser)
        .filter(AsciiFoldingFilter);
    pipeline.build()
}
/// Builds the n-gram analyzer.
///
/// Pipeline, in order: `NgramTokenizer::new(3, 3, false)`, `RemoveLongFilter`
/// with a limit of 80, `LowerCaser`, `AsciiFoldingFilter`.
///
/// # Errors
/// Propagates the error returned by `NgramTokenizer::new`.
fn ngram_analyzer() -> tantivy::Result<TextAnalyzer> {
    let trigrams = NgramTokenizer::new(3, 3, false)?;
    // NOTE(review): with fixed 3-character grams the length filter probably
    // never removes anything; it is kept so the filter chain stays unchanged.
    let analyzer = TextAnalyzer::builder(trigrams)
        .filter(RemoveLongFilter::limit(80))
        .filter(LowerCaser)
        .filter(AsciiFoldingFilter)
        .build();
    Ok(analyzer)
}
/// Builds the exact-match analyzer.
///
/// Pipeline, in order: `RawTokenizer`, `LowerCaser`, `AsciiFoldingFilter`.
/// Unlike the word and n-gram analyzers, no length filter is applied.
fn exact_analyzer() -> TextAnalyzer {
    let raw = RawTokenizer::default();
    let pipeline = TextAnalyzer::builder(raw)
        .filter(LowerCaser)
        .filter(AsciiFoldingFilter);
    pipeline.build()
}
/// Splits `text` into tokens using the word analyzer, i.e. the same pipeline
/// that is registered for the `data_word` field.
pub fn tokenize_word(text: &str) -> Vec<String> {
    let analyzer = word_analyzer();
    tokenize_with(analyzer, text)
}
/// Splits `text` into tokens using the n-gram analyzer, i.e. the same pipeline
/// that is registered for the `data_ngram` field.
///
/// If the analyzer cannot be constructed, the error is discarded and an empty
/// vector is returned.
pub fn tokenize_ngram(text: &str) -> Vec<String> {
    ngram_analyzer()
        .map(|analyzer| tokenize_with(analyzer, text))
        .unwrap_or_default()
}
/// Runs `text` through `analyzer` and collects the text of every token, in
/// the order the analyzer emits them.
fn tokenize_with(mut analyzer: TextAnalyzer, text: &str) -> Vec<String> {
    let mut collected = Vec::new();
    let mut tokens = analyzer.token_stream(text);
    // `advance` moves to the next token; `token` exposes the current one.
    while tokens.advance() {
        collected.push(tokens.token().text.clone());
    }
    collected
}
/// Creates a term for `text` located under the JSON path `column` of `field`.
///
/// The path is passed to `Term::from_field_json_path` with its boolean flag
/// set to `false`, and `text` is then appended as a string value.
///
/// NOTE(review): `column` and `text` are used verbatim; presumably callers
/// normalize them the same way as at index time — confirm.
pub fn json_path_term(field: Field, column: &str, text: &str) -> Term {
    let mut scoped = Term::from_field_json_path(field, column, false);
    scoped.append_type_and_str(text);
    scoped
}
/// Handles to every field of the search schema, resolved by name.
///
/// Build it with [`SchemaFields::from`], which returns an error when any of
/// the expected fields is missing from the schema. Each member holds the
/// [`Field`] registered under the correspondingly named `F_*` constant.
///
/// The struct consists only of `Field` handles, so it derives `Copy` and can
/// be passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SchemaFields {
    /// Field registered under [`F_PG_ID`].
    pub pg_id: Field,
    /// Field registered under [`F_TABLE_NAME`].
    pub table_name: Field,
    /// Field registered under [`F_ROW_KEY`].
    pub row_key: Field,
    /// Field registered under [`F_ALL_TEXT`].
    pub all_text: Field,
    /// Field registered under [`F_DATA_WORD`].
    pub data_word: Field,
    /// Field registered under [`F_DATA_NGRAM`].
    pub data_ngram: Field,
    /// Field registered under [`F_DATA_EXACT`].
    pub data_exact: Field,
}
impl SchemaFields {
    /// Resolves every required field of `schema` by name.
    ///
    /// # Errors
    /// Returns the error produced by `get_field` for the first field that
    /// cannot be found. Fields are looked up in declaration order.
    pub fn from(schema: &Schema) -> tantivy::Result<Self> {
        let pg_id = get_field(schema, F_PG_ID)?;
        let table_name = get_field(schema, F_TABLE_NAME)?;
        let row_key = get_field(schema, F_ROW_KEY)?;
        let all_text = get_field(schema, F_ALL_TEXT)?;
        let data_word = get_field(schema, F_DATA_WORD)?;
        let data_ngram = get_field(schema, F_DATA_NGRAM)?;
        let data_exact = get_field(schema, F_DATA_EXACT)?;
        Ok(Self {
            pg_id,
            table_name,
            row_key,
            all_text,
            data_word,
            data_ngram,
            data_exact,
        })
    }
}
/// Looks up the field called `name` in `schema`.
///
/// # Errors
/// Returns `TantivyError::SchemaError` naming the missing field and the underlying error.
fn get_field(schema: &Schema, name: &str) -> tantivy::Result<Field> {
    match schema.get_field(name) {
        Ok(field) => Ok(field),
        Err(e) => Err(tantivy::TantivyError::SchemaError(format!(
            "schema is missing field '{name}': {e}"
        ))),
    }
}

1
search/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
.codex

View File

@@ -1,279 +1,448 @@
// src/lib.rs
mod query_builder;
use std::collections::HashMap;
use std::path::Path;
use tantivy::collector::TopDocs;
use tantivy::query::{
BooleanQuery, BoostQuery, FuzzyTermQuery, Occur, Query, QueryParser, TermQuery,
};
use tantivy::schema::{IndexRecordOption, Value};
use tantivy::{Index, TantivyDocument, Term};
use tonic::{Request, Response, Status};
use std::sync::{Arc, Mutex};
use common::proto::komp_ac::search::searcher_server::Searcher;
pub use common::proto::komp_ac::search::searcher_server::SearcherServer;
use common::proto::komp_ac::search::{search_response::Hit, SearchRequest, SearchResponse};
use common::search::register_slovak_tokenizers;
use common::search::{register_tokenizers, search_index_path, SchemaFields};
use query_builder::{build_master_query, ConstraintMode, SearchConstraint};
use sqlx::{PgPool, Row};
use tantivy::collector::TopDocs;
use tantivy::schema::Value;
use tantivy::{Index, IndexReader, ReloadPolicy, TantivyDocument};
use tonic::{Request, Response, Status};
use tracing::info;
// We need to hold the database pool in our service struct.
const INDEX_ROOT: &str = "./tantivy_indexes";
const DEFAULT_RESULT_LIMIT: usize = 25;
const HARD_RESULT_LIMIT: usize = 200;
const DEFAULT_LIST_LIMIT: usize = 5;
/// State of the search service that implements the `Searcher` RPC trait.
pub struct SearcherService {
/// Postgres pool handed to the profile/table lookups and the row fetches.
pub pool: PgPool,
/// Opened profile indexes keyed by profile name; filled through `profile_index`.
profiles: Mutex<HashMap<String, Arc<ProfileIndex>>>,
}
/// Replaces accented Latin letters used in Slovak (and a few neighbouring languages) with
/// their unaccented ASCII base letter, preserving case.
///
/// Every character that has no mapping is returned unchanged, so the output has the same
/// number of characters as the input.
fn normalize_slovak_text(text: &str) -> String {
    text.chars()
        .map(|c| match c {
            'á' | 'à' | 'â' | 'ä' | 'ă' | 'ā' => 'a',
            'Á' | 'À' | 'Â' | 'Ä' | 'Ă' | 'Ā' => 'A',
            'é' | 'è' | 'ê' | 'ë' | 'ě' | 'ē' => 'e',
            'É' | 'È' | 'Ê' | 'Ë' | 'Ě' | 'Ē' => 'E',
            'í' | 'ì' | 'î' | 'ï' | 'ī' => 'i',
            'Í' | 'Ì' | 'Î' | 'Ï' | 'Ī' => 'I',
            'ó' | 'ò' | 'ô' | 'ö' | 'ō' | 'ő' => 'o',
            'Ó' | 'Ò' | 'Ô' | 'Ö' | 'Ō' | 'Ő' => 'O',
            'ú' | 'ù' | 'û' | 'ü' | 'ū' | 'ű' => 'u',
            'Ú' | 'Ù' | 'Û' | 'Ü' | 'Ū' | 'Ű' => 'U',
            'ý' | 'ỳ' | 'ŷ' | 'ÿ' => 'y',
            'Ý' | 'Ỳ' | 'Ŷ' | 'Ÿ' => 'Y',
            'č' => 'c',
            'Č' => 'C',
            'ď' => 'd',
            'Ď' => 'D',
            // Slovak has both the caron form (ľ) and the acute form (ĺ) of "l".
            'ľ' | 'ĺ' => 'l',
            'Ľ' | 'Ĺ' => 'L',
            'ň' => 'n',
            'Ň' => 'N',
            // Slovak uses the acute form (ŕ); the caron form (ř) is kept as well.
            'ř' | 'ŕ' => 'r',
            'Ř' | 'Ŕ' => 'R',
            'š' => 's',
            'Š' => 'S',
            'ť' => 't',
            'Ť' => 'T',
            'ž' => 'z',
            'Ž' => 'Z',
            _ => c,
        })
        .collect()
}
impl SearcherService {
pub fn new(pool: PgPool) -> Self {
Self {
pool,
profiles: Mutex::new(HashMap::new()),
}
}
#[tonic::async_trait]
impl Searcher for SearcherService {
async fn search_table(
async fn run_rpc(
&self,
request: Request<SearchRequest>,
) -> Result<Response<SearchResponse>, Status> {
let req = request.into_inner();
let table_name = req.table_name;
let query_str = req.query;
let normalized = normalize_request(req)?;
// --- MODIFIED LOGIC ---
// If the query is empty, fetch the 5 most recent records.
if query_str.trim().is_empty() {
info!(
"Empty query for table '{}'. Fetching default results.",
table_name
);
let qualified_table = format!("gen.\"{}\"", table_name);
let sql = format!(
"SELECT id, to_jsonb(t) AS data FROM {} t ORDER BY id DESC LIMIT 5",
qualified_table
);
let rows = sqlx::query(&sql).fetch_all(&self.pool).await.map_err(|e| {
Status::internal(format!("DB query for default results failed: {}", e))
})?;
let hits: Vec<Hit> = rows
.into_iter()
.map(|row| {
let id: i64 = row.try_get("id").unwrap_or_default();
let json_data: serde_json::Value = row.try_get("data").unwrap_or_default();
Hit {
id,
// Score is 0.0 as this is not a relevance-ranked search
score: 0.0,
content_json: json_data.to_string(),
}
})
.collect();
info!(
"--- SERVER: Successfully processed empty query. Returning {} default hits. ---",
hits.len()
);
return Ok(Response::new(SearchResponse { hits }));
}
// --- END OF MODIFIED LOGIC ---
let index_path = Path::new("./tantivy_indexes").join(&table_name);
if !index_path.exists() {
if !profile_exists(&self.pool, &normalized.profile_name).await? {
return Err(Status::not_found(format!(
"No search index found for table '{}'",
table_name
"Profile '{}' was not found",
normalized.profile_name
)));
}
let index = Index::open_in_dir(&index_path)
.map_err(|e| Status::internal(format!("Failed to open index: {}", e)))?;
register_slovak_tokenizers(&index).map_err(|e| {
Status::internal(format!("Failed to register Slovak tokenizers: {}", e))
})?;
let reader = index
.reader()
.map_err(|e| Status::internal(format!("Failed to create index reader: {}", e)))?;
let searcher = reader.searcher();
let schema = index.schema();
let pg_id_field = schema
.get_field("pg_id")
.map_err(|_| Status::internal("Schema is missing the 'pg_id' field."))?;
// --- Query Building Logic (no changes here) ---
let prefix_edge_field = schema.get_field("prefix_edge").unwrap();
let prefix_full_field = schema.get_field("prefix_full").unwrap();
let text_ngram_field = schema.get_field("text_ngram").unwrap();
let normalized_query = normalize_slovak_text(&query_str);
let words: Vec<&str> = normalized_query.split_whitespace().collect();
if words.is_empty() {
return Ok(Response::new(SearchResponse { hits: vec![] }));
}
let mut query_layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();
// ... all your query building layers remain exactly the same ...
// ===============================
// LAYER 1: PREFIX MATCHING (HIGHEST PRIORITY, Boost: 4.0)
// ===============================
{
let mut must_clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();
for word in &words {
let edge_term = Term::from_field_text(prefix_edge_field, word);
let full_term = Term::from_field_text(prefix_full_field, word);
let per_word_query = BooleanQuery::new(vec![
(
Occur::Should,
Box::new(TermQuery::new(edge_term, IndexRecordOption::Basic)),
),
(
Occur::Should,
Box::new(TermQuery::new(full_term, IndexRecordOption::Basic)),
),
]);
must_clauses.push((Occur::Must, Box::new(per_word_query) as Box<dyn Query>));
}
if !must_clauses.is_empty() {
let prefix_query = BooleanQuery::new(must_clauses);
let boosted_query = BoostQuery::new(Box::new(prefix_query), 4.0);
query_layers.push((Occur::Should, Box::new(boosted_query)));
if let Some(table_name) = normalized.table_name.as_deref() {
if !table_exists(&self.pool, &normalized.profile_name, table_name).await? {
return Err(Status::not_found(format!(
"Table '{}' was not found in profile '{}'",
table_name, normalized.profile_name
)));
}
}
// ===============================
// LAYER 2: FUZZY MATCHING (HIGH PRIORITY, Boost: 3.0)
// ===============================
{
let last_word = words.last().unwrap();
let fuzzy_term = Term::from_field_text(prefix_full_field, last_word);
let fuzzy_query = FuzzyTermQuery::new(fuzzy_term, 2, true);
let boosted_query = BoostQuery::new(Box::new(fuzzy_query), 3.0);
query_layers.push((Occur::Should, Box::new(boosted_query)));
if !normalized.has_input() {
let Some(table_name) = normalized.table_name.as_deref() else {
return Err(Status::invalid_argument(
"table_name is required when query is empty",
));
};
let hits = fetch_latest_rows(
&self.pool,
&normalized.profile_name,
table_name,
normalized.limit.unwrap_or(DEFAULT_LIST_LIMIT),
)
.await?;
return Ok(Response::new(SearchResponse { hits }));
}
// ===============================
// LAYER 3: PHRASE MATCHING WITH SLOP (MEDIUM PRIORITY, Boost: 2.0)
// ===============================
if words.len() > 1 {
let slop_parser = QueryParser::for_index(&index, vec![prefix_full_field]);
let slop_query_str = format!("\"{}\"~3", normalized_query);
if let Ok(slop_query) = slop_parser.parse_query(&slop_query_str) {
let boosted_query = BoostQuery::new(slop_query, 2.0);
query_layers.push((Occur::Should, Box::new(boosted_query)));
let index_path = search_index_path(Path::new(INDEX_ROOT), &normalized.profile_name);
if !index_path.exists() {
return Err(Status::not_found(format!(
"No search index found for profile '{}'",
normalized.profile_name
)));
}
let profile = profile_index(&self.profiles, &normalized.profile_name, &index_path)?;
let mut hits = run_search(
&self.pool,
&profile,
&normalized.profile_name,
normalized.table_name.as_deref(),
&normalized.free_query,
&normalized.must,
normalized.limit.unwrap_or(DEFAULT_RESULT_LIMIT),
)
.await?;
hits.sort_by(|left, right| right.score.total_cmp(&left.score));
if let Some(limit) = normalized.limit {
if hits.len() > limit {
hits.truncate(limit);
}
}
// ===============================
// LAYER 4: NGRAM SUBSTRING MATCHING (LOWEST PRIORITY, Boost: 1.0)
// ===============================
{
let ngram_parser = QueryParser::for_index(&index, vec![text_ngram_field]);
if let Ok(ngram_query) = ngram_parser.parse_query(&normalized_query) {
let boosted_query = BoostQuery::new(ngram_query, 1.0);
query_layers.push((Occur::Should, Box::new(boosted_query)));
}
}
let master_query = BooleanQuery::new(query_layers);
// --- End of Query Building Logic ---
let top_docs = searcher
.search(&master_query, &TopDocs::with_limit(100))
.map_err(|e| Status::internal(format!("Search failed: {}", e)))?;
if top_docs.is_empty() {
return Ok(Response::new(SearchResponse { hits: vec![] }));
}
// --- NEW LOGIC: Fetch from DB and combine results ---
// Step 1: Extract (score, pg_id) from Tantivy results.
let mut scored_ids: Vec<(f32, u64)> = Vec::new();
for (score, doc_address) in top_docs {
let doc: TantivyDocument = searcher
.doc(doc_address)
.map_err(|e| Status::internal(format!("Failed to retrieve document: {}", e)))?;
if let Some(pg_id_value) = doc.get_first(pg_id_field) {
if let Some(pg_id) = pg_id_value.as_u64() {
scored_ids.push((score, pg_id));
}
}
}
// Step 2: Fetch all corresponding rows from Postgres in a single query.
let pg_ids: Vec<i64> = scored_ids.iter().map(|(_, id)| *id as i64).collect();
let qualified_table = format!("gen.\"{}\"", table_name);
let query_str = format!(
"SELECT id, to_jsonb(t) AS data FROM {} t WHERE id = ANY($1)",
qualified_table
);
let rows = sqlx::query(&query_str)
.bind(&pg_ids)
.fetch_all(&self.pool)
.await
.map_err(|e| Status::internal(format!("Database query failed: {}", e)))?;
// Step 3: Map the database results by ID for quick lookup.
let mut content_map: HashMap<i64, String> = HashMap::new();
for row in rows {
let id: i64 = row.try_get("id").unwrap_or(0);
let json_data: serde_json::Value =
row.try_get("data").unwrap_or(serde_json::Value::Null);
content_map.insert(id, json_data.to_string());
}
// Step 4: Build the final response, combining Tantivy scores with PG content.
let hits: Vec<Hit> = scored_ids
.into_iter()
.filter_map(|(score, pg_id)| {
content_map.get(&(pg_id as i64)).map(|content_json| Hit {
id: pg_id as i64,
score,
content_json: content_json.clone(),
})
})
.collect();
info!(
"--- SERVER: Successfully processed search. Returning {} hits. ---",
"search: profile={} table={:?} free='{}' constraints={} hits={}",
normalized.profile_name,
normalized.table_name,
normalized.free_query,
normalized.must.len(),
hits.len()
);
let response = SearchResponse { hits };
Ok(Response::new(response))
Ok(Response::new(SearchResponse { hits }))
}
}
/// An opened search index for one profile, bundled with its reader and resolved fields.
struct ProfileIndex {
/// The opened index; handed to `build_master_query` when a search runs.
index: Index,
/// Reader used to obtain a searcher for each search.
reader: IndexReader,
/// Field handles resolved from the index schema when the index was opened.
fields: SchemaFields,
}
impl ProfileIndex {
    /// Opens the index stored in `path`, registers the tokenizers on it, builds a reader with
    /// the `OnCommitWithDelay` reload policy, and resolves the schema fields.
    ///
    /// # Errors
    /// Returns `Status::internal` when any of those four steps fails.
    fn open(path: &Path) -> Result<Self, Status> {
        let index = match Index::open_in_dir(path) {
            Ok(index) => index,
            Err(e) => return Err(Status::internal(format!("Failed to open index: {}", e))),
        };

        if let Err(e) = register_tokenizers(&index) {
            return Err(Status::internal(format!(
                "Failed to register tokenizers: {}",
                e
            )));
        }

        let reader_builder = index
            .reader_builder()
            .reload_policy(ReloadPolicy::OnCommitWithDelay);
        let reader = reader_builder
            .try_into()
            .map_err(|e| Status::internal(format!("Failed to build index reader: {}", e)))?;

        let schema = index.schema();
        let fields = match SchemaFields::from(&schema) {
            Ok(fields) => fields,
            Err(e) => {
                return Err(Status::internal(format!(
                    "Search index schema mismatch. Delete the stale index and create it again: {}",
                    e
                )))
            }
        };

        Ok(Self {
            index,
            reader,
            fields,
        })
    }
}
/// A `SearchRequest` after trimming and validation by `normalize_request`.
#[derive(Debug)]
struct NormalizedSearchRequest {
/// Trimmed profile name; validated as an identifier.
profile_name: String,
/// Trimmed table name, or `None` when the request left it out or blank.
table_name: Option<String>,
/// Trimmed free-text query; may be empty.
free_query: String,
/// Per-column constraints with trimmed column and query text.
must: Vec<SearchConstraint>,
/// Requested result limit, capped at `HARD_RESULT_LIMIT`; `None` when not supplied.
limit: Option<usize>,
}
impl NormalizedSearchRequest {
    /// Reports whether the request carries anything to search for: a non-empty free query or
    /// at least one constraint.
    fn has_input(&self) -> bool {
        let nothing_to_search = self.free_query.is_empty() && self.must.is_empty();
        !nothing_to_search
    }
}
/// Returns the opened index for `profile_name`, opening it from `path` on first use.
///
/// The cache lock is not held while the index is being opened. If another caller stored an
/// index for the same profile in the meantime, that stored index is returned and the freshly
/// opened one is discarded.
///
/// # Errors
/// Returns `Status::internal` when the cache lock is poisoned, or the error from
/// `ProfileIndex::open`.
fn profile_index(
    cache: &Mutex<HashMap<String, Arc<ProfileIndex>>>,
    profile_name: &str,
    path: &Path,
) -> Result<Arc<ProfileIndex>, Status> {
    // The guard is a temporary of this statement, so the lock is released before opening.
    let already_open = cache
        .lock()
        .map_err(|_| Status::internal("Profile index cache lock poisoned"))?
        .get(profile_name)
        .cloned();
    if let Some(existing) = already_open {
        return Ok(existing);
    }

    let fresh = Arc::new(ProfileIndex::open(path)?);

    let mut guard = cache
        .lock()
        .map_err(|_| Status::internal("Profile index cache lock poisoned"))?;
    // Keeps an entry inserted by a concurrent caller; otherwise stores `fresh`.
    let stored = guard.entry(profile_name.to_string()).or_insert(fresh);
    Ok(Arc::clone(stored))
}
/// Checks that `value` is a plain identifier: a first character that is an ASCII letter or
/// `_`, followed only by ASCII letters, ASCII digits, or `_`.
///
/// `field_name` is used only to label the error message.
///
/// # Errors
/// Returns `Status::invalid_argument` when `value` is empty or contains any other character.
fn validate_identifier(value: &str, field_name: &str) -> Result<(), Status> {
    let mut rest = value.chars();
    let first = match rest.next() {
        Some(ch) => ch,
        None => {
            return Err(Status::invalid_argument(format!(
                "{field_name} must not be empty"
            )))
        }
    };

    let starts_well = first.is_ascii_alphabetic() || first == '_';
    let well_formed = starts_well && rest.all(|ch| ch.is_ascii_alphanumeric() || ch == '_');
    if well_formed {
        Ok(())
    } else {
        Err(Status::invalid_argument(format!(
            "{field_name} contains invalid characters"
        )))
    }
}
/// Checks that a constraint column name is non-empty and free of control characters.
///
/// # Errors
/// Returns `Status::invalid_argument` when either check fails.
fn validate_search_column(value: &str) -> Result<(), Status> {
    let problem = if value.is_empty() {
        Some("constraint.column must not be empty")
    } else if value.chars().any(|ch| ch.is_control()) {
        // `char::is_control` already covers NUL ('\0'), so no separate NUL check is needed.
        Some("constraint.column contains invalid characters")
    } else {
        None
    };

    match problem {
        Some(message) => Err(Status::invalid_argument(message)),
        None => Ok(()),
    }
}
/// Builds the schema-qualified, double-quoted SQL name `"profile"."table"`.
///
/// Any `"` inside either name is doubled, which is how a quote is written inside a quoted SQL
/// identifier. Names made only of letters, digits, and `_` come out exactly as before; the
/// escaping only matters if a caller ever passes a name that skipped validation.
fn qualify_profile_table(profile_name: &str, table_name: &str) -> String {
    /// Wraps `ident` in double quotes, doubling any embedded double quote.
    fn quote_ident(ident: &str) -> String {
        format!("\"{}\"", ident.replace('"', "\"\""))
    }

    format!("{}.{}", quote_ident(profile_name), quote_ident(table_name))
}
/// Reports whether a row with `name = profile_name` exists in the `schemas` table.
///
/// # Errors
/// Returns `Status::internal` when the query fails.
async fn profile_exists(pool: &PgPool, profile_name: &str) -> Result<bool, Status> {
    let lookup =
        sqlx::query_scalar::<_, bool>("SELECT EXISTS(SELECT 1 FROM schemas WHERE name = $1)")
            .bind(profile_name);
    match lookup.fetch_one(pool).await {
        Ok(found) => Ok(found),
        Err(e) => Err(Status::internal(format!("Profile lookup failed: {}", e))),
    }
}
/// Reports whether `table_definitions` holds a table called `table_name` whose schema row is
/// named `profile_name`.
///
/// # Errors
/// Returns `Status::internal` when the query fails.
async fn table_exists(pool: &PgPool, profile_name: &str, table_name: &str) -> Result<bool, Status> {
    let lookup = sqlx::query_scalar::<_, bool>(
        r#"
SELECT EXISTS(
SELECT 1
FROM table_definitions td
JOIN schemas s ON td.schema_id = s.id
WHERE s.name = $1 AND td.table_name = $2
)
"#,
    )
    .bind(profile_name)
    .bind(table_name);

    match lookup.fetch_one(pool).await {
        Ok(found) => Ok(found),
        Err(e) => Err(Status::internal(format!("Table lookup failed: {}", e))),
    }
}
/// Trims and validates a raw `SearchRequest`.
///
/// - `profile_name` is required and must pass `validate_identifier`.
/// - `table_name` is optional; a blank value counts as absent, otherwise it must pass
///   `validate_identifier`.
/// - Every constraint needs a column that passes `validate_search_column` and a non-blank
///   query.
/// - `limit`, when present, is capped at `HARD_RESULT_LIMIT`.
///
/// # Errors
/// Returns `Status::invalid_argument` for the first check that fails.
fn normalize_request(req: SearchRequest) -> Result<NormalizedSearchRequest, Status> {
    let profile_name = req.profile_name.trim();
    if profile_name.is_empty() {
        return Err(Status::invalid_argument("profile_name is required"));
    }
    validate_identifier(profile_name, "profile_name")?;

    let requested_table = req
        .table_name
        .as_deref()
        .map(str::trim)
        .filter(|name| !name.is_empty());
    if let Some(name) = requested_table {
        validate_identifier(name, "table_name")?;
    }
    let table_name = requested_table.map(str::to_string);

    let free_query = req.free_query.trim().to_string();

    // Collecting into `Result` stops at the first invalid constraint.
    let must = req
        .must
        .into_iter()
        .map(|constraint| {
            let column = constraint.column.trim();
            validate_search_column(column)?;

            let query = constraint.query.trim();
            if query.is_empty() {
                return Err(Status::invalid_argument(
                    "constraint.query must not be empty",
                ));
            }

            Ok(SearchConstraint {
                column: column.to_string(),
                query: query.to_string(),
                mode: constraint_mode_from_proto(constraint.mode),
            })
        })
        .collect::<Result<Vec<_>, Status>>()?;

    let limit = req
        .limit
        .map(|requested| (requested as usize).min(HARD_RESULT_LIMIT));

    Ok(NormalizedSearchRequest {
        profile_name: profile_name.to_string(),
        table_name,
        free_query,
        must,
        limit,
    })
}
/// Maps the raw wire value of a constraint mode to `ConstraintMode`.
///
/// The value `2` selects `ConstraintMode::Exact`; every other value falls back to
/// `ConstraintMode::Fuzzy`.
// NOTE(review): `2` is presumably the numeric value of the proto enum's exact variant —
// confirm against the `.proto` definition.
fn constraint_mode_from_proto(raw_mode: i32) -> ConstraintMode {
    if raw_mode == 2 {
        ConstraintMode::Exact
    } else {
        ConstraintMode::Fuzzy
    }
}
/// Loads up to `limit` non-deleted rows of `profile_name.table_name`, highest `id` first, and
/// returns them as hits with a score of `0.0`.
///
/// A row whose `id` or `data` column cannot be decoded falls back to the default value for
/// that column instead of failing the call.
///
/// # Errors
/// Returns `Status::internal` when the query fails.
async fn fetch_latest_rows(
    pool: &PgPool,
    profile_name: &str,
    table_name: &str,
    limit: usize,
) -> Result<Vec<Hit>, Status> {
    let qualified = qualify_profile_table(profile_name, table_name);
    let sql = format!(
        "SELECT id, to_jsonb(t) AS data FROM {} t WHERE deleted = FALSE ORDER BY id DESC LIMIT $1",
        qualified
    );

    let rows = sqlx::query(&sql)
        .bind(limit as i64)
        .fetch_all(pool)
        .await
        .map_err(|e| Status::internal(format!("DB query for default results failed: {}", e)))?;

    let mut hits = Vec::with_capacity(rows.len());
    for row in rows {
        let id: i64 = row.try_get("id").unwrap_or_default();
        let json_data: serde_json::Value = row.try_get("data").unwrap_or_default();
        hits.push(Hit {
            id,
            // Not a relevance-ranked result, so there is no meaningful score.
            score: 0.0,
            content_json: json_data.to_string(),
            table_name: table_name.to_string(),
        });
    }
    Ok(hits)
}
/// Runs a search against the profile index and joins the matches with their current rows in
/// Postgres.
///
/// Steps:
/// 1. Build the query from `free_query`, `must`, and the optional `table_filter`.
/// 2. Collect the top `limit` documents and read each one's `pg_id` and `table_name`;
///    documents missing either value are skipped.
/// 3. For every table that appears, fetch the non-deleted rows with those ids in one query.
/// 4. Return one hit per candidate whose row was found, in index result order.
///
/// A `limit` of 0 yields an empty result.
///
/// # Errors
/// Returns the error from `build_master_query`, `Status::invalid_argument` when an indexed
/// table name is not a valid identifier, and `Status::internal` when the index search, a
/// document load, or a database query fails.
async fn run_search(
    pool: &PgPool,
    profile: &ProfileIndex,
    profile_name: &str,
    table_filter: Option<&str>,
    free_query: &str,
    must: &[SearchConstraint],
    limit: usize,
) -> Result<Vec<Hit>, Status> {
    let master_query = build_master_query(
        &profile.index,
        &profile.fields,
        free_query,
        must,
        table_filter,
    )?;

    // `TopDocs::with_limit` panics on 0, and `limit` can come straight from the client.
    // Checked after query building so invalid constraints are still reported.
    if limit == 0 {
        return Ok(vec![]);
    }

    let searcher = profile.reader.searcher();
    let top_docs = searcher
        .search(&*master_query, &TopDocs::with_limit(limit))
        .map_err(|e| Status::internal(format!("Search failed: {}", e)))?;

    if top_docs.is_empty() {
        return Ok(vec![]);
    }

    // (score, row id, table name) for every usable index document.
    let mut candidates: Vec<(f32, i64, String)> = Vec::with_capacity(top_docs.len());
    for (score, doc_address) in top_docs {
        let doc: TantivyDocument = searcher
            .doc(doc_address)
            .map_err(|e| Status::internal(format!("Failed to retrieve document: {}", e)))?;

        let Some(pg_id) = doc
            .get_first(profile.fields.pg_id)
            .and_then(|value| value.as_u64())
        else {
            continue;
        };
        let Some(table_name) = doc
            .get_first(profile.fields.table_name)
            .and_then(|value| value.as_str())
        else {
            continue;
        };

        candidates.push((score, pg_id as i64, table_name.to_string()));
    }

    if candidates.is_empty() {
        return Ok(vec![]);
    }

    // Group ids per table so each table is queried once.
    let mut ids_by_table: HashMap<String, Vec<i64>> = HashMap::new();
    for (_, pg_id, table_name) in &candidates {
        ids_by_table
            .entry(table_name.clone())
            .or_default()
            .push(*pg_id);
    }

    let mut content_map: HashMap<(String, i64), String> = HashMap::new();
    for (table_name, pg_ids) in ids_by_table {
        // The table name comes from the index and is spliced into SQL, so validate it first.
        validate_identifier(&table_name, "table_name")?;

        let sql = format!(
            "SELECT id, to_jsonb(t) AS data FROM {} t WHERE deleted = FALSE AND id = ANY($1)",
            qualify_profile_table(profile_name, &table_name)
        );
        let rows = sqlx::query(&sql)
            .bind(&pg_ids)
            .fetch_all(pool)
            .await
            .map_err(|e| Status::internal(format!("Database query failed: {}", e)))?;

        for row in rows {
            let id: i64 = row.try_get("id").unwrap_or_default();
            let json_data: serde_json::Value = row.try_get("data").unwrap_or_default();
            content_map.insert((table_name.clone(), id), json_data.to_string());
        }
    }

    // Candidates with no row in the map (deleted or missing in Postgres) are dropped.
    Ok(candidates
        .into_iter()
        .filter_map(|(score, pg_id, table_name)| {
            content_map
                .get(&(table_name.clone(), pg_id))
                .map(|content_json| Hit {
                    id: pg_id,
                    score,
                    content_json: content_json.clone(),
                    table_name,
                })
        })
        .collect())
}
/// `Searcher` RPC trait implementation for `SearcherService`.
#[tonic::async_trait]
impl Searcher for SearcherService {
/// Handles the `search` RPC by delegating to `SearcherService::run_rpc`.
async fn search(
&self,
request: Request<SearchRequest>,
) -> Result<Response<SearchResponse>, Status> {
self.run_rpc(request).await
}
}

251
search/src/query_builder.rs Normal file
View File

@@ -0,0 +1,251 @@
use common::search::{
json_path_term, normalize_column_name, normalize_exact, tokenize_ngram, tokenize_word,
SchemaFields,
};
use tantivy::query::{
BooleanQuery, BoostQuery, EmptyQuery, FuzzyTermQuery, Occur, PhraseQuery, Query, QueryParser,
TermQuery,
};
use tantivy::schema::{IndexRecordOption, Term};
use tantivy::Index;
use tonic::Status;
/// How a column constraint is matched against the index.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ConstraintMode {
/// Matched through `fuzzy_predicate_scoped`.
Fuzzy,
/// Matched through `exact_predicate`.
Exact,
}
/// A required condition on a single column; `build_master_query` adds one `Must` clause per
/// constraint.
#[derive(Clone, Debug)]
pub struct SearchConstraint {
/// Column the constraint applies to; normalized with `normalize_column_name` before use.
pub column: String,
/// Text to look for in that column.
pub query: String,
/// Whether the text is matched exactly or fuzzily.
pub mode: ConstraintMode,
}
/// Assembles the complete query for one search request.
///
/// Every constraint in `must` and, when `free_query` yields at least one word token, the
/// free-text predicate become `Must` clauses. If `table_filter` is given, a `Must` term clause
/// on the `table_name` field is added as well.
///
/// When there is neither a constraint nor a free-text token, an `EmptyQuery` is returned even
/// if `table_filter` is set.
///
/// # Errors
/// Propagates the error of the predicate builders (for example a constraint whose query has
/// no searchable tokens).
pub fn build_master_query(
    index: &Index,
    fields: &SchemaFields,
    free_query: &str,
    must: &[SearchConstraint],
    table_filter: Option<&str>,
) -> Result<Box<dyn Query>, Status> {
    let mut clauses: Vec<(Occur, Box<dyn Query>)> = Vec::with_capacity(must.len() + 2);

    for constraint in must {
        let column = constraint.column.as_str();
        let text = constraint.query.as_str();
        clauses.push((
            Occur::Must,
            match constraint.mode {
                ConstraintMode::Exact => exact_predicate(fields, column, text)?,
                ConstraintMode::Fuzzy => fuzzy_predicate_scoped(fields, column, text)?,
            },
        ));
    }

    let free_words = tokenize_word(free_query);
    if !free_words.is_empty() {
        clauses.push((
            Occur::Must,
            fuzzy_predicate_unscoped(index, fields, &free_words)?,
        ));
    }

    // Only search clauses have been pushed so far; a table filter alone is not a search.
    if clauses.is_empty() {
        return Ok(Box::new(EmptyQuery));
    }

    if let Some(table_name) = table_filter {
        let table_term = Term::from_field_text(fields.table_name, table_name);
        clauses.push((
            Occur::Must,
            Box::new(TermQuery::new(table_term, IndexRecordOption::Basic)),
        ));
    }

    Ok(Box::new(BooleanQuery::new(clauses)))
}
/// Builds a term query that matches `query` exactly within `column` of the `data_exact`
/// field.
///
/// The value is passed through `normalize_exact` and the column through
/// `normalize_column_name` before the term is built.
///
/// # Errors
/// Returns `Status::invalid_argument` when the value is empty after normalization.
fn exact_predicate(
    fields: &SchemaFields,
    column: &str,
    query: &str,
) -> Result<Box<dyn Query>, Status> {
    let value = normalize_exact(query);
    if value.is_empty() {
        return Err(Status::invalid_argument(
            "exact query is empty after normalization",
        ));
    }

    let path = normalize_column_name(column);
    let exact_term = json_path_term(fields.data_exact, &path, &value);
    let term_query = TermQuery::new(exact_term, IndexRecordOption::Basic);
    Ok(Box::new(term_query))
}
/// Builds the fuzzy predicate for `query` restricted to one `column`.
///
/// The result is a boolean query of `Should` layers:
/// 1. Per-word layer: every word must match through at least one alternative — the exact term
///    (boost 4.0), a distance-0 prefix `FuzzyTermQuery` (boost 3.0), or, for words long enough
///    per `fuzzy_distance`, a `FuzzyTermQuery` with that distance (boost 2.0).
/// 2. Phrase layer (only with more than one word): the words as a phrase with slop 3
///    (boost 2.0).
/// 3. N-gram layer (only when `query` yields n-grams): every n-gram must be present in the
///    `data_ngram` field (boost 1.0).
///
/// # Errors
/// Returns `Status::invalid_argument` when `query` produces no word tokens.
fn fuzzy_predicate_scoped(
    fields: &SchemaFields,
    column: &str,
    query: &str,
) -> Result<Box<dyn Query>, Status> {
    let words = tokenize_word(query);
    if words.is_empty() {
        return Err(Status::invalid_argument(
            "fuzzy query has no searchable tokens",
        ));
    }
    let column = normalize_column_name(column);

    // Layer 1: one `Must` clause per word, each made of `Should` alternatives.
    let mut per_word_clauses: Vec<(Occur, Box<dyn Query>)> = Vec::with_capacity(words.len());
    for word in &words {
        let term = json_path_term(fields.data_word, &column, word);

        let exact_match: Box<dyn Query> = Box::new(BoostQuery::new(
            Box::new(TermQuery::new(term.clone(), IndexRecordOption::WithFreqs)),
            4.0,
        ));
        let prefix_match: Box<dyn Query> = Box::new(BoostQuery::new(
            Box::new(FuzzyTermQuery::new_prefix(term.clone(), 0, false)),
            3.0,
        ));
        let mut alternates = vec![(Occur::Should, exact_match), (Occur::Should, prefix_match)];

        if let Some(distance) = fuzzy_distance(word.chars().count()) {
            let typo_match: Box<dyn Query> = Box::new(BoostQuery::new(
                Box::new(FuzzyTermQuery::new(term.clone(), distance, true)),
                2.0,
            ));
            alternates.push((Occur::Should, typo_match));
        }

        let any_alternate: Box<dyn Query> = Box::new(BooleanQuery::new(alternates));
        per_word_clauses.push((Occur::Must, any_alternate));
    }

    let mut layers: Vec<(Occur, Box<dyn Query>)> = Vec::with_capacity(3);
    layers.push((Occur::Should, Box::new(BooleanQuery::new(per_word_clauses))));

    // Layer 2: phrase match; needs at least two terms.
    if words.len() > 1 {
        let mut phrase_terms: Vec<(usize, Term)> = Vec::with_capacity(words.len());
        for (offset, word) in words.iter().enumerate() {
            phrase_terms.push((offset, json_path_term(fields.data_word, &column, word)));
        }
        let phrase = PhraseQuery::new_with_offset_and_slop(phrase_terms, 3);
        layers.push((
            Occur::Should,
            Box::new(BoostQuery::new(Box::new(phrase), 2.0)),
        ));
    }

    // Layer 3: all n-grams of the raw query must be present.
    let ngrams = tokenize_ngram(query);
    if !ngrams.is_empty() {
        let mut ngram_clauses: Vec<(Occur, Box<dyn Query>)> = Vec::with_capacity(ngrams.len());
        for gram in ngrams {
            let term = json_path_term(fields.data_ngram, &column, &gram);
            let gram_query: Box<dyn Query> =
                Box::new(TermQuery::new(term, IndexRecordOption::Basic));
            ngram_clauses.push((Occur::Must, gram_query));
        }
        layers.push((
            Occur::Should,
            Box::new(BoostQuery::new(
                Box::new(BooleanQuery::new(ngram_clauses)),
                1.0,
            )),
        ));
    }

    Ok(Box::new(BooleanQuery::new(layers)))
}
/// Builds the fuzzy predicate for free-text `words` against the `all_text` field.
///
/// The result is a boolean query of `Should` layers:
/// 1. Per-word layer: every word must match through at least one alternative — the exact term
///    (boost 4.0), a distance-0 prefix `FuzzyTermQuery` (boost 3.0), or, for words long enough
///    per `fuzzy_distance`, a `FuzzyTermQuery` with that distance (boost 2.0).
/// 2. Phrase layer (only with more than one word): the parsed query `"w1 w2 ..."~3`
///    (boost 2.0).
/// 3. Parsed layer: the parsed query `+w1* +w2* ...` (boost 1.0).
///
/// Layers 2 and 3 are best-effort: if the query parser rejects the string, the layer is
/// skipped. An empty `words` slice yields an `EmptyQuery`.
///
/// # Errors
/// Never fails today; the `Result` keeps the signature parallel to the other predicate
/// builders.
fn fuzzy_predicate_unscoped(
    index: &Index,
    fields: &SchemaFields,
    words: &[String],
) -> Result<Box<dyn Query>, Status> {
    // Nothing to match on. The per-word layer below is always pushed, so the emptiness check
    // has to be on the input rather than on `layers`.
    if words.is_empty() {
        return Ok(Box::new(EmptyQuery));
    }

    let mut layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();

    // Layer 1: one `Must` clause per word, each made of `Should` alternatives.
    let mut per_word_clauses: Vec<(Occur, Box<dyn Query>)> = Vec::with_capacity(words.len());
    for word in words {
        let term = Term::from_field_text(fields.all_text, word);
        let mut alternates: Vec<(Occur, Box<dyn Query>)> = Vec::new();
        alternates.push((
            Occur::Should,
            Box::new(BoostQuery::new(
                Box::new(TermQuery::new(term.clone(), IndexRecordOption::WithFreqs)),
                4.0,
            )),
        ));
        alternates.push((
            Occur::Should,
            Box::new(BoostQuery::new(
                Box::new(FuzzyTermQuery::new_prefix(term.clone(), 0, false)),
                3.0,
            )),
        ));
        if let Some(distance) = fuzzy_distance(word.chars().count()) {
            alternates.push((
                Occur::Should,
                Box::new(BoostQuery::new(
                    Box::new(FuzzyTermQuery::new(term, distance, true)),
                    2.0,
                )),
            ));
        }
        per_word_clauses.push((Occur::Must, Box::new(BooleanQuery::new(alternates))));
    }
    layers.push((Occur::Should, Box::new(BooleanQuery::new(per_word_clauses))));

    // One parser serves both parsed layers.
    let parser = QueryParser::for_index(index, vec![fields.all_text]);

    // Layer 2: phrase with slop; only meaningful with more than one word.
    if words.len() > 1 {
        let query_string = format!("\"{}\"~3", words.join(" "));
        if let Ok(query) = parser.parse_query(&query_string) {
            layers.push((Occur::Should, Box::new(BoostQuery::new(query, 2.0))));
        }
    }

    // Layer 3: every word required, each written as `+word*`.
    // NOTE(review): presumably intended as a prefix match — confirm that the query parser in
    // the tantivy version in use treats `word*` as a prefix.
    let query_string = words
        .iter()
        .map(|word| format!("+{}*", word))
        .collect::<Vec<_>>()
        .join(" ");
    if let Ok(query) = parser.parse_query(&query_string) {
        layers.push((Occur::Should, Box::new(BoostQuery::new(query, 1.0))));
    }

    Ok(Box::new(BooleanQuery::new(layers)))
}
/// Picks the edit distance allowed for a word of `word_len` characters.
///
/// Words of up to 3 characters get no fuzzy matching (`None`), 4 to 6 characters allow a
/// distance of 1, and longer words allow a distance of 2.
fn fuzzy_distance(word_len: usize) -> Option<u8> {
    if word_len <= 3 {
        None
    } else if word_len <= 6 {
        Some(1)
    } else {
        Some(2)
    }
}

2
server

Submodule server updated: 6b0c3e63b4...aa0f9a3108

34
tantivy_todo.md Normal file
View File

@@ -0,0 +1,34 @@
1. Add explicit reindex/backfill tooling.
Right now, only future PostTableData / PutTableData calls index rows. There should be an admin/dev command like:
ReindexProfile(profile_name)
ReindexTable(profile_name, table_name)
ReindexRow(profile_name, table_name, id)
This is the biggest missing piece.
2. Stop using relative ./tantivy_indexes.
Both writer and reader depend on the process working directory. Make it config/env-driven, e.g.
TANTIVY_INDEX_DIR.
3. Add index schema/version metadata.
If you change tokenizers/schema later, old indexes should fail with a clear “index version mismatch, reindex
required” instead of behaving strangely.
4. Batch index commits.
Current code opens a writer and commits per row. Fine for dev, not great for many inserts. A long-lived writer
task batching commits every N docs or every short interval would be more reliable and faster.
5. Make the indexing queue durable.
The current mpsc queue is in-memory. If the server crashes after DB insert but before indexing, search is stale.
For serious use, store pending index jobs in Postgres, process them, mark done.
6. Index only live rows intentionally.
handle_add_or_update currently fetches the row by id without checking deleted = false, then search filters deleted
rows later. I'd either skip indexing deleted rows or make the delete/update semantics explicit.
7. Add typed fields for numbers/dates if you need range queries.
Right now numbers are converted to strings. Good for text search, bad for real numeric filtering/sorting. Tantivy
can do numeric/date fields, but JSON text fields are not enough for robust range search.
8. Decide column-name strategy.
Indexing lowercases raw DB JSON keys. If the UI uses display names/aliases, column constraints can miss unless the
frontend sends exactly what the index expects. I'd centralize the display-name to physical-name mapping before
search.
9. Add delete hooks for table/profile deletion.
When a table or profile is deleted, the matching Tantivy docs/index directory should be cleaned by code, not
manually.

View File

@@ -0,0 +1,18 @@
[package]
name = "validation-core"
version.workspace = true
edition.workspace = true
license.workspace = true
authors.workspace = true
description = "Shared validation primitives, rules, and sets."
repository.workspace = true
[dependencies]
serde = { workspace = true }
thiserror = { workspace = true }
unicode-width = { workspace = true }
regex = { workspace = true, optional = true }
[features]
default = []
regex = ["dep:regex"]

View File

@@ -0,0 +1,493 @@
# Validation
This document is the frontend guide for the validation system.
The important idea: reusable validation is built from **rules** and **sets**.
The frontend creates and manages those. When a set is applied to a table field,
the server resolves it into the existing `FieldValidation` shape, and the form
runtime continues to work through the normal table-validation flow.
## Ownership
```mermaid
flowchart LR
Core[validation-core<br/>validation meaning<br/>rule/set merge rules]
Server[server<br/>stores rules/sets<br/>applies sets<br/>enforces writes]
Common[common/proto<br/>gRPC contract]
Client[client/frontend<br/>rule/set UI<br/>calls gRPC]
Canvas[canvas<br/>field editing<br/>mask display<br/>local feedback]
Server --> Core
Canvas --> Core
Client --> Common
Server --> Common
Client --> Canvas
```
`server` stores simple serializable settings. `validation-core` owns how those
settings combine. `canvas` uses resolved field validation to guide editing.
## Terms
| Term | Meaning |
| --- | --- |
| `FieldValidation` | Existing per-column validation config from `common/proto/table_validation.proto`. This is what forms/canvas already consume. |
| `ValidationRule` | One named reusable fragment, for example `digits-only`, `phone-length`, or `required`. Stored by the server as a `FieldValidation` fragment with no meaningful `dataKey`. |
| `ValidationSet` | Ordered collection of rule names, for example `phone = [required, phone-length, digits-only, phone-mask]`. |
| Applied validation | A resolved snapshot of a set written to `table_validation_rules` for a concrete `(table, dataKey)`. |
| Snapshot | Applying a set copies the resolved config to a field. Later edits to the set do not automatically update fields that were already applied. |
## What Backend Enforces
Backend write validation enforces only server-relevant parts:
| FieldValidation part | Backend | Canvas/frontend |
| --- | --- | --- |
| `required` | Yes | Yes |
| `limits` | Yes | Yes |
| `pattern` | Yes | Yes |
| `allowed_values` | Yes | Yes |
| `mask` | Partly: raw value length/literals | Yes: display/editing mask |
| `formatter` | No | Yes |
| `external_validation_enabled` | No | Yes/UI hint |
`mask` is visual metadata, but the backend still uses it to reject incorrectly
submitted raw values. Example: if the mask is `(###) ###-####`, the backend
expects the stored value to be raw digits, not `(123) 456-7890`.
## Main User Flow
```mermaid
sequenceDiagram
participant UI as Frontend UI
participant API as TableValidationService
participant DB as Server DB
participant Form as Existing Form Runtime
UI->>API: UpsertValidationRule(required)
UI->>API: UpsertValidationRule(digits-only)
UI->>API: UpsertValidationRule(phone-length)
UI->>API: UpsertValidationSet(phone: [required, phone-length, digits-only])
UI->>API: ApplyValidationSet(profile, table, dataKey, phone)
API->>DB: write resolved FieldValidation snapshot
Form->>API: GetTableValidation(profile, table)
API->>Form: resolved FieldValidation for dataKey
```
After `ApplyValidationSet`, the existing form code does not need to know that a
set was used. It receives normal `FieldValidation`.
## API
All APIs live on `TableValidationService`.
### Rules
Create or update one reusable rule:
```text
UpsertValidationRule(UpsertValidationRuleRequest)
```
Request shape:
```text
profileName: string
rule:
name: string
description: optional string
validation: FieldValidation
```
Frontend rules:
- `rule.name` is required and unique inside a profile.
- `rule.validation.dataKey` is ignored by the server.
- A rule should usually configure one logical fragment.
- Examples: `required`, `phone-length`, `digits-only`, `phone-mask`.
List rules:
```text
ListValidationRules({ profileName })
```
Delete rule:
```text
DeleteValidationRule({ profileName, name })
```
Deleting a rule removes it from future reusable composition. Already applied
field snapshots are not changed.
### Sets
Create or update one reusable set:
```text
UpsertValidationSet(UpsertValidationSetRequest)
```
Request shape:
```text
profileName: string
set:
name: string
description: optional string
ruleItems: repeated ValidationSetRuleItem
```
Frontend rules:
- `set.name` is required and unique inside a profile.
- `ruleItems` must contain at least one item.
- `ruleItems` are ordered.
- Every global rule reference must already exist.
- Duplicate rule names in the same set are rejected.
- Conflicting singleton fragments are rejected.
Singleton fragments are:
```text
limits
allowed_values
mask
formatter
```
That means a set cannot currently contain two rules that both define `limits`.
Pattern rules are additive: multiple rules with `pattern` are merged into one
combined pattern.
List sets:
```text
ListValidationSets({ profileName })
```
Response includes each set plus `resolvedValidation`, so the frontend can show
what the set expands to.
Delete set:
```text
DeleteValidationSet({ profileName, name })
```
Deleting a set does not change already applied fields.
### Apply Set To Field
Apply a reusable set to one field:
```text
ApplyValidationSet(ApplyValidationSetRequest)
```
Request shape:
```text
profileName: string
tableName: string
dataKey: string
setName: string
```
Server behavior:
1. Loads the set.
2. Loads its ordered rules.
3. Resolves/merges them through `validation-core`.
4. Validates that `dataKey` exists in the table definition.
5. Writes the resolved config into existing `table_validation_rules`.
This is a snapshot. If the user later edits the `phone` set, fields that already
used `phone` keep their old resolved config until the set is applied again.
## FieldValidation Guide
Rules and direct field validation both use `FieldValidation`.
### Required
```text
required: true
```
Backend rejects missing or empty values.
### Limits
```text
limits:
min: 10
max: 10
warnAt: optional
countMode: CHARS | BYTES | DISPLAY_WIDTH
```
Backend enforces `min` and `max`. `warnAt` is mainly UI feedback.
### Pattern
Pattern rules validate characters at positions.
Example digits-only:
```text
pattern:
rules:
- position:
kind: PATTERN_POSITION_FROM
start: 0
constraint:
kind: CHARACTER_CONSTRAINT_NUMERIC
```
Useful constraints:
```text
CHARACTER_CONSTRAINT_ALPHABETIC
CHARACTER_CONSTRAINT_NUMERIC
CHARACTER_CONSTRAINT_ALPHANUMERIC
CHARACTER_CONSTRAINT_EXACT
CHARACTER_CONSTRAINT_ONE_OF
CHARACTER_CONSTRAINT_REGEX
```
Pattern fragments from multiple rules are merged.
### Allowed Values
```text
allowed_values:
values: ["open", "closed"]
allow_empty: false
case_insensitive: true
```
Backend rejects values not in the list.
### Mask
```text
mask:
  pattern: "(###) ###-####"
  input_char: "#"
  template_char: "_"
```
Canvas uses this for display/editing. Backend expects raw values without mask
literals.
### Formatter
```text
formatter:
  type: "PhoneFormatter"
  options: []
  description: optional
```
Formatter is resolved client-side. Backend stores it but does not execute it.
### External Validation
```text
external_validation_enabled: true
```
This is a frontend/UI hint. Backend stores it but does not perform external
validation.
## Recommended Frontend Screens
### Rule List
Show all rules for a profile.
Actions:
```text
create rule
edit rule
delete rule
preview rule config
```
### Rule Editor
Build a `ValidationRuleDefinition`.
Recommended UI:
```text
name
description
required toggle
limits section
pattern section
allowed values section
mask section
formatter section
external validation toggle
```
For v1, encourage one fragment per rule. Example: create `phone-length` and
`digits-only` separately, instead of one huge rule.
### Set List
Show all sets for a profile.
Use `ListValidationSets`, because it returns `resolvedValidation`.
Actions:
```text
create set
edit set
delete set
preview resolved validation
```
### Set Editor
Build a `ValidationSetDefinition`.
Recommended UI:
```text
name
description
ordered global/inline rule item picker
resolved preview
```
When rule ordering changes, call `UpsertValidationSet` and then refresh
`ListValidationSets`.
### Apply Set
On the table/field validation screen, add:
```text
Apply validation set
```
Flow:
1. Load sets with `ListValidationSets`.
2. User selects a set.
3. Call `ApplyValidationSet(profileName, tableName, dataKey, setName)`.
4. Refresh `GetTableValidation(profileName, tableName)`.
The field should now behave exactly like a directly configured field validation.
## Example: Phone
Create rule `required`:
```text
validation:
  required: true
```
Create rule `phone-length`:
```text
validation:
  limits:
    min: 10
    max: 10
    countMode: CHARS
```
Create rule `digits-only`:
```text
validation:
  pattern:
    rules:
      - position:
          kind: PATTERN_POSITION_FROM
          start: 0
        constraint:
          kind: CHARACTER_CONSTRAINT_NUMERIC
```
Create rule `phone-mask`:
```text
validation:
  mask:
    pattern: "(###) ###-####"
    input_char: "#"
```
Create set `phone`:
```text
ruleItems:
  - globalRuleName: required
  - globalRuleName: phone-length
  - globalRuleName: digits-only
  - globalRuleName: phone-mask
```
Apply set:
```text
profileName: "default"
tableName: "customers"
dataKey: "customer_phone"
setName: "phone"
```
Then refresh:
```text
GetTableValidation(default, customers)
```
The response contains a normal `FieldValidation` for `customer_phone`.
## Important UX Notes
- Applying a set is not a live link.
- Editing a rule or set does not mutate fields where it was already applied.
- To update a field after set changes, apply the set again.
- If a set has conflicting singleton rules, the server rejects it.
- For now, the system does not store field metadata like `sourceSetName` on
applied fields. The field only stores the resolved validation snapshot.
## Files
Core model:
```text
validation-core/src/set.rs
validation-core/src/config.rs
```
Wire contract:
```text
common/proto/table_validation.proto
```
Server implementation:
```text
server/src/table_validation/get/service.rs
server/src/table_validation/post/repo.rs
server/src/table_validation/config.rs
```
Storage:
```text
server/migrations/20260506170000_create_validation_rules_and_sets.sql
```

View File

@@ -0,0 +1,311 @@
use crate::rules::{
CharacterFilter, CharacterLimits, DisplayMask, PatternFilters, PositionFilter, PositionRange,
};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use thiserror::Error;
/// A closed list of values a field may contain.
///
/// `allow_empty` is not consulted by [`AllowedValues::matches`]; it is read by
/// the caller that handles empty input (see `ValidationConfig::validate_content`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AllowedValues {
    /// The accepted values.
    pub values: Vec<String>,
    /// Whether an empty value is accepted even though it is not in `values`.
    pub allow_empty: bool,
    /// Whether [`AllowedValues::matches`] ignores letter case.
    pub case_insensitive: bool,
}

impl AllowedValues {
    /// Creates a list that allows empty input and compares case-sensitively.
    pub fn new(values: Vec<String>) -> Self {
        Self {
            values,
            allow_empty: true,
            case_insensitive: false,
        }
    }

    /// Sets whether an empty value is accepted.
    pub fn allow_empty(mut self, allow_empty: bool) -> Self {
        self.allow_empty = allow_empty;
        self
    }

    /// Sets whether matching ignores letter case.
    pub fn case_insensitive(mut self, case_insensitive: bool) -> Self {
        self.case_insensitive = case_insensitive;
        self
    }

    /// Returns `true` when `text` equals one of the allowed values.
    ///
    /// With `case_insensitive` set, the comparison also folds non-ASCII
    /// letters (e.g. `"É"` matches `"é"`) by comparing Unicode-lowercased
    /// forms. Everything the ASCII-only comparison accepted is still accepted.
    pub fn matches(&self, text: &str) -> bool {
        if !self.case_insensitive {
            return self.values.iter().any(|allowed| allowed == text);
        }

        // Lowercase the candidate once instead of once per allowed value.
        let lowered = text.to_lowercase();
        self.values.iter().any(|allowed| {
            // Cheap, allocation-free ASCII check first; Unicode fallback second.
            allowed.eq_ignore_ascii_case(text) || allowed.to_lowercase() == lowered
        })
    }
}
/// Formatter selection stored alongside a field's validation settings.
///
/// Within this module the value is only stored and cloned; nothing here
/// executes a formatter.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FormatterSettings {
/// Identifier of the formatter to use.
pub formatter_type: String,
/// Key/value options passed along with the formatter.
pub options: Vec<FormatterOption>,
/// Optional human-readable description.
pub description: Option<String>,
}
/// A single key/value option attached to a [`FormatterSettings`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FormatterOption {
/// Option name.
pub key: String,
/// Option value, kept as a string.
pub value: String,
}
/// Serializable description of a per-character constraint.
///
/// Use [`CharacterFilterSettings::resolve`] to obtain the runtime
/// [`CharacterFilter`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CharacterFilterSettings {
    /// Resolves to `CharacterFilter::Alphabetic`.
    Alphabetic,
    /// Resolves to `CharacterFilter::Numeric`.
    Numeric,
    /// Resolves to `CharacterFilter::Alphanumeric`.
    Alphanumeric,
    /// The character must equal this exact character.
    Exact(char),
    /// The character must be one of the listed characters.
    OneOf(Vec<char>),
    /// Regular expression that is tested against each character on its own
    /// (the character is handed to the regex as a one-character string).
    Regex(String),
}

impl CharacterFilterSettings {
    /// Builds the runtime filter described by these settings.
    ///
    /// For `Regex`, the pattern is compiled once here. If the crate is built
    /// without the `regex` feature, or the pattern does not compile, the
    /// resulting filter rejects every character.
    pub fn resolve(&self) -> CharacterFilter {
        match self {
            Self::Alphabetic => CharacterFilter::Alphabetic,
            Self::Numeric => CharacterFilter::Numeric,
            Self::Alphanumeric => CharacterFilter::Alphanumeric,
            Self::Exact(ch) => CharacterFilter::Exact(*ch),
            Self::OneOf(chars) => CharacterFilter::OneOf(chars.clone()),
            Self::Regex(pattern) => {
                #[cfg(feature = "regex")]
                {
                    match regex::Regex::new(pattern) {
                        Ok(regex) => CharacterFilter::Custom(Arc::new(move |ch| {
                            // Encode into a stack buffer rather than allocating
                            // a `String` for every character that is checked.
                            let mut utf8 = [0u8; 4];
                            regex.is_match(ch.encode_utf8(&mut utf8))
                        })),
                        // Invalid pattern: fail closed.
                        Err(_) => CharacterFilter::Custom(Arc::new(|_| false)),
                    }
                }
                #[cfg(not(feature = "regex"))]
                {
                    let _ = pattern;
                    CharacterFilter::Custom(Arc::new(|_| false))
                }
            }
        }
    }
}
/// Serializable pairing of a position range with the character constraint
/// that applies to it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PositionFilterSettings {
    /// Positions the constraint applies to.
    pub positions: PositionRange,
    /// Constraint checked at those positions.
    pub filter: CharacterFilterSettings,
}

impl PositionFilterSettings {
    /// Builds the runtime [`PositionFilter`] from a copy of the positions and
    /// the resolved character filter.
    pub fn resolve(&self) -> PositionFilter {
        let positions = self.positions.clone();
        let character_filter = self.filter.resolve();
        PositionFilter::new(positions, character_filter)
    }
}
/// Serializable list of position filters that together form a pattern.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct PatternSettings {
    /// Position filters, in the order they were configured.
    pub filters: Vec<PositionFilterSettings>,
    /// Optional human-readable description of the pattern.
    pub description: Option<String>,
}

impl PatternSettings {
    /// Resolves every configured filter and collects them into a
    /// [`PatternFilters`]. The description is not carried over.
    pub fn resolve(&self) -> PatternFilters {
        let resolved: Vec<_> = self
            .filters
            .iter()
            .map(|settings| settings.resolve())
            .collect();
        PatternFilters::new().add_filters(resolved)
    }
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ValidationSettings {
pub required: bool,
pub character_limits: Option<CharacterLimits>,
pub pattern: Option<PatternSettings>,
pub allowed_values: Option<AllowedValues>,
pub display_mask: Option<DisplayMask>,
pub formatter: Option<FormatterSettings>,
pub external_validation_enabled: bool,
}
impl ValidationSettings {
pub fn resolve(&self) -> ValidationConfig {
ValidationConfig {
required: self.required,
character_limits: self.character_limits.clone(),
pattern_filters: self.pattern.as_ref().map(PatternSettings::resolve),
allowed_values: self.allowed_values.clone(),
display_mask: self.display_mask.clone(),
formatter: self.formatter.clone(),
external_validation_enabled: self.external_validation_enabled,
}
}
pub fn merge_rules<'a>(
rules: impl IntoIterator<Item = &'a ValidationSettings>,
) -> Result<Self, ValidationMergeError> {
let mut merged = ValidationSettings::default();
for rule in rules {
merged.merge_rule(rule)?;
}
Ok(merged)
}
pub fn merge_rule(&mut self, rule: &ValidationSettings) -> Result<(), ValidationMergeError> {
self.required |= rule.required;
self.external_validation_enabled |= rule.external_validation_enabled;
merge_singleton(
"character_limits",
&mut self.character_limits,
&rule.character_limits,
)?;
merge_singleton(
"allowed_values",
&mut self.allowed_values,
&rule.allowed_values,
)?;
merge_singleton("display_mask", &mut self.display_mask, &rule.display_mask)?;
merge_singleton("formatter", &mut self.formatter, &rule.formatter)?;
if let Some(pattern) = &rule.pattern {
match &mut self.pattern {
Some(existing) => {
existing.filters.extend(pattern.filters.clone());
if existing.description.is_none() {
existing.description = pattern.description.clone();
}
}
None => self.pattern = Some(pattern.clone()),
}
}
Ok(())
}
}
/// Copies `source` into `target` when `source` is set.
///
/// Does nothing when `source` is `None`. Fails with
/// [`ValidationMergeError::DuplicateSingleton`] (carrying `field_name`) when
/// both `source` and `target` are already set; `target` is untouched then.
fn merge_singleton<T: Clone>(
    field_name: &'static str,
    target: &mut Option<T>,
    source: &Option<T>,
) -> Result<(), ValidationMergeError> {
    match (source, target.is_some()) {
        (None, _) => Ok(()),
        (Some(_), true) => Err(ValidationMergeError::DuplicateSingleton { field_name }),
        (Some(value), false) => {
            *target = Some(value.clone());
            Ok(())
        }
    }
}
/// Error produced while merging several `ValidationSettings` into one.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum ValidationMergeError {
/// Two merged rules both configured the singleton field `field_name`
/// (one of the names passed to `merge_singleton`, e.g. `"character_limits"`).
#[error("validation set contains more than one rule configuring {field_name}")]
DuplicateSingleton { field_name: &'static str },
}
#[derive(Debug, Clone, Default)]
pub struct ValidationConfig {
pub required: bool,
pub character_limits: Option<CharacterLimits>,
pub pattern_filters: Option<PatternFilters>,
pub allowed_values: Option<AllowedValues>,
pub display_mask: Option<DisplayMask>,
pub formatter: Option<FormatterSettings>,
pub external_validation_enabled: bool,
}
impl ValidationConfig {
pub fn validate_content(&self, text: &str) -> ValidationResult {
if text.is_empty() {
if self.required {
return ValidationResult::error("Value required");
}
if let Some(allowed_values) = &self.allowed_values {
if !allowed_values.allow_empty {
return ValidationResult::error("Empty value is not allowed");
}
}
return ValidationResult::Valid;
}
if let Some(limits) = &self.character_limits {
if let Some(result) = limits.validate_content(text) {
if !result.is_acceptable() {
return result;
}
}
}
if let Some(pattern_filters) = &self.pattern_filters {
if let Err(message) = pattern_filters.validate_text(text) {
return ValidationResult::error(message);
}
}
if let Some(allowed_values) = &self.allowed_values {
if !allowed_values.matches(text) {
return ValidationResult::error("Value must be one of the allowed options");
}
}
ValidationResult::Valid
}
pub fn has_validation(&self) -> bool {
self.required
|| self.character_limits.is_some()
|| self.pattern_filters.is_some()
|| self.allowed_values.is_some()
|| self.display_mask.is_some()
|| self.formatter.is_some()
|| self.external_validation_enabled
}
}
/// Outcome of a validation check.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationResult {
    /// The value passed.
    Valid,
    /// The value is acceptable but deserves attention.
    Warning { message: String },
    /// The value is rejected.
    Error { message: String },
}

impl ValidationResult {
    /// `true` for `Valid` and `Warning`, `false` for `Error`.
    pub fn is_acceptable(&self) -> bool {
        match self {
            Self::Valid | Self::Warning { .. } => true,
            Self::Error { .. } => false,
        }
    }

    /// `true` only for `Error`.
    pub fn is_error(&self) -> bool {
        match self {
            Self::Error { .. } => true,
            Self::Valid | Self::Warning { .. } => false,
        }
    }

    /// The attached message, if the result carries one.
    pub fn message(&self) -> Option<&str> {
        match self {
            Self::Warning { message } | Self::Error { message } => Some(message.as_str()),
            Self::Valid => None,
        }
    }

    /// Builds a `Warning` from anything convertible into a `String`.
    pub fn warning(message: impl Into<String>) -> Self {
        let message = message.into();
        Self::Warning { message }
    }

    /// Builds an `Error` from anything convertible into a `String`.
    pub fn error(message: impl Into<String>) -> Self {
        let message = message.into();
        Self::Error { message }
    }
}

View File

@@ -0,0 +1,16 @@
pub mod config;
pub mod rules;
pub mod set;
pub use config::{
AllowedValues, CharacterFilterSettings, FormatterOption, FormatterSettings, PatternSettings,
PositionFilterSettings, ValidationConfig, ValidationMergeError, ValidationResult,
ValidationSettings,
};
pub use rules::{
count_text, CharacterFilter, CharacterLimits, CountMode, DisplayMask, LimitCheckResult,
MaskDisplayMode, PatternFilters, PositionFilter, PositionRange,
};
pub use set::{
AppliedValidation, ValidationRule, ValidationSet, ValidationSetItem, ValidationSetResolveError,
};

View File

@@ -0,0 +1,452 @@
// src/rules/character_limits.rs
//! Character limits validation implementation
use crate::ValidationResult;
use serde::{Deserialize, Serialize};
use unicode_width::UnicodeWidthStr;
/// Length limits for a field's text.
///
/// All lengths are measured in the unit selected by `count_mode`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CharacterLimits {
/// Maximum length allowed (None = unlimited)
max_length: Option<usize>,
/// Minimum length required (None = no minimum)
min_length: Option<usize>,
/// Warning threshold (None = no warning)
warning_threshold: Option<usize>,
/// Count mode: characters, display width, or bytes
count_mode: CountMode,
}
/// Unit used to measure text length for limit checking
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
pub enum CountMode {
/// Count Unicode scalar values via `chars().count()` (default)
#[default]
Characters,
/// Count terminal display width via `unicode_width` (useful for CJK characters)
DisplayWidth,
/// Count UTF-8 bytes via `str::len` (rarely used, but available)
Bytes,
}
/// Result of a character limit check, as returned by `CharacterLimits::check_limits`
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LimitCheckResult {
/// Within limits
Ok,
/// Warning threshold reached without exceeding the maximum
Warning { current: usize, max: usize },
/// Strictly above the maximum (`current > max`); a length equal to `max` is not reported here
Exceeded { current: usize, max: usize },
/// Below minimum length
TooShort { current: usize, min: usize },
}
impl CharacterLimits {
    /// Creates limits with only a maximum length.
    pub fn new(max_length: usize) -> Self {
        Self {
            max_length: Some(max_length),
            min_length: None,
            warning_threshold: None,
            count_mode: CountMode::default(),
        }
    }

    /// Creates limits with both a minimum and a maximum length.
    ///
    /// The two values are stored as given; `min_length <= max_length` is not
    /// checked.
    pub fn new_range(min_length: usize, max_length: usize) -> Self {
        Self {
            max_length: Some(max_length),
            min_length: Some(min_length),
            warning_threshold: None,
            count_mode: CountMode::default(),
        }
    }

    /// Creates limits with only a minimum length.
    pub fn new_min(min_length: usize) -> Self {
        Self {
            max_length: None,
            min_length: Some(min_length),
            warning_threshold: None,
            count_mode: CountMode::default(),
        }
    }

    /// Creates limits with only a warning threshold.
    pub fn new_warning(threshold: usize) -> Self {
        Self {
            max_length: None,
            min_length: None,
            warning_threshold: Some(threshold),
            count_mode: CountMode::default(),
        }
    }

    /// Sets the length at which a warning is raised.
    pub fn with_warning_threshold(mut self, threshold: usize) -> Self {
        self.warning_threshold = Some(threshold);
        self
    }

    /// Sets the count mode (characters, display width, or bytes).
    pub fn with_count_mode(mut self, mode: CountMode) -> Self {
        self.count_mode = mode;
        self
    }

    /// Maximum length, if configured.
    pub fn max_length(&self) -> Option<usize> {
        self.max_length
    }

    /// Minimum length, if configured.
    pub fn min_length(&self) -> Option<usize> {
        self.min_length
    }

    /// Warning threshold, if configured.
    pub fn warning_threshold(&self) -> Option<usize> {
        self.warning_threshold
    }

    /// Configured count mode.
    pub fn count_mode(&self) -> CountMode {
        self.count_mode
    }

    /// Measures `text` in the unit selected by the configured count mode.
    fn count(&self, text: &str) -> usize {
        match self.count_mode {
            CountMode::Characters => text.chars().count(),
            CountMode::DisplayWidth => text.width(),
            CountMode::Bytes => text.len(),
        }
    }

    /// Limit reported alongside a warning: the maximum when one is configured,
    /// otherwise the warning threshold itself.
    fn reported_limit(&self, threshold: usize) -> usize {
        self.max_length.unwrap_or(threshold)
    }

    /// Checks the text that would result from inserting `character` at
    /// character index `position` (clamped to the end of `current_text`).
    ///
    /// Returns an error when the new length is above the maximum, a warning
    /// when this insertion is the one that reaches the warning threshold, and
    /// `None` otherwise. The threshold is honoured even when no maximum is
    /// configured.
    pub fn validate_insertion(
        &self,
        current_text: &str,
        position: usize,
        character: char,
    ) -> Option<ValidationResult> {
        // Byte offset of the `position`-th character, or the end of the text.
        let split_at = current_text
            .char_indices()
            .nth(position)
            .map_or(current_text.len(), |(byte_idx, _)| byte_idx);

        let mut new_text = String::with_capacity(current_text.len() + character.len_utf8());
        new_text.push_str(&current_text[..split_at]);
        new_text.push(character);
        new_text.push_str(&current_text[split_at..]);

        let new_count = self.count(&new_text);

        if let Some(max) = self.max_length {
            if new_count > max {
                return Some(ValidationResult::error(format!(
                    "Character limit exceeded: {new_count}/{max}"
                )));
            }
        }

        if let Some(threshold) = self.warning_threshold {
            // Warn only on the insertion that crosses the threshold, not on
            // every later keystroke.
            if new_count >= threshold && self.count(current_text) < threshold {
                let limit = self.reported_limit(threshold);
                return Some(ValidationResult::warning(format!(
                    "Approaching character limit: {new_count}/{limit}"
                )));
            }
        }

        None // No validation issues
    }

    /// Validates complete content.
    ///
    /// Checks, in order: minimum length (warning), maximum length (error),
    /// warning threshold (warning). Returns `None` when nothing applies. The
    /// threshold is honoured even when no maximum is configured.
    pub fn validate_content(&self, text: &str) -> Option<ValidationResult> {
        let count = self.count(text);

        if let Some(min) = self.min_length {
            if count < min {
                return Some(ValidationResult::warning(format!(
                    "Minimum length not met: {count}/{min}"
                )));
            }
        }

        if let Some(max) = self.max_length {
            if count > max {
                return Some(ValidationResult::error(format!(
                    "Character limit exceeded: {count}/{max}"
                )));
            }
        }

        if let Some(threshold) = self.warning_threshold {
            if count >= threshold {
                let limit = self.reported_limit(threshold);
                return Some(ValidationResult::warning(format!(
                    "Approaching character limit: {count}/{limit}"
                )));
            }
        }

        None // No validation issues
    }

    /// Classifies `text` against the limits.
    ///
    /// Checks, in order: above the maximum, warning threshold reached, below
    /// the minimum. When no maximum is configured, a `Warning` carries the
    /// threshold in its `max` field.
    pub fn check_limits(&self, text: &str) -> LimitCheckResult {
        let count = self.count(text);

        if let Some(max) = self.max_length {
            if count > max {
                return LimitCheckResult::Exceeded {
                    current: count,
                    max,
                };
            }
        }

        if let Some(threshold) = self.warning_threshold {
            if count >= threshold {
                return LimitCheckResult::Warning {
                    current: count,
                    max: self.reported_limit(threshold),
                };
            }
        }

        if let Some(min) = self.min_length {
            if count < min {
                return LimitCheckResult::TooShort {
                    current: count,
                    min,
                };
            }
        }

        LimitCheckResult::Ok
    }

    /// Short status string for `text`, or `None` when the text is within
    /// limits and no maximum is configured.
    pub fn status_text(&self, text: &str) -> Option<String> {
        match self.check_limits(text) {
            LimitCheckResult::Ok => {
                // Show current/max if we have a max limit
                self.max_length
                    .map(|max| format!("{}/{}", self.count(text), max))
            }
            LimitCheckResult::Warning { current, max } => {
                Some(format!("{current}/{max} (approaching limit)"))
            }
            LimitCheckResult::Exceeded { current, max } => {
                Some(format!("{current}/{max} (exceeded)"))
            }
            LimitCheckResult::TooShort { current, min } => Some(format!("{current}/{min} minimum")),
        }
    }

    /// Whether leaving the field is allowed: always without a minimum,
    /// otherwise only when the field is empty or meets the minimum.
    pub fn allows_field_switch(&self, text: &str) -> bool {
        match self.min_length {
            Some(min) => {
                let count = self.count(text);
                count == 0 || count >= min
            }
            None => true,
        }
    }

    /// Reason why leaving the field is blocked, if it is.
    pub fn field_switch_block_reason(&self, text: &str) -> Option<String> {
        let min = self.min_length?;
        let count = self.count(text);
        if count > 0 && count < min {
            return Some(format!(
                "Field must be empty or have at least {min} characters (currently: {count})"
            ));
        }
        None
    }
}
pub fn count_text(text: &str, mode: CountMode) -> usize {
match mode {
CountMode::Characters => text.chars().count(),
CountMode::DisplayWidth => text.width(),
CountMode::Bytes => text.len(),
}
}
impl Default for CharacterLimits {
fn default() -> Self {
Self {
max_length: Some(30), // Default 30 character limit as specified
min_length: None,
warning_threshold: None,
count_mode: CountMode::default(),
}
}
}
// Unit tests for `CharacterLimits`: construction, counting modes, insertion and
// content validation, warning threshold, status text and field-switch rules.
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_character_limits_creation() {
let limits = CharacterLimits::new(10);
assert_eq!(limits.max_length(), Some(10));
assert_eq!(limits.min_length(), None);
let range_limits = CharacterLimits::new_range(5, 15);
assert_eq!(range_limits.min_length(), Some(5));
assert_eq!(range_limits.max_length(), Some(15));
}
#[test]
fn test_default_limits() {
let limits = CharacterLimits::default();
assert_eq!(limits.max_length(), Some(30));
}
#[test]
fn test_character_counting() {
let limits = CharacterLimits::new(5);
// Test character mode (default)
assert_eq!(limits.count("hello"), 5);
assert_eq!(limits.count("héllo"), 5); // Accented character counts as 1
// Test display width mode
let limits = limits.with_count_mode(CountMode::DisplayWidth);
assert_eq!(limits.count("hello"), 5);
// Test bytes mode
let limits = limits.with_count_mode(CountMode::Bytes);
assert_eq!(limits.count("hello"), 5);
assert_eq!(limits.count("héllo"), 6); // é takes 2 bytes in UTF-8
}
#[test]
fn test_insertion_validation() {
let limits = CharacterLimits::new(5);
// Valid insertion
let result = limits.validate_insertion("test", 4, 'x');
assert!(result.is_none()); // No validation issues
// Invalid insertion (would exceed limit)
let result = limits.validate_insertion("tests", 5, 'x');
assert!(result.is_some());
assert!(!result.unwrap().is_acceptable());
}
#[test]
fn test_content_validation() {
let limits = CharacterLimits::new_range(3, 10);
// Too short
let result = limits.validate_content("hi");
assert!(result.is_some());
assert!(result.unwrap().is_acceptable()); // Warning, not error
// Just right
let result = limits.validate_content("hello");
assert!(result.is_none());
// Too long
let result = limits.validate_content("hello world!");
assert!(result.is_some());
assert!(!result.unwrap().is_acceptable()); // Error
}
#[test]
fn test_warning_threshold() {
let limits = CharacterLimits::new(10).with_warning_threshold(8);
// Below warning threshold
let result = limits.validate_insertion("123456", 6, 'x');
assert!(result.is_none());
// At warning threshold
let result = limits.validate_insertion("1234567", 7, 'x');
assert!(result.is_some()); // This brings us to 8 chars
assert!(result.unwrap().is_acceptable()); // Warning, not error
// Already past the threshold: the warning is only raised on the crossing insertion
let result = limits.validate_insertion("12345678", 8, 'x');
assert!(result.is_none());
}
#[test]
fn test_status_text() {
let limits = CharacterLimits::new(10);
assert_eq!(limits.status_text("hello"), Some("5/10".to_string()));
let limits = limits.with_warning_threshold(8);
assert_eq!(
limits.status_text("12345678"),
Some("8/10 (approaching limit)".to_string())
);
assert_eq!(
limits.status_text("1234567890x"),
Some("11/10 (exceeded)".to_string())
);
}
#[test]
fn test_field_switch_blocking() {
let limits = CharacterLimits::new_range(3, 10);
// Empty field: should allow switching
assert!(limits.allows_field_switch(""));
assert!(limits.field_switch_block_reason("").is_none());
// Field with content below minimum: should block switching
assert!(!limits.allows_field_switch("hi"));
assert!(limits.field_switch_block_reason("hi").is_some());
assert!(limits
.field_switch_block_reason("hi")
.unwrap()
.contains("at least 3 characters"));
// Field meeting minimum: should allow switching
assert!(limits.allows_field_switch("hello"));
assert!(limits.field_switch_block_reason("hello").is_none());
// Field exceeding maximum: should still allow switching (validation shows error but doesn't block)
assert!(limits.allows_field_switch("this is way too long"));
assert!(limits
.field_switch_block_reason("this is way too long")
.is_none());
}
#[test]
fn test_field_switch_no_minimum() {
let limits = CharacterLimits::new(10); // Only max, no minimum
// Should always allow switching when there's no minimum
assert!(limits.allows_field_switch(""));
assert!(limits.allows_field_switch("a"));
assert!(limits.allows_field_switch("hello"));
assert!(limits.field_switch_block_reason("").is_none());
assert!(limits.field_switch_block_reason("a").is_none());
}
}

View File

@@ -0,0 +1,348 @@
// src/rules/display_mask.rs
//! Pure display mask system - user-defined patterns only
use serde::{Deserialize, Serialize};
/// How a `DisplayMask` renders raw input.
///
/// The examples below use the pattern `"(###) ###-####"`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum MaskDisplayMode {
/// Only show separators as user types; separators that precede the next
/// (still empty) input position are already shown.
/// Example: "" → "", "123" → "(123) ", "12345" → "(123) 45"
#[default]
Dynamic,
/// Show full template with placeholders from start
/// Example: "" → "(___) ___-____", "123" → "(123) ___-____"
Template {
/// Character to use as placeholder for empty input positions
placeholder: char,
},
}
/// A user-defined display mask: raw input characters are slotted into the
/// input positions of `pattern`, all other pattern characters are shown as-is.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DisplayMask {
/// Mask pattern like "##-##-####" where # = input position, others are visual separators
pattern: String,
/// Character used to represent input positions (usually '#')
input_char: char,
/// How to display the mask (dynamic vs template)
display_mode: MaskDisplayMode,
}
impl DisplayMask {
    /// Create a new display mask with dynamic mode (current behavior)
    ///
    /// # Arguments
    /// * `pattern` - The mask pattern (e.g., "##-##-####", "(###) ###-####")
    /// * `input_char` - Character representing input positions (usually '#')
    ///
    /// # Examples
    /// ```
    /// use validation_core::DisplayMask;
    ///
    /// // Phone number format
    /// let phone_mask = DisplayMask::new("(###) ###-####", '#');
    ///
    /// // Date format
    /// let date_mask = DisplayMask::new("##/##/####", '#');
    ///
    /// // Custom business format
    /// let employee_id = DisplayMask::new("EMP-####-##", '#');
    /// ```
    pub fn new(pattern: impl Into<String>, input_char: char) -> Self {
        Self {
            pattern: pattern.into(),
            input_char,
            display_mode: MaskDisplayMode::Dynamic,
        }
    }

    /// Set the display mode for this mask
    ///
    /// # Examples
    /// ```
    /// use validation_core::{DisplayMask, MaskDisplayMode};
    ///
    /// let dynamic_mask = DisplayMask::new("##-##", '#')
    ///     .with_mode(MaskDisplayMode::Dynamic);
    ///
    /// let template_mask = DisplayMask::new("##-##", '#')
    ///     .with_mode(MaskDisplayMode::Template { placeholder: '_' });
    /// ```
    pub fn with_mode(mut self, mode: MaskDisplayMode) -> Self {
        self.display_mode = mode;
        self
    }

    /// Set template mode with custom placeholder
    ///
    /// # Examples
    /// ```
    /// use validation_core::DisplayMask;
    ///
    /// let phone_template = DisplayMask::new("(###) ###-####", '#')
    ///     .with_template('_'); // Shows "(___) ___-____" when empty
    ///
    /// let date_dots = DisplayMask::new("##/##/####", '#')
    ///     .with_template('•'); // Shows "••/••/••••" when empty
    /// ```
    pub fn with_template(self, placeholder: char) -> Self {
        self.with_mode(MaskDisplayMode::Template { placeholder })
    }

    /// Apply mask to raw input, showing visual separators and handling display mode
    pub fn apply_to_display(&self, raw_input: &str) -> String {
        match &self.display_mode {
            MaskDisplayMode::Dynamic => self.apply_dynamic(raw_input),
            MaskDisplayMode::Template { placeholder } => {
                self.apply_template(raw_input, *placeholder)
            }
        }
    }

    /// Dynamic mode - only show separators as user types.
    ///
    /// Empty input renders as an empty string. Rendering stops at the first
    /// input position that has no raw character left; raw characters beyond
    /// the pattern are appended unchanged.
    fn apply_dynamic(&self, raw_input: &str) -> String {
        if raw_input.is_empty() {
            return String::new();
        }

        let mut result = String::new();
        let mut raw_chars = raw_input.chars();

        for pattern_char in self.pattern.chars() {
            if pattern_char == self.input_char {
                // Input position - take from raw input
                match raw_chars.next() {
                    Some(input_char) => result.push(input_char),
                    // No more input - stop here in dynamic mode
                    None => break,
                }
            } else {
                // Visual separator - always show
                result.push(pattern_char);
            }
        }

        // Append any remaining raw characters that don't fit the pattern
        result.extend(raw_chars);
        result
    }

    /// Template mode - show full pattern with placeholders.
    ///
    /// Raw characters beyond the pattern are not rendered, which keeps the
    /// template's shape constant.
    fn apply_template(&self, raw_input: &str, placeholder: char) -> String {
        let mut raw_chars = raw_input.chars();

        self.pattern
            .chars()
            .map(|pattern_char| {
                if pattern_char == self.input_char {
                    // Input position - take from raw input or use placeholder
                    raw_chars.next().unwrap_or(placeholder)
                } else {
                    // Visual separator - always show in template mode
                    pattern_char
                }
            })
            .collect()
    }

    /// Check if a display position should accept cursor/input.
    ///
    /// Positions beyond the pattern always accept input.
    pub fn is_input_position(&self, display_position: usize) -> bool {
        self.pattern
            .chars()
            .nth(display_position)
            .map_or(true, |c| c == self.input_char)
    }

    /// Map display position to raw position: the number of input positions
    /// that lie strictly before `display_pos` in the pattern.
    pub fn display_pos_to_raw_pos(&self, display_pos: usize) -> usize {
        self.pattern
            .chars()
            .take(display_pos)
            .filter(|&pattern_char| pattern_char == self.input_char)
            .count()
    }

    /// Map raw position to display position.
    ///
    /// Display positions are character indices into the pattern. A raw
    /// position beyond the pattern's input slots maps to a position after the
    /// pattern.
    pub fn raw_pos_to_display_pos(&self, raw_pos: usize) -> usize {
        let mut input_positions_seen = 0;

        for (display_pos, pattern_char) in self.pattern.chars().enumerate() {
            if pattern_char == self.input_char {
                if input_positions_seen == raw_pos {
                    return display_pos;
                }
                input_positions_seen += 1;
            }
        }

        // Beyond pattern, return position after pattern. The pattern length
        // must be counted in characters (not bytes) to stay in the same unit
        // as the display positions above.
        self.pattern.chars().count() + (raw_pos - input_positions_seen)
    }

    /// Find next input position at or after the given display position
    pub fn next_input_position(&self, display_pos: usize) -> usize {
        self.pattern
            .chars()
            .enumerate()
            .skip(display_pos)
            .find(|&(_, pattern_char)| pattern_char == self.input_char)
            .map(|(index, _)| index)
            // Beyond pattern = all positions are input positions. The pattern
            // length is counted in characters to match display positions.
            .unwrap_or_else(|| display_pos.max(self.pattern.chars().count()))
    }

    /// Find previous input position at or before the given display position
    pub fn prev_input_position(&self, display_pos: usize) -> Option<usize> {
        // The last match among indices <= display_pos is the closest one when
        // looking backwards; no intermediate collection is needed.
        self.pattern
            .chars()
            .enumerate()
            .take_while(|&(index, _)| index <= display_pos)
            .filter(|&(_, pattern_char)| pattern_char == self.input_char)
            .map(|(index, _)| index)
            .last()
    }

    /// Get the display mode
    pub fn display_mode(&self) -> &MaskDisplayMode {
        &self.display_mode
    }

    /// Check if this mask uses template mode
    pub fn is_template_mode(&self) -> bool {
        matches!(self.display_mode, MaskDisplayMode::Template { .. })
    }

    /// Get the pattern string
    pub fn pattern(&self) -> &str {
        &self.pattern
    }

    /// Get the input placeholder character
    pub fn input_char(&self) -> char {
        self.input_char
    }

    /// Get the position of the first input character in the pattern, or 0
    /// when the pattern has no input position.
    pub fn first_input_position(&self) -> usize {
        self.pattern
            .chars()
            .position(|ch| ch == self.input_char)
            .unwrap_or(0)
    }
}
impl Default for DisplayMask {
fn default() -> Self {
Self::new("", '#')
}
}
// Unit tests for `DisplayMask`: dynamic and template rendering with
// user-defined patterns, custom input/placeholder characters, and the mapping
// between raw and display positions.
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_user_defined_phone_mask() {
// User creates their own phone mask
let dynamic = DisplayMask::new("(###) ###-####", '#');
let template = DisplayMask::new("(###) ###-####", '#').with_template('_');
// Dynamic mode
assert_eq!(dynamic.apply_to_display(""), "");
assert_eq!(dynamic.apply_to_display("1234567890"), "(123) 456-7890");
// Template mode
assert_eq!(template.apply_to_display(""), "(___) ___-____");
assert_eq!(template.apply_to_display("123"), "(123) ___-____");
}
#[test]
fn test_user_defined_date_mask() {
// User creates their own date formats
let us_date = DisplayMask::new("##/##/####", '#');
let eu_date = DisplayMask::new("##.##.####", '#');
let iso_date = DisplayMask::new("####-##-##", '#');
assert_eq!(us_date.apply_to_display("12252024"), "12/25/2024");
assert_eq!(eu_date.apply_to_display("25122024"), "25.12.2024");
assert_eq!(iso_date.apply_to_display("20241225"), "2024-12-25");
}
#[test]
fn test_user_defined_business_formats() {
// User creates custom business formats
let employee_id = DisplayMask::new("EMP-####-##", '#');
let product_code = DisplayMask::new("###-###-###", '#');
let invoice = DisplayMask::new("INV####/##", '#');
assert_eq!(employee_id.apply_to_display("123456"), "EMP-1234-56");
assert_eq!(product_code.apply_to_display("123456789"), "123-456-789");
assert_eq!(invoice.apply_to_display("123456"), "INV1234/56");
}
#[test]
fn test_custom_input_characters() {
// User can define their own input character
let mask_with_x = DisplayMask::new("XXX-XX-XXXX", 'X');
let mask_with_hash = DisplayMask::new("###-##-####", '#');
let mask_with_n = DisplayMask::new("NNN-NN-NNNN", 'N');
assert_eq!(mask_with_x.apply_to_display("123456789"), "123-45-6789");
assert_eq!(mask_with_hash.apply_to_display("123456789"), "123-45-6789");
assert_eq!(mask_with_n.apply_to_display("123456789"), "123-45-6789");
}
#[test]
fn test_custom_placeholders() {
// User can define custom placeholder characters
let underscores = DisplayMask::new("##-##", '#').with_template('_');
let dots = DisplayMask::new("##-##", '#').with_template('•');
let dashes = DisplayMask::new("##-##", '#').with_template('-');
assert_eq!(underscores.apply_to_display(""), "__-__");
assert_eq!(dots.apply_to_display(""), "••-••");
assert_eq!(dashes.apply_to_display(""), "-----"); // Note: dashes blend with separator
}
#[test]
fn test_position_mapping_user_patterns() {
let custom = DisplayMask::new("ABC-###-XYZ", '#');
// Position mapping should work correctly with any pattern
assert_eq!(custom.raw_pos_to_display_pos(0), 4); // First # at position 4
assert_eq!(custom.raw_pos_to_display_pos(1), 5); // Second # at position 5
assert_eq!(custom.raw_pos_to_display_pos(2), 6); // Third # at position 6
assert_eq!(custom.display_pos_to_raw_pos(4), 0); // Position 4 -> first input
assert_eq!(custom.display_pos_to_raw_pos(5), 1); // Position 5 -> second input
assert_eq!(custom.display_pos_to_raw_pos(6), 2); // Position 6 -> third input
assert!(!custom.is_input_position(0)); // A
assert!(!custom.is_input_position(3)); // -
assert!(custom.is_input_position(4)); // #
assert!(!custom.is_input_position(8)); // X (index 8 of "ABC-###-XYZ")
}
}

View File

@@ -0,0 +1,7 @@
pub mod character_limits;
pub mod display_mask;
pub mod pattern_rules;
pub use character_limits::{count_text, CharacterLimits, CountMode, LimitCheckResult};
pub use display_mask::{DisplayMask, MaskDisplayMode};
pub use pattern_rules::{CharacterFilter, PatternFilters, PositionFilter, PositionRange};

View File

@@ -0,0 +1,330 @@
// src/validation/patterns.rs
//! Position-based pattern filtering for validation
use serde::{Deserialize, Serialize};
use std::sync::Arc;
/// A filter that applies to specific character positions in a field.
///
/// Pairs a set of positions with the character rule enforced at those
/// positions; characters at any other position are not constrained by it.
#[derive(Debug, Clone)]
pub struct PositionFilter {
/// Which positions this filter applies to (tested with `PositionRange::contains`)
pub positions: PositionRange,
/// What type of character filter to apply at those positions
pub filter: CharacterFilter,
}
/// Defines which character positions a filter applies to.
///
/// Positions are zero-based: the first character of a field is position 0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PositionRange {
/// Single position (e.g., `Single(3)` matches position 3 only)
Single(usize),
/// Range of positions `Range(start, end)`, inclusive at both ends
/// (e.g., positions 0-2); a range with `start > end` matches nothing
Range(usize, usize),
/// From position onwards (e.g., `From(4)` matches position 4 and beyond)
From(usize),
/// Multiple specific positions (e.g., positions 0, 2, 5)
Multiple(Vec<usize>),
}
/// Types of character filters that can be applied.
///
/// `Clone` is derived: every payload (`char`, `Vec<char>`, `Arc<..>`) is
/// already `Clone`, and cloning `Custom` only bumps the `Arc` reference count,
/// so clones share the same closure.
#[derive(Clone)]
pub enum CharacterFilter {
    /// Allow only alphabetic characters (matched with `char::is_alphabetic`,
    /// so non-ASCII letters are accepted as well as a-z / A-Z)
    Alphabetic,
    /// Allow only numeric characters (matched with `char::is_numeric`,
    /// so non-ASCII numerals are accepted as well as 0-9)
    Numeric,
    /// Allow alphanumeric characters (matched with `char::is_alphanumeric`)
    Alphanumeric,
    /// Allow only exact character match
    Exact(char),
    /// Allow any character from the provided set
    OneOf(Vec<char>),
    /// Custom user-defined filter function; returns `true` to accept a character
    Custom(Arc<dyn Fn(char) -> bool + Send + Sync>),
}

// `Debug` has to be written by hand: the closure stored in `Custom` does not
// implement `Debug`, so it is rendered as a fixed placeholder instead.
impl std::fmt::Debug for CharacterFilter {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            CharacterFilter::Alphabetic => f.write_str("Alphabetic"),
            CharacterFilter::Numeric => f.write_str("Numeric"),
            CharacterFilter::Alphanumeric => f.write_str("Alphanumeric"),
            CharacterFilter::Exact(ch) => write!(f, "Exact('{ch}')"),
            CharacterFilter::OneOf(chars) => write!(f, "OneOf({chars:?})"),
            CharacterFilter::Custom(_) => f.write_str("Custom(<function>)"),
        }
    }
}
impl PositionRange {
    /// Check if a position is included in this range.
    ///
    /// `Range(start, end)` is inclusive at both ends and matches nothing when
    /// `start > end`.
    pub fn contains(&self, position: usize) -> bool {
        match self {
            PositionRange::Single(pos) => position == *pos,
            PositionRange::Range(start, end) => (*start..=*end).contains(&position),
            PositionRange::From(start) => position >= *start,
            PositionRange::Multiple(positions) => positions.contains(&position),
        }
    }

    /// Get all positions up to a given length that this range covers.
    ///
    /// Only positions strictly below `max_length` are returned, so a
    /// `max_length` of 0 always yields an empty vector. `Multiple` keeps the
    /// order (and any duplicates) of its stored positions.
    pub fn positions_up_to(&self, max_length: usize) -> Vec<usize> {
        match self {
            PositionRange::Single(pos) => {
                if *pos < max_length {
                    vec![*pos]
                } else {
                    Vec::new()
                }
            }
            PositionRange::Range(start, end) => {
                // Convert the inclusive end into an exclusive one and clamp it
                // to `max_length`. Clamping the exclusive bound (rather than
                // `max_length - 1`) keeps the result empty when `max_length`
                // is 0 instead of wrongly reporting position 0.
                let stop = end.saturating_add(1).min(max_length);
                (*start..stop).collect()
            }
            // An exclusive range is already empty when `start >= max_length`.
            PositionRange::From(start) => (*start..max_length).collect(),
            PositionRange::Multiple(positions) => positions
                .iter()
                .copied()
                .filter(|&pos| pos < max_length)
                .collect(),
        }
    }
}
impl CharacterFilter {
/// Test if a character passes this filter
pub fn accepts(&self, ch: char) -> bool {
match self {
CharacterFilter::Alphabetic => ch.is_alphabetic(),
CharacterFilter::Numeric => ch.is_numeric(),
CharacterFilter::Alphanumeric => ch.is_alphanumeric(),
CharacterFilter::Exact(expected) => ch == *expected,
CharacterFilter::OneOf(chars) => chars.contains(&ch),
CharacterFilter::Custom(func) => func(ch),
}
}
/// Get a human-readable description of this filter
pub fn description(&self) -> String {
match self {
CharacterFilter::Alphabetic => "alphabetic characters (a-z, A-Z)".to_string(),
CharacterFilter::Numeric => "numeric characters (0-9)".to_string(),
CharacterFilter::Alphanumeric => "alphanumeric characters (a-z, A-Z, 0-9)".to_string(),
CharacterFilter::Exact(ch) => format!("exactly '{ch}'"),
CharacterFilter::OneOf(chars) => {
let char_list: String = chars.iter().collect();
format!("one of: {char_list}")
}
CharacterFilter::Custom(_) => "custom filter".to_string(),
}
}
}
impl PositionFilter {
/// Create a new position filter
pub fn new(positions: PositionRange, filter: CharacterFilter) -> Self {
Self { positions, filter }
}
/// Validate a character at a specific position
pub fn validate_position(&self, position: usize, character: char) -> bool {
if self.positions.contains(position) {
self.filter.accepts(character)
} else {
true // Position not covered by this filter, allow any character
}
}
/// Get error message for invalid character at position
pub fn error_message(&self, position: usize, character: char) -> Option<String> {
if self.positions.contains(position) && !self.filter.accepts(character) {
Some(format!(
"Position {} requires {} but got '{}'",
position,
self.filter.description(),
character
))
} else {
None
}
}
}
/// A collection of position filters for a field.
///
/// Filters are checked in insertion order and the first rejection wins.
#[derive(Debug, Clone, Default)]
pub struct PatternFilters {
    filters: Vec<PositionFilter>,
}

impl PatternFilters {
    /// Create empty pattern filters (accepts any text).
    pub fn new() -> Self {
        Self::default()
    }

    /// Add a position filter, returning the updated collection (builder style).
    pub fn add_filter(mut self, filter: PositionFilter) -> Self {
        self.filters.push(filter);
        self
    }

    /// Add multiple filters, appended after the ones already present.
    pub fn add_filters(mut self, filters: Vec<PositionFilter>) -> Self {
        self.filters.extend(filters);
        self
    }

    /// Validate a character at a specific position against all applicable filters.
    ///
    /// # Errors
    ///
    /// Returns the message of the first filter (in insertion order) that
    /// rejects `character` at `position`.
    pub fn validate_char_at_position(
        &self,
        position: usize,
        character: char,
    ) -> Result<(), String> {
        self.filters
            .iter()
            .find_map(|filter| filter.error_message(position, character))
            .map_or(Ok(()), Err)
    }

    /// Validate entire text against all filters.
    ///
    /// Positions are counted in characters, not bytes, so a multi-byte
    /// character occupies exactly one position and the characters after it
    /// keep the indices the configured ranges refer to.
    ///
    /// # Errors
    ///
    /// Returns the message for the first character (scanning left to right)
    /// that some filter rejects.
    pub fn validate_text(&self, text: &str) -> Result<(), String> {
        // `char_indices` would yield byte offsets here, which drift away from
        // the character position as soon as the text contains non-ASCII input.
        text.chars()
            .enumerate()
            .try_for_each(|(position, character)| {
                self.validate_char_at_position(position, character)
            })
    }

    /// Check if any filters are configured.
    pub fn has_filters(&self) -> bool {
        !self.filters.is_empty()
    }

    /// Get all configured filters, in insertion order.
    pub fn filters(&self) -> &[PositionFilter] {
        &self.filters
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Membership checks for every `PositionRange` variant.
    #[test]
    fn test_position_range_contains() {
        let single = PositionRange::Single(3);
        assert!(single.contains(3));
        assert!(!single.contains(2));

        let span = PositionRange::Range(1, 4);
        assert!(span.contains(3));
        assert!(!span.contains(5));

        let tail = PositionRange::From(2);
        assert!(tail.contains(5));
        assert!(!tail.contains(1));

        let listed = PositionRange::Multiple(vec![0, 2, 5]);
        assert!(listed.contains(2));
        assert!(!listed.contains(3));
    }

    /// Each row is (range, max_length, positions expected below max_length).
    #[test]
    fn test_position_range_positions_up_to() {
        let cases: Vec<(PositionRange, usize, Vec<usize>)> = vec![
            (PositionRange::Single(3), 5, vec![3]),
            (PositionRange::Single(5), 3, vec![]),
            (PositionRange::Range(1, 3), 5, vec![1, 2, 3]),
            (PositionRange::Range(1, 5), 3, vec![1, 2]),
            (PositionRange::From(2), 5, vec![2, 3, 4]),
            (PositionRange::Multiple(vec![0, 2, 5]), 4, vec![0, 2]),
        ];

        for (range, max_length, expected) in cases {
            assert_eq!(range.positions_up_to(max_length), expected);
        }
    }

    /// Each row is (filter, characters it must accept, characters it must reject).
    #[test]
    fn test_character_filter_accepts() {
        let cases = [
            (CharacterFilter::Alphabetic, vec!['a', 'Z'], vec!['1']),
            (CharacterFilter::Numeric, vec!['5'], vec!['a']),
            (CharacterFilter::Alphanumeric, vec!['a', '5'], vec!['-']),
            (CharacterFilter::Exact('x'), vec!['x'], vec!['y']),
            (CharacterFilter::OneOf(vec!['a', 'b', 'c']), vec!['b'], vec!['d']),
        ];

        for (filter, accepted, rejected) in cases {
            for ch in accepted {
                assert!(filter.accepts(ch));
            }
            for ch in rejected {
                assert!(!filter.accepts(ch));
            }
        }
    }

    /// A filter only constrains the positions it covers.
    #[test]
    fn test_position_filter_validation() {
        let letters_first =
            PositionFilter::new(PositionRange::Range(0, 1), CharacterFilter::Alphabetic);

        assert!(letters_first.validate_position(0, 'A'));
        assert!(letters_first.validate_position(1, 'b'));
        assert!(!letters_first.validate_position(0, '1'));
        // Position 2 not covered, allow anything
        assert!(letters_first.validate_position(2, '1'));
    }

    /// Two letters followed by three digits, e.g. "AB123".
    #[test]
    fn test_pattern_filters_validation() {
        let letters = PositionFilter::new(PositionRange::Range(0, 1), CharacterFilter::Alphabetic);
        let digits = PositionFilter::new(PositionRange::Range(2, 4), CharacterFilter::Numeric);
        let patterns = PatternFilters::new().add_filter(letters).add_filter(digits);

        // Valid pattern: AB123
        assert!(patterns.validate_text("AB123").is_ok());
        // Invalid: number in alphabetic position
        assert!(patterns.validate_text("A1123").is_err());
        // Invalid: letter in numeric position
        assert!(patterns.validate_text("AB1A3").is_err());
    }

    /// A user-supplied closure can act as the character rule.
    #[test]
    fn test_custom_filter() {
        let lowercase_only = CharacterFilter::Custom(Arc::new(|c| c.is_lowercase()));
        let pattern = PatternFilters::new()
            .add_filter(PositionFilter::new(PositionRange::From(0), lowercase_only));

        assert!(pattern.validate_text("hello").is_ok());
        // Uppercase not allowed
        assert!(pattern.validate_text("Hello").is_err());
    }
}

150
validation-core/src/set.rs Normal file
View File

@@ -0,0 +1,150 @@
use crate::{ValidationConfig, ValidationMergeError, ValidationSettings};
use serde::{Deserialize, Serialize};
use thiserror::Error;
/// A named validation rule wrapping one `ValidationSettings` value.
///
/// `ValidationSet::resolve_settings_with_rules` reads the `settings` of rules
/// returned by its lookup callback.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationRule {
/// Name of the rule.
// NOTE(review): presumably the key that `ValidationSetItem::GlobalRuleRef`
// refers to — the lookup itself is done by the caller's callback; confirm.
pub name: String,
/// Optional free-text description of the rule
pub description: Option<String>,
/// Settings this rule contributes
pub settings: ValidationSettings,
}
impl ValidationRule {
/// Resolves this rule on its own by delegating to `resolve` on its `settings`.
pub fn resolve(&self) -> ValidationConfig {
self.settings.resolve()
}
}
/// A named, ordered list of validation items that are merged into a single
/// `ValidationSettings` by `resolve_settings_with_rules`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationSet {
/// Name of the set
pub name: String,
/// Optional free-text description of the set
pub description: Option<String>,
/// Items to merge, processed in this order during resolution
pub items: Vec<ValidationSetItem>,
}
/// One entry of a `ValidationSet`: either a reference to a rule defined
/// elsewhere or settings written directly in the set.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ValidationSetItem {
/// Name of a rule that is looked up through the `rules` callback passed to
/// `ValidationSet::resolve_settings_with_rules`; resolution fails with
/// `ValidationSetResolveError::MissingGlobalRule` when the callback returns `None`
GlobalRuleRef(String),
/// Settings embedded directly in the set
InlineRule {
/// Optional label; it is not read by `resolve_settings_with_rules`
name: Option<String>,
/// Settings this item contributes to the merge
validation: ValidationSettings,
},
}
impl ValidationSet {
    /// Merges the settings of every item in this set into one `ValidationSettings`.
    ///
    /// Items are visited in order. Inline items contribute their own settings;
    /// `GlobalRuleRef` items contribute the settings of the rule that `rules`
    /// returns for the referenced name. The collected fragments are then
    /// handed to `ValidationSettings::merge_rules`.
    ///
    /// # Errors
    ///
    /// * `MissingGlobalRule` for the first referenced name that `rules` cannot
    ///   find (later items are not looked up).
    /// * `Merge` when `ValidationSettings::merge_rules` rejects the fragments.
    pub fn resolve_settings_with_rules<'a>(
        &'a self,
        rules: impl Fn(&str) -> Option<&'a ValidationRule>,
    ) -> Result<ValidationSettings, ValidationSetResolveError> {
        let mut fragments: Vec<&'a ValidationSettings> = Vec::with_capacity(self.items.len());

        for item in &self.items {
            let fragment = match item {
                ValidationSetItem::InlineRule { validation, .. } => validation,
                ValidationSetItem::GlobalRuleRef(name) => match rules(name) {
                    Some(rule) => &rule.settings,
                    None => {
                        return Err(ValidationSetResolveError::MissingGlobalRule {
                            name: name.clone(),
                        });
                    }
                },
            };
            fragments.push(fragment);
        }

        ValidationSettings::merge_rules(fragments).map_err(ValidationSetResolveError::from)
    }

    /// Same as `resolve_settings_with_rules`, followed by `resolve` on the
    /// merged settings to obtain a `ValidationConfig`.
    ///
    /// # Errors
    ///
    /// Propagates any error from `resolve_settings_with_rules`.
    pub fn resolve_with_rules<'a>(
        &'a self,
        rules: impl Fn(&str) -> Option<&'a ValidationRule>,
    ) -> Result<ValidationConfig, ValidationSetResolveError> {
        self.resolve_settings_with_rules(rules)
            .map(|merged| merged.resolve())
    }
}
/// Reasons why a `ValidationSet` could not be resolved.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum ValidationSetResolveError {
/// A `GlobalRuleRef` item named a rule that the lookup callback did not return
#[error("validation set references missing global rule '{name}'")]
MissingGlobalRule { name: String },
/// Merging the collected settings failed; displays as the wrapped
/// `ValidationMergeError` and converts from it automatically (`#[from]`)
#[error(transparent)]
Merge(#[from] ValidationMergeError),
}
/// Validation settings together with an optional set name.
// NOTE(review): presumably the already-merged settings applied to a field,
// with `set_name` naming the `ValidationSet` they came from — confirm with callers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AppliedValidation {
/// Optional name of a validation set; `None` when no name is recorded
pub set_name: Option<String>,
/// Settings carried by this value
pub settings: ValidationSettings,
}
impl AppliedValidation {
/// Produces a `ValidationConfig` by delegating to `resolve` on `settings`;
/// `set_name` is not consulted.
pub fn resolve(&self) -> ValidationConfig {
self.settings.resolve()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        CharacterFilterSettings, CharacterLimits, PatternSettings, PositionFilterSettings,
        PositionRange,
    };

    /// Wraps `validation` in a named inline set item.
    fn inline_rule(name: &str, validation: ValidationSettings) -> ValidationSetItem {
        ValidationSetItem::InlineRule {
            name: Some(name.to_string()),
            validation,
        }
    }

    /// Builds a set without a description from the given items.
    fn set_named(name: &str, items: Vec<ValidationSetItem>) -> ValidationSet {
        ValidationSet {
            name: name.to_string(),
            description: None,
            items,
        }
    }

    /// Fragments that touch different settings merge into one combined value.
    #[test]
    fn validation_set_merges_rule_fragments() {
        let length_only = ValidationSettings {
            character_limits: Some(CharacterLimits::new_range(10, 15)),
            ..ValidationSettings::default()
        };
        let digits_only = ValidationSettings {
            pattern: Some(PatternSettings {
                filters: vec![PositionFilterSettings {
                    positions: PositionRange::From(0),
                    filter: CharacterFilterSettings::Numeric,
                }],
                description: None,
            }),
            ..ValidationSettings::default()
        };
        let set = set_named(
            "phone",
            vec![
                inline_rule("phone-length", length_only),
                inline_rule("digits-only", digits_only),
            ],
        );

        // No global rules are referenced, so the lookup never needs to succeed.
        let settings = set
            .resolve_settings_with_rules(|_| None)
            .expect("set should resolve");

        assert!(settings.character_limits.is_some());
        assert_eq!(settings.pattern.expect("pattern").filters.len(), 1);
    }

    /// Two fragments that both set `character_limits` cannot be merged.
    #[test]
    fn validation_set_rejects_duplicate_singleton_rules() {
        let short = ValidationSettings {
            character_limits: Some(CharacterLimits::new(10)),
            ..ValidationSettings::default()
        };
        let long = ValidationSettings {
            character_limits: Some(CharacterLimits::new(20)),
            ..ValidationSettings::default()
        };
        let set = set_named(
            "conflict",
            vec![inline_rule("short", short), inline_rule("long", long)],
        );

        assert!(set.resolve_settings_with_rules(|_| None).is_err());
    }
}