Compare commits

..

15 Commits

Author SHA1 Message Date
Priec
17a13569d8 cargo fmt 2026-05-06 20:33:53 +02:00
Priec
14f88e6a40 validation docs 2026-05-06 20:29:05 +02:00
Priec
3373e00dfc validation core as a dependency2 2026-05-06 19:50:09 +02:00
Priec
f094346e1b validation core as a dependency 2026-05-06 19:03:26 +02:00
Priec
3b0133640f more advancements 2026-05-03 23:34:03 +02:00
Priec
0600d3deaa table validation for the client from the server 2026-05-03 10:34:59 +02:00
Priec
90f8aedc3b better new functionality of column aliases 2026-05-02 13:56:45 +02:00
Priec
2a811b1f8c rename the column aliases 2026-05-02 00:38:54 +02:00
Priec
1f9c29411e multiple requests to the structure of a tables at once(batching) 2026-04-30 11:48:03 +02:00
Priec
b928004c76 search with multiquery redesigned 2026-04-29 19:56:17 +02:00
Priec
fb4769301c column name indexing 2026-04-29 01:33:48 +02:00
Priec
036e12f345 indexing done by the profile and not table 2026-04-29 01:08:59 +02:00
Priec
1ceab57f3b exact search endpoint 2026-04-29 00:40:36 +02:00
Priec
5de1cd7623 refactoring search based on the profile 2026-04-29 00:38:42 +02:00
Priec
1867de513d get profile details with scripts and tables columns is now working 2026-04-27 22:01:17 +02:00
29 changed files with 4874 additions and 432 deletions

1
.gitignore vendored
View File

@@ -6,3 +6,4 @@ steel_decimal/tests/property_tests.proptest-regressions
.direnv/
canvas/*.toml
.aider*
.codex

45
Cargo.lock generated
View File

@@ -493,7 +493,7 @@ checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
[[package]]
name = "canvas"
version = "0.6.0"
version = "0.6.7"
dependencies = [
"anyhow",
"async-trait",
@@ -512,6 +512,7 @@ dependencies = [
"tracing",
"tracing-subscriber",
"unicode-width 0.2.0",
"validation-core",
]
[[package]]
@@ -585,7 +586,7 @@ dependencies = [
[[package]]
name = "client"
version = "0.6.0"
version = "0.6.7"
dependencies = [
"anyhow",
"async-trait",
@@ -596,6 +597,7 @@ dependencies = [
"dotenvy",
"futures",
"lazy_static",
"nucleo",
"prost 0.13.5",
"prost-types 0.13.5",
"ratatui",
@@ -640,7 +642,7 @@ dependencies = [
[[package]]
name = "common"
version = "0.6.0"
version = "0.6.7"
dependencies = [
"prost 0.13.5",
"prost-build 0.14.1",
@@ -2103,6 +2105,27 @@ dependencies = [
"windows-sys 0.60.2",
]
[[package]]
name = "nucleo"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5262af4c94921c2646c5ac6ff7900c2af9cbb08dc26a797e18130a7019c039d4"
dependencies = [
"nucleo-matcher",
"parking_lot",
"rayon",
]
[[package]]
name = "nucleo-matcher"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf33f538733d1a5a3494b836ba913207f14d9d4a1d3cd67030c5061bdd2cac85"
dependencies = [
"memchr",
"unicode-segmentation",
]
[[package]]
name = "num-bigint"
version = "0.4.6"
@@ -3094,7 +3117,7 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"
[[package]]
name = "search"
version = "0.6.0"
version = "0.6.7"
dependencies = [
"anyhow",
"common",
@@ -3193,7 +3216,7 @@ dependencies = [
[[package]]
name = "server"
version = "0.6.0"
version = "0.6.7"
dependencies = [
"anyhow",
"bcrypt",
@@ -3230,7 +3253,9 @@ dependencies = [
"tonic-reflection",
"tracing",
"tracing-subscriber",
"unicode-width 0.2.0",
"uuid",
"validation-core",
"validator",
]
@@ -4522,6 +4547,16 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "validation-core"
version = "0.6.7"
dependencies = [
"regex",
"serde",
"thiserror 2.0.12",
"unicode-width 0.2.0",
]
[[package]]
name = "validator"
version = "0.20.0"

View File

@@ -1,11 +1,11 @@
[workspace]
members = ["client", "server", "common", "search", "canvas"]
members = ["client", "server", "common", "search", "canvas", "validation-core"]
resolver = "2"
[workspace.package]
# TODO: idk how to do the name, fix later
# name = "komp_ac"
version = "0.6.2"
version = "0.6.7"
edition = "2021"
license = "GPL-3.0-or-later"
authors = ["Filip Priečinský <filippriec@gmail.com>"]
@@ -53,3 +53,4 @@ toml = "0.8.20"
unicode-width = "0.2.0"
common = { path = "./common" }
validation-core = { path = "./validation-core" }

2
canvas

Submodule canvas updated: 812ac2a428...e6c942dd41

2
client

Submodule client updated: 2494066140...25a901ff5e

View File

@@ -24,6 +24,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
".komp_ac.table_validation.PatternRule",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.PatternPosition",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.CharacterConstraint",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.PatternRules",
"#[derive(serde::Serialize, serde::Deserialize)]",
@@ -32,6 +40,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
".komp_ac.table_validation.CustomFormatter",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.FormatterOption",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.AllowedValues",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.UpdateFieldValidationRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
@@ -40,11 +56,91 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
".komp_ac.table_validation.UpdateFieldValidationResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ReplaceTableValidationRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ReplaceTableValidationResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ValidationRuleDefinition",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ValidationSetDefinition",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.UpsertValidationRuleRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.UpsertValidationRuleResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ListValidationRulesRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ListValidationRulesResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.DeleteValidationRuleRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.DeleteValidationRuleResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.UpsertValidationSetRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.UpsertValidationSetResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ListValidationSetsRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ListValidationSetsResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.DeleteValidationSetRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.DeleteValidationSetResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ApplyValidationSetRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
".komp_ac.table_validation.ApplyValidationSetResponse",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
// Enum -> readable strings in JSON ("BYTES", "DISPLAY_WIDTH")
.type_attribute(
".komp_ac.table_validation.CountMode",
"#[derive(serde::Serialize, serde::Deserialize)] #[serde(rename_all = \"SCREAMING_SNAKE_CASE\")]",
)
.type_attribute(
".komp_ac.table_validation.PatternPositionKind",
"#[derive(serde::Serialize, serde::Deserialize)] #[serde(rename_all = \"SCREAMING_SNAKE_CASE\")]",
)
.type_attribute(
".komp_ac.table_validation.CharacterConstraintKind",
"#[derive(serde::Serialize, serde::Deserialize)] #[serde(rename_all = \"SCREAMING_SNAKE_CASE\")]",
)
.type_attribute(
".komp_ac.table_definition.ColumnDefinition",
"#[derive(serde::Serialize, serde::Deserialize)]",
@@ -61,6 +157,26 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
".komp_ac.table_definition.TableDefinitionResponse",
"#[derive(serde::Serialize, serde::Deserialize)]"
)
.type_attribute(
".komp_ac.table_definition.GetColumnAliasRenameHistoryRequest",
"#[derive(serde::Serialize, serde::Deserialize)]"
)
.type_attribute(
".komp_ac.table_definition.ColumnAliasRenameHistoryEntry",
"#[derive(serde::Serialize, serde::Deserialize)]"
)
.type_attribute(
".komp_ac.table_definition.GetColumnAliasRenameHistoryResponse",
"#[derive(serde::Serialize, serde::Deserialize)]"
)
.type_attribute(
".komp_ac.table_definition.RenameColumnAliasRequest",
"#[derive(serde::Serialize, serde::Deserialize)]"
)
.type_attribute(
".komp_ac.table_definition.RenameColumnAliasResponse",
"#[derive(serde::Serialize, serde::Deserialize)]"
)
.type_attribute(
".komp_ac.table_script.PostTableScriptRequest",
"#[derive(serde::Serialize, serde::Deserialize)]",

View File

@@ -3,18 +3,34 @@ syntax = "proto3";
package komp_ac.search;
service Searcher {
rpc SearchTable(SearchRequest) returns (SearchResponse);
rpc Search(SearchRequest) returns (SearchResponse);
}
enum MatchMode {
MATCH_MODE_UNSPECIFIED = 0;
MATCH_MODE_FUZZY = 1;
MATCH_MODE_EXACT = 2;
}
message ColumnConstraint {
string column = 1;
string query = 2;
MatchMode mode = 3;
}
message SearchRequest {
string table_name = 1;
string query = 2;
string profile_name = 1;
optional string table_name = 2;
string free_query = 3;
repeated ColumnConstraint must = 4;
optional uint32 limit = 5;
}
message SearchResponse {
message Hit {
int64 id = 1; // PostgreSQL row ID
float score = 2;
string content_json = 3;
string table_name = 4;
}
repeated Hit hits = 1;
}

View File

@@ -18,6 +18,16 @@ service TableDefinition {
// This provides a tree-like overview of table relationships.
rpc GetProfileTree(komp_ac.common.Empty) returns (ProfileTreeResponse);
// Fetches all tables with their columns and scripts for a specific profile.
// Pure data retrieval - no business logic.
rpc GetProfileDetails(GetProfileDetailsRequest) returns (GetProfileDetailsResponse);
// Returns the stored rename history for column aliases in one profile.
rpc GetColumnAliasRenameHistory(GetColumnAliasRenameHistoryRequest) returns (GetColumnAliasRenameHistoryResponse);
// Renames a user-visible column alias while keeping the physical column unchanged.
rpc RenameColumnAlias(RenameColumnAliasRequest) returns (RenameColumnAliasResponse);
// Drops a table and its metadata, then deletes the profile if it becomes empty.
rpc DeleteTable(DeleteTableRequest) returns (DeleteTableResponse);
}
@@ -119,6 +129,74 @@ message ProfileTreeResponse {
repeated Profile profiles = 1;
}
// Request to fetch all tables, columns and scripts for a profile.
message GetProfileDetailsRequest {
// Profile (schema) name to fetch details for.
string profile_name = 1;
}
// Response with all tables, columns and scripts for a profile.
message GetProfileDetailsResponse {
string profile_name = 1;
repeated TableDetail tables = 2;
}
// Request to fetch recorded column alias rename history for one profile.
message GetColumnAliasRenameHistoryRequest {
string profile_name = 1;
// Optional filter. When omitted, returns all tables in the profile.
optional int64 table_definition_id = 2;
}
// One recorded column alias rename.
message ColumnAliasRenameHistoryEntry {
int64 id = 1;
string profile_name = 2;
int64 table_definition_id = 3;
string table_name = 4;
string old_column_name = 5;
string new_column_name = 6;
string created_at = 7;
}
// Response with stored column alias rename history rows.
message GetColumnAliasRenameHistoryResponse {
string profile_name = 1;
repeated ColumnAliasRenameHistoryEntry entries = 2;
}
// Describes a table with its columns and associated scripts.
message TableDetail {
string name = 1;
int64 id = 2;
repeated ColumnDefinition columns = 3;
repeated ScriptInfo scripts = 4;
}
// A script that targets a specific column in a table.
message ScriptInfo {
int64 script_id = 1;
string target_column = 2;
string target_column_type = 3;
string script = 4;
string description = 5;
}
// Request to rename one user-visible column alias in a table.
message RenameColumnAliasRequest {
string profile_name = 1;
string table_name = 2;
string old_column_name = 3;
string new_column_name = 4;
}
// Response after renaming one column alias.
message RenameColumnAliasResponse {
bool success = 1;
string message = 2;
}
// Request to delete one table definition entirely.
message DeleteTableRequest {
// Profile (schema) name owning the table (must exist).

View File

@@ -4,40 +4,45 @@ package komp_ac.table_structure;
import "common.proto";
// Introspects the physical PostgreSQL table for a given logical table
// (defined in table_definitions) and returns its column structure.
// Introspects the physical PostgreSQL tables for one or more logical tables
// (defined in table_definitions) and returns their column structures.
// The server validates that:
// - The profile (schema) exists in `schemas`
// - The table is defined for that profile in `table_definitions`
// It then queries information_schema for the physical table and returns
// normalized column metadata. If the physical table is missing despite
// a definition, the response may contain an empty `columns` list.
// - Every table is defined for that profile in `table_definitions`
// It then queries information_schema for the physical tables and returns
// normalized column metadata.
service TableStructureService {
// Return the physical column list (name, normalized data_type,
// nullability, primary key flag) for a table in a profile.
// nullability, primary key flag) for one or more tables in a profile.
//
// Behavior:
// - NOT_FOUND if profile doesn't exist in `schemas`
// - NOT_FOUND if table not defined for that profile in `table_definitions`
// - NOT_FOUND if any table is not defined for that profile in `table_definitions`
// - Queries information_schema.columns ordered by ordinal position
// - Normalizes data_type text (details under TableColumn.data_type)
// - Returns an empty list if the table is validated but has no visible
// columns in information_schema (e.g., physical table missing)
rpc GetTableStructure(GetTableStructureRequest) returns (TableStructureResponse);
// - Returns an error if any validated table has no visible columns in
// information_schema (e.g., physical table missing)
rpc GetTableStructure(GetTableStructureRequest) returns (GetTableStructureResponse);
}
// Request identifying the profile (schema) and table to inspect.
// Request identifying the profile (schema) and tables to inspect.
message GetTableStructureRequest {
// Required. Profile (PostgreSQL schema) name. Must exist in `schemas`.
string profile_name = 1;
// Required. Table name within the profile. Must exist in `table_definitions`
// for the given profile. The physical table is then introspected via
// information_schema.
string table_name = 2;
// Required. Table names within the profile. Each must exist in
// `table_definitions` for the given profile. The physical tables are then
// introspected via information_schema.
repeated string table_names = 2;
}
// Response with the ordered list of columns (by ordinal position).
// Batched response keyed by table name.
message GetTableStructureResponse {
// Per-table physical column lists keyed by requested table name.
map<string, TableStructureResponse> table_structures = 1;
}
// Response with the ordered list of columns (by ordinal position) for one table.
message TableStructureResponse {
// Columns of the physical table, including system columns (id, deleted,
// created_at), user-defined columns, and any foreign-key columns such as

View File

@@ -2,31 +2,55 @@
syntax = "proto3";
package komp_ac.table_validation;
// This proto is the canonical server-side storage and distribution contract for
// client validation configuration.
//
// Design goals:
// - The server stores the entire field validation definition in one structured payload.
// - Clients fetch the validation rules for a table in one batch and map them to
// their local validation/runtime system (for example canvas).
// - Common validation must be represented as typed data, not as string mini-languages.
//
// Important split:
// - limits / pattern / allowed_values / required are validation rules.
// - mask / formatter are presentation and input-shaping metadata for clients.
// Request validation rules for a table
message GetTableValidationRequest {
string profileName = 1;
string tableName = 2;
}
// Response with field-level validations; if a field is omitted,
// no validation is applied (default unspecified).
// Response with field-level validations for the whole table.
// If a field is omitted, no validation configuration exists for that field.
message TableValidationResponse {
repeated FieldValidation fields = 1;
}
// Field-level validation (extensible for future kinds)
// Field-level validation definition stored on the server and distributed to clients.
message FieldValidation {
// MUST match your frontend FormState.dataKey for the column
string dataKey = 1;
// Current: only CharacterLimits. More rules can be added later.
// Validation 1: length and counting rules.
CharacterLimits limits = 10;
// Future expansion:
PatternRules pattern = 11; // Validation 2
optional CustomFormatter formatter = 14; // Validation 4 custom formatting logic
// Validation 2: position-based character constraints.
PatternRules pattern = 11;
// Exact-value whitelist.
AllowedValues allowed_values = 12;
// Client-side hint that this field participates in external/asynchronous validation UI.
bool external_validation_enabled = 13;
// Client-side formatter metadata. This is intentionally data-only, not executable code.
optional CustomFormatter formatter = 14;
// Client-side display mask metadata. The server stores raw data without mask literals.
DisplayMask mask = 3;
// ExternalValidation external = 13;
// CustomFormatter formatter = 14;
// Field must be provided / treated as required by clients and server enforcement layers.
bool required = 4;
}
@@ -38,7 +62,8 @@ enum CountMode {
DISPLAY_WIDTH = 3;
}
// Character limit validation (Validation 1)
// Character limit validation (Validation 1).
// These rules map directly to canvas CharacterLimits.
message CharacterLimits {
// When zero, the field is considered "not set". If both min/max are zero,
// the server should avoid sending this FieldValidation (no validation).
@@ -51,39 +76,91 @@ message CharacterLimits {
CountMode countMode = 4; // defaults to CHARS if unspecified
}
// Mask for pretty display
// Mask for pretty display only.
//
// This is not a validation rule by itself. It exists so clients can render and
// navigate masked input while still storing raw values server-side.
message DisplayMask {
string pattern = 1; // e.g., "(###) ###-####" or "####-##-##"
string input_char = 2; // e.g., "#"
optional string template_char = 3; // e.g., "_"
}
// One position-based validation rule, similar to CharacterFilter + PositionRange
message PatternRule {
// Range descriptor: how far the rule applies
// Examples:
// - "0" → Single position 0
// - "0-3" → Range 0..3 inclusive
// - "from:5" → From position 5 onward
// - "0,2,5" → Multiple discrete positions
string range = 1;
// Character filter type, case-insensitive keywords:
// "ALPHABETIC", "NUMERIC", "ALPHANUMERIC",
// "ONEOF(<chars>)", "EXACT(:)", "CUSTOM(<name>)"
string filter = 2;
// Which positions a pattern rule applies to.
// This exists instead of a string syntax like "0-3" so the server can validate
// the structure directly and clients do not need to parse a DSL.
message PatternPosition {
PatternPositionKind kind = 1;
uint32 single = 2;
uint32 start = 3;
uint32 end = 4;
repeated uint32 positions = 5;
}
enum PatternPositionKind {
PATTERN_POSITION_KIND_UNSPECIFIED = 0;
PATTERN_POSITION_SINGLE = 1;
PATTERN_POSITION_RANGE = 2;
PATTERN_POSITION_FROM = 3;
PATTERN_POSITION_MULTIPLE = 4;
}
// What type of character constraint a pattern rule applies.
// This mirrors the typed character filters used by canvas.
message CharacterConstraint {
CharacterConstraintKind kind = 1;
// Used when kind == CHARACTER_CONSTRAINT_EXACT.
optional string exact = 2;
// Used when kind == CHARACTER_CONSTRAINT_ONE_OF.
repeated string one_of = 3;
// Used when kind == CHARACTER_CONSTRAINT_REGEX.
optional string regex = 4;
}
enum CharacterConstraintKind {
CHARACTER_CONSTRAINT_KIND_UNSPECIFIED = 0;
CHARACTER_CONSTRAINT_ALPHABETIC = 1;
CHARACTER_CONSTRAINT_NUMERIC = 2;
CHARACTER_CONSTRAINT_ALPHANUMERIC = 3;
CHARACTER_CONSTRAINT_EXACT = 4;
CHARACTER_CONSTRAINT_ONE_OF = 5;
CHARACTER_CONSTRAINT_REGEX = 6;
}
// One position-based validation rule, similar to canvas PositionFilter.
message PatternRule {
PatternPosition position = 1;
CharacterConstraint constraint = 2;
}
// Client-side formatter metadata.
// The formatter "type" is intended to be resolved by a client-side formatter registry.
message CustomFormatter {
// Formatter type identifier; handled client-side.
// Examples: "PSCFormatter", "PhoneFormatter", "CreditCardFormatter", "DateFormatter"
string type = 1;
// Optional free-text note or parameters (e.g. locale, pattern)
optional string description = 2;
repeated FormatterOption options = 2;
optional string description = 3;
}
// Collection of pattern rules for one field
message FormatterOption {
string key = 1;
string value = 2;
}
// Exact-value whitelist configuration.
// This maps to canvas AllowedValues semantics.
message AllowedValues {
repeated string values = 1;
bool allow_empty = 2;
bool case_insensitive = 3;
}
// Collection of pattern rules for one field.
message PatternRules {
// All rules that make up the validation logic
repeated PatternRule rules = 1;
@@ -92,11 +169,28 @@ message PatternRules {
optional string description = 2;
}
// Service to fetch validations for a table
// Service for storing and fetching field-validation definitions.
service TableValidationService {
rpc GetTableValidation(GetTableValidationRequest) returns (TableValidationResponse);
// Upsert a single field validation definition.
rpc UpdateFieldValidation(UpdateFieldValidationRequest) returns (UpdateFieldValidationResponse);
// Replace the full validation definition set for a table in one transaction.
rpc ReplaceTableValidation(ReplaceTableValidationRequest) returns (ReplaceTableValidationResponse);
// Reusable named rule fragments.
rpc UpsertValidationRule(UpsertValidationRuleRequest) returns (UpsertValidationRuleResponse);
rpc ListValidationRules(ListValidationRulesRequest) returns (ListValidationRulesResponse);
rpc DeleteValidationRule(DeleteValidationRuleRequest) returns (DeleteValidationRuleResponse);
// Reusable named sets composed from rules.
rpc UpsertValidationSet(UpsertValidationSetRequest) returns (UpsertValidationSetResponse);
rpc ListValidationSets(ListValidationSetsRequest) returns (ListValidationSetsResponse);
rpc DeleteValidationSet(DeleteValidationSetRequest) returns (DeleteValidationSetResponse);
// Snapshot a reusable set onto a concrete table field.
rpc ApplyValidationSet(ApplyValidationSetRequest) returns (ApplyValidationSetResponse);
}
message UpdateFieldValidationRequest {
@@ -110,3 +204,102 @@ message UpdateFieldValidationResponse {
bool success = 1;
string message = 2;
}
message ReplaceTableValidationRequest {
string profileName = 1;
string tableName = 2;
// Full replacement set. Fields omitted here are removed from the stored config.
repeated FieldValidation fields = 3;
}
message ReplaceTableValidationResponse {
bool success = 1;
string message = 2;
}
message ValidationRuleDefinition {
string name = 1;
optional string description = 2;
// Reusable rule fragment. dataKey is ignored by the server for reusable rules.
FieldValidation validation = 3;
}
message ValidationSetDefinition {
string name = 1;
optional string description = 2;
repeated string ruleNames = 3;
// Server-resolved snapshot of all rules in ruleNames order.
FieldValidation resolvedValidation = 4;
}
message UpsertValidationRuleRequest {
string profileName = 1;
ValidationRuleDefinition rule = 2;
}
message UpsertValidationRuleResponse {
bool success = 1;
string message = 2;
}
message ListValidationRulesRequest {
string profileName = 1;
}
message ListValidationRulesResponse {
repeated ValidationRuleDefinition rules = 1;
}
message DeleteValidationRuleRequest {
string profileName = 1;
string name = 2;
}
message DeleteValidationRuleResponse {
bool success = 1;
string message = 2;
}
message UpsertValidationSetRequest {
string profileName = 1;
ValidationSetDefinition set = 2;
}
message UpsertValidationSetResponse {
bool success = 1;
string message = 2;
}
message ListValidationSetsRequest {
string profileName = 1;
}
message ListValidationSetsResponse {
repeated ValidationSetDefinition sets = 1;
}
message DeleteValidationSetRequest {
string profileName = 1;
string name = 2;
}
message DeleteValidationSetResponse {
bool success = 1;
string message = 2;
}
message ApplyValidationSetRequest {
string profileName = 1;
string tableName = 2;
string dataKey = 3;
string setName = 4;
}
message ApplyValidationSetResponse {
bool success = 1;
string message = 2;
FieldValidation validation = 3;
}

Binary file not shown.

View File

@@ -1,10 +1,25 @@
// This file is @generated by prost-build.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ColumnConstraint {
#[prost(string, tag = "1")]
pub column: ::prost::alloc::string::String,
#[prost(string, tag = "2")]
pub query: ::prost::alloc::string::String,
#[prost(enumeration = "MatchMode", tag = "3")]
pub mode: i32,
}
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct SearchRequest {
#[prost(string, tag = "1")]
pub table_name: ::prost::alloc::string::String,
#[prost(string, tag = "2")]
pub query: ::prost::alloc::string::String,
pub profile_name: ::prost::alloc::string::String,
#[prost(string, optional, tag = "2")]
pub table_name: ::core::option::Option<::prost::alloc::string::String>,
#[prost(string, tag = "3")]
pub free_query: ::prost::alloc::string::String,
#[prost(message, repeated, tag = "4")]
pub must: ::prost::alloc::vec::Vec<ColumnConstraint>,
#[prost(uint32, optional, tag = "5")]
pub limit: ::core::option::Option<u32>,
}
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct SearchResponse {
@@ -22,6 +37,37 @@ pub mod search_response {
pub score: f32,
#[prost(string, tag = "3")]
pub content_json: ::prost::alloc::string::String,
#[prost(string, tag = "4")]
pub table_name: ::prost::alloc::string::String,
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
#[repr(i32)]
pub enum MatchMode {
Unspecified = 0,
Fuzzy = 1,
Exact = 2,
}
impl MatchMode {
/// String value of the enum field names used in the ProtoBuf definition.
///
/// The values are not transformed in any way and thus are considered stable
/// (if the ProtoBuf definition does not change) and safe for programmatic use.
pub fn as_str_name(&self) -> &'static str {
match self {
Self::Unspecified => "MATCH_MODE_UNSPECIFIED",
Self::Fuzzy => "MATCH_MODE_FUZZY",
Self::Exact => "MATCH_MODE_EXACT",
}
}
/// Creates an enum from field names used in the ProtoBuf definition.
pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
match value {
"MATCH_MODE_UNSPECIFIED" => Some(Self::Unspecified),
"MATCH_MODE_FUZZY" => Some(Self::Fuzzy),
"MATCH_MODE_EXACT" => Some(Self::Exact),
_ => None,
}
}
}
/// Generated client implementations.
@@ -115,7 +161,7 @@ pub mod searcher_client {
self.inner = self.inner.max_encoding_message_size(limit);
self
}
pub async fn search_table(
pub async fn search(
&mut self,
request: impl tonic::IntoRequest<super::SearchRequest>,
) -> std::result::Result<tonic::Response<super::SearchResponse>, tonic::Status> {
@@ -129,11 +175,11 @@ pub mod searcher_client {
})?;
let codec = tonic::codec::ProstCodec::default();
let path = http::uri::PathAndQuery::from_static(
"/komp_ac.search.Searcher/SearchTable",
"/komp_ac.search.Searcher/Search",
);
let mut req = request.into_request();
req.extensions_mut()
.insert(GrpcMethod::new("komp_ac.search.Searcher", "SearchTable"));
.insert(GrpcMethod::new("komp_ac.search.Searcher", "Search"));
self.inner.unary(req, path, codec).await
}
}
@@ -151,7 +197,7 @@ pub mod searcher_server {
/// Generated trait containing gRPC methods that should be implemented for use with SearcherServer.
#[async_trait]
pub trait Searcher: std::marker::Send + std::marker::Sync + 'static {
async fn search_table(
async fn search(
&self,
request: tonic::Request<super::SearchRequest>,
) -> std::result::Result<tonic::Response<super::SearchResponse>, tonic::Status>;
@@ -232,11 +278,11 @@ pub mod searcher_server {
}
fn call(&mut self, req: http::Request<B>) -> Self::Future {
match req.uri().path() {
"/komp_ac.search.Searcher/SearchTable" => {
"/komp_ac.search.Searcher/Search" => {
#[allow(non_camel_case_types)]
struct SearchTableSvc<T: Searcher>(pub Arc<T>);
struct SearchSvc<T: Searcher>(pub Arc<T>);
impl<T: Searcher> tonic::server::UnaryService<super::SearchRequest>
for SearchTableSvc<T> {
for SearchSvc<T> {
type Response = super::SearchResponse;
type Future = BoxFuture<
tonic::Response<Self::Response>,
@@ -248,7 +294,7 @@ pub mod searcher_server {
) -> Self::Future {
let inner = Arc::clone(&self.0);
let fut = async move {
<T as Searcher>::search_table(&inner, request).await
<T as Searcher>::search(&inner, request).await
};
Box::pin(fut)
}
@@ -259,7 +305,7 @@ pub mod searcher_server {
let max_encoding_message_size = self.max_encoding_message_size;
let inner = self.inner.clone();
let fut = async move {
let method = SearchTableSvc(inner);
let method = SearchSvc(inner);
let codec = tonic::codec::ProstCodec::default();
let mut grpc = tonic::server::Grpc::new(codec)
.apply_compression_config(

View File

@@ -110,6 +110,107 @@ pub mod profile_tree_response {
pub tables: ::prost::alloc::vec::Vec<Table>,
}
}
/// Request to fetch all tables, columns and scripts for a profile.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GetProfileDetailsRequest {
/// Profile (schema) name to fetch details for.
#[prost(string, tag = "1")]
pub profile_name: ::prost::alloc::string::String,
}
/// Response with all tables, columns and scripts for a profile.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GetProfileDetailsResponse {
#[prost(string, tag = "1")]
pub profile_name: ::prost::alloc::string::String,
#[prost(message, repeated, tag = "2")]
pub tables: ::prost::alloc::vec::Vec<TableDetail>,
}
/// Request to fetch recorded column alias rename history for one profile.
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GetColumnAliasRenameHistoryRequest {
#[prost(string, tag = "1")]
pub profile_name: ::prost::alloc::string::String,
/// Optional filter. When omitted, returns all tables in the profile.
#[prost(int64, optional, tag = "2")]
pub table_definition_id: ::core::option::Option<i64>,
}
/// One recorded column alias rename.
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ColumnAliasRenameHistoryEntry {
#[prost(int64, tag = "1")]
pub id: i64,
#[prost(string, tag = "2")]
pub profile_name: ::prost::alloc::string::String,
#[prost(int64, tag = "3")]
pub table_definition_id: i64,
#[prost(string, tag = "4")]
pub table_name: ::prost::alloc::string::String,
#[prost(string, tag = "5")]
pub old_column_name: ::prost::alloc::string::String,
#[prost(string, tag = "6")]
pub new_column_name: ::prost::alloc::string::String,
#[prost(string, tag = "7")]
pub created_at: ::prost::alloc::string::String,
}
/// Response with stored column alias rename history rows.
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GetColumnAliasRenameHistoryResponse {
    /// Echo of the requested profile (schema) name.
    #[prost(string, tag = "1")]
    pub profile_name: ::prost::alloc::string::String,
    /// Recorded renames matching the request filter.
    #[prost(message, repeated, tag = "2")]
    pub entries: ::prost::alloc::vec::Vec<ColumnAliasRenameHistoryEntry>,
}
/// Describes a table with its columns and associated scripts.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct TableDetail {
    /// Table name within the profile.
    #[prost(string, tag = "1")]
    pub name: ::prost::alloc::string::String,
    /// Identifier of the table definition.
    #[prost(int64, tag = "2")]
    pub id: i64,
    /// Column definitions belonging to this table.
    #[prost(message, repeated, tag = "3")]
    pub columns: ::prost::alloc::vec::Vec<ColumnDefinition>,
    /// Scripts attached to this table.
    #[prost(message, repeated, tag = "4")]
    pub scripts: ::prost::alloc::vec::Vec<ScriptInfo>,
}
/// A script that targets a specific column in a table.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ScriptInfo {
    /// Identifier of the script.
    #[prost(int64, tag = "1")]
    pub script_id: i64,
    /// Name of the column the script targets.
    #[prost(string, tag = "2")]
    pub target_column: ::prost::alloc::string::String,
    /// Declared type of the targeted column.
    #[prost(string, tag = "3")]
    pub target_column_type: ::prost::alloc::string::String,
    /// Script source text.
    #[prost(string, tag = "4")]
    pub script: ::prost::alloc::string::String,
    /// Human-readable description of the script.
    #[prost(string, tag = "5")]
    pub description: ::prost::alloc::string::String,
}
/// Request to rename one user-visible column alias in a table.
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct RenameColumnAliasRequest {
    /// Profile (schema) containing the table.
    #[prost(string, tag = "1")]
    pub profile_name: ::prost::alloc::string::String,
    /// Table whose column alias is being renamed.
    #[prost(string, tag = "2")]
    pub table_name: ::prost::alloc::string::String,
    /// Current alias to rename.
    #[prost(string, tag = "3")]
    pub old_column_name: ::prost::alloc::string::String,
    /// New alias to assign.
    #[prost(string, tag = "4")]
    pub new_column_name: ::prost::alloc::string::String,
}
/// Response after renaming one column alias.
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct RenameColumnAliasResponse {
    /// True when the rename was applied.
    #[prost(bool, tag = "1")]
    pub success: bool,
    /// Human-readable outcome description.
    #[prost(string, tag = "2")]
    pub message: ::prost::alloc::string::String,
}
/// Request to delete one table definition entirely.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct DeleteTableRequest {
@@ -289,6 +390,97 @@ pub mod table_definition_client {
);
self.inner.unary(req, path, codec).await
}
/// Fetches all tables with their columns and scripts for a specific profile.
/// Pure data retrieval - no business logic.
// NOTE(review): tonic-build generated client stub — regenerate from the .proto
// rather than hand-editing.
pub async fn get_profile_details(
    &mut self,
    request: impl tonic::IntoRequest<super::GetProfileDetailsRequest>,
) -> std::result::Result<
    tonic::Response<super::GetProfileDetailsResponse>,
    tonic::Status,
> {
    // Wait for the underlying transport to become ready; transport errors
    // surface to the caller as a gRPC UNKNOWN status.
    self.inner
        .ready()
        .await
        .map_err(|e| {
            tonic::Status::unknown(
                format!("Service was not ready: {}", e.into()),
            )
        })?;
    let codec = tonic::codec::ProstCodec::default();
    let path = http::uri::PathAndQuery::from_static(
        "/komp_ac.table_definition.TableDefinition/GetProfileDetails",
    );
    let mut req = request.into_request();
    // Tag the request with the gRPC method so interceptors/telemetry can
    // identify the call.
    req.extensions_mut()
        .insert(
            GrpcMethod::new(
                "komp_ac.table_definition.TableDefinition",
                "GetProfileDetails",
            ),
        );
    self.inner.unary(req, path, codec).await
}
/// Returns the stored rename history for column aliases in one profile.
// NOTE(review): tonic-build generated client stub — regenerate from the .proto
// rather than hand-editing.
pub async fn get_column_alias_rename_history(
    &mut self,
    request: impl tonic::IntoRequest<super::GetColumnAliasRenameHistoryRequest>,
) -> std::result::Result<
    tonic::Response<super::GetColumnAliasRenameHistoryResponse>,
    tonic::Status,
> {
    // Transport readiness failures are reported as a gRPC UNKNOWN status.
    self.inner
        .ready()
        .await
        .map_err(|e| {
            tonic::Status::unknown(
                format!("Service was not ready: {}", e.into()),
            )
        })?;
    let codec = tonic::codec::ProstCodec::default();
    let path = http::uri::PathAndQuery::from_static(
        "/komp_ac.table_definition.TableDefinition/GetColumnAliasRenameHistory",
    );
    let mut req = request.into_request();
    // Tag the request with the gRPC method for interceptors/telemetry.
    req.extensions_mut()
        .insert(
            GrpcMethod::new(
                "komp_ac.table_definition.TableDefinition",
                "GetColumnAliasRenameHistory",
            ),
        );
    self.inner.unary(req, path, codec).await
}
/// Renames a user-visible column alias while keeping the physical column unchanged.
// NOTE(review): tonic-build generated client stub — regenerate from the .proto
// rather than hand-editing.
pub async fn rename_column_alias(
    &mut self,
    request: impl tonic::IntoRequest<super::RenameColumnAliasRequest>,
) -> std::result::Result<
    tonic::Response<super::RenameColumnAliasResponse>,
    tonic::Status,
> {
    // Transport readiness failures are reported as a gRPC UNKNOWN status.
    self.inner
        .ready()
        .await
        .map_err(|e| {
            tonic::Status::unknown(
                format!("Service was not ready: {}", e.into()),
            )
        })?;
    let codec = tonic::codec::ProstCodec::default();
    let path = http::uri::PathAndQuery::from_static(
        "/komp_ac.table_definition.TableDefinition/RenameColumnAlias",
    );
    let mut req = request.into_request();
    // Tag the request with the gRPC method for interceptors/telemetry.
    req.extensions_mut()
        .insert(
            GrpcMethod::new(
                "komp_ac.table_definition.TableDefinition",
                "RenameColumnAlias",
            ),
        );
    self.inner.unary(req, path, codec).await
}
/// Drops a table and its metadata, then deletes the profile if it becomes empty.
pub async fn delete_table(
&mut self,
@@ -353,6 +545,31 @@ pub mod table_definition_server {
tonic::Response<super::ProfileTreeResponse>,
tonic::Status,
>;
/// Fetches all tables with their columns and scripts for a specific profile.
/// Pure data retrieval - no business logic.
async fn get_profile_details(
&self,
request: tonic::Request<super::GetProfileDetailsRequest>,
) -> std::result::Result<
tonic::Response<super::GetProfileDetailsResponse>,
tonic::Status,
>;
/// Returns the stored rename history for column aliases in one profile.
async fn get_column_alias_rename_history(
&self,
request: tonic::Request<super::GetColumnAliasRenameHistoryRequest>,
) -> std::result::Result<
tonic::Response<super::GetColumnAliasRenameHistoryResponse>,
tonic::Status,
>;
/// Renames a user-visible column alias while keeping the physical column unchanged.
async fn rename_column_alias(
&self,
request: tonic::Request<super::RenameColumnAliasRequest>,
) -> std::result::Result<
tonic::Response<super::RenameColumnAliasResponse>,
tonic::Status,
>;
/// Drops a table and its metadata, then deletes the profile if it becomes empty.
async fn delete_table(
&self,
@@ -537,6 +754,152 @@ pub mod table_definition_server {
};
Box::pin(fut)
}
"/komp_ac.table_definition.TableDefinition/GetProfileDetails" => {
#[allow(non_camel_case_types)]
struct GetProfileDetailsSvc<T: TableDefinition>(pub Arc<T>);
impl<
T: TableDefinition,
> tonic::server::UnaryService<super::GetProfileDetailsRequest>
for GetProfileDetailsSvc<T> {
type Response = super::GetProfileDetailsResponse;
type Future = BoxFuture<
tonic::Response<Self::Response>,
tonic::Status,
>;
fn call(
&mut self,
request: tonic::Request<super::GetProfileDetailsRequest>,
) -> Self::Future {
let inner = Arc::clone(&self.0);
let fut = async move {
<T as TableDefinition>::get_profile_details(&inner, request)
.await
};
Box::pin(fut)
}
}
let accept_compression_encodings = self.accept_compression_encodings;
let send_compression_encodings = self.send_compression_encodings;
let max_decoding_message_size = self.max_decoding_message_size;
let max_encoding_message_size = self.max_encoding_message_size;
let inner = self.inner.clone();
let fut = async move {
let method = GetProfileDetailsSvc(inner);
let codec = tonic::codec::ProstCodec::default();
let mut grpc = tonic::server::Grpc::new(codec)
.apply_compression_config(
accept_compression_encodings,
send_compression_encodings,
)
.apply_max_message_size_config(
max_decoding_message_size,
max_encoding_message_size,
);
let res = grpc.unary(method, req).await;
Ok(res)
};
Box::pin(fut)
}
"/komp_ac.table_definition.TableDefinition/GetColumnAliasRenameHistory" => {
#[allow(non_camel_case_types)]
struct GetColumnAliasRenameHistorySvc<T: TableDefinition>(
pub Arc<T>,
);
impl<
T: TableDefinition,
> tonic::server::UnaryService<
super::GetColumnAliasRenameHistoryRequest,
> for GetColumnAliasRenameHistorySvc<T> {
type Response = super::GetColumnAliasRenameHistoryResponse;
type Future = BoxFuture<
tonic::Response<Self::Response>,
tonic::Status,
>;
fn call(
&mut self,
request: tonic::Request<
super::GetColumnAliasRenameHistoryRequest,
>,
) -> Self::Future {
let inner = Arc::clone(&self.0);
let fut = async move {
<T as TableDefinition>::get_column_alias_rename_history(
&inner,
request,
)
.await
};
Box::pin(fut)
}
}
let accept_compression_encodings = self.accept_compression_encodings;
let send_compression_encodings = self.send_compression_encodings;
let max_decoding_message_size = self.max_decoding_message_size;
let max_encoding_message_size = self.max_encoding_message_size;
let inner = self.inner.clone();
let fut = async move {
let method = GetColumnAliasRenameHistorySvc(inner);
let codec = tonic::codec::ProstCodec::default();
let mut grpc = tonic::server::Grpc::new(codec)
.apply_compression_config(
accept_compression_encodings,
send_compression_encodings,
)
.apply_max_message_size_config(
max_decoding_message_size,
max_encoding_message_size,
);
let res = grpc.unary(method, req).await;
Ok(res)
};
Box::pin(fut)
}
"/komp_ac.table_definition.TableDefinition/RenameColumnAlias" => {
#[allow(non_camel_case_types)]
struct RenameColumnAliasSvc<T: TableDefinition>(pub Arc<T>);
impl<
T: TableDefinition,
> tonic::server::UnaryService<super::RenameColumnAliasRequest>
for RenameColumnAliasSvc<T> {
type Response = super::RenameColumnAliasResponse;
type Future = BoxFuture<
tonic::Response<Self::Response>,
tonic::Status,
>;
fn call(
&mut self,
request: tonic::Request<super::RenameColumnAliasRequest>,
) -> Self::Future {
let inner = Arc::clone(&self.0);
let fut = async move {
<T as TableDefinition>::rename_column_alias(&inner, request)
.await
};
Box::pin(fut)
}
}
let accept_compression_encodings = self.accept_compression_encodings;
let send_compression_encodings = self.send_compression_encodings;
let max_decoding_message_size = self.max_decoding_message_size;
let max_encoding_message_size = self.max_encoding_message_size;
let inner = self.inner.clone();
let fut = async move {
let method = RenameColumnAliasSvc(inner);
let codec = tonic::codec::ProstCodec::default();
let mut grpc = tonic::server::Grpc::new(codec)
.apply_compression_config(
accept_compression_encodings,
send_compression_encodings,
)
.apply_max_message_size_config(
max_decoding_message_size,
max_encoding_message_size,
);
let res = grpc.unary(method, req).await;
Ok(res)
};
Box::pin(fut)
}
"/komp_ac.table_definition.TableDefinition/DeleteTable" => {
#[allow(non_camel_case_types)]
struct DeleteTableSvc<T: TableDefinition>(pub Arc<T>);

View File

@@ -1,17 +1,27 @@
// This file is @generated by prost-build.
/// Request identifying the profile (schema) and table to inspect.
/// Request identifying the profile (schema) and tables to inspect.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GetTableStructureRequest {
/// Required. Profile (PostgreSQL schema) name. Must exist in `schemas`.
#[prost(string, tag = "1")]
pub profile_name: ::prost::alloc::string::String,
/// Required. Table name within the profile. Must exist in `table_definitions`
/// for the given profile. The physical table is then introspected via
/// information_schema.
#[prost(string, tag = "2")]
pub table_name: ::prost::alloc::string::String,
/// Required. Table names within the profile. Each must exist in
/// `table_definitions` for the given profile. The physical tables are then
/// introspected via information_schema.
#[prost(string, repeated, tag = "2")]
pub table_names: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
}
/// Response with the ordered list of columns (by ordinal position).
/// Batched response keyed by table name.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GetTableStructureResponse {
/// Per-table physical column lists keyed by requested table name.
#[prost(map = "string, message", tag = "1")]
pub table_structures: ::std::collections::HashMap<
::prost::alloc::string::String,
TableStructureResponse,
>,
}
/// Response with the ordered list of columns (by ordinal position) for one table.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct TableStructureResponse {
/// Columns of the physical table, including system columns (id, deleted,
@@ -55,14 +65,13 @@ pub mod table_structure_service_client {
)]
use tonic::codegen::*;
use tonic::codegen::http::Uri;
/// Introspects the physical PostgreSQL table for a given logical table
/// (defined in table_definitions) and returns its column structure.
/// Introspects the physical PostgreSQL tables for one or more logical tables
/// (defined in table_definitions) and returns their column structures.
/// The server validates that:
/// - The profile (schema) exists in `schemas`
/// - The table is defined for that profile in `table_definitions`
/// It then queries information_schema for the physical table and returns
/// normalized column metadata. If the physical table is missing despite
/// a definition, the response may contain an empty `columns` list.
/// - Every table is defined for that profile in `table_definitions`
/// It then queries information_schema for the physical tables and returns
/// normalized column metadata.
#[derive(Debug, Clone)]
pub struct TableStructureServiceClient<T> {
inner: tonic::client::Grpc<T>,
@@ -144,20 +153,20 @@ pub mod table_structure_service_client {
self
}
/// Return the physical column list (name, normalized data_type,
/// nullability, primary key flag) for a table in a profile.
/// nullability, primary key flag) for one or more tables in a profile.
///
/// Behavior:
/// - NOT_FOUND if profile doesn't exist in `schemas`
/// - NOT_FOUND if table not defined for that profile in `table_definitions`
/// - NOT_FOUND if any table is not defined for that profile in `table_definitions`
/// - Queries information_schema.columns ordered by ordinal position
/// - Normalizes data_type text (details under TableColumn.data_type)
/// - Returns an empty list if the table is validated but has no visible
/// columns in information_schema (e.g., physical table missing)
/// - Returns an error if any validated table has no visible columns in
/// information_schema (e.g., physical table missing)
pub async fn get_table_structure(
&mut self,
request: impl tonic::IntoRequest<super::GetTableStructureRequest>,
) -> std::result::Result<
tonic::Response<super::TableStructureResponse>,
tonic::Response<super::GetTableStructureResponse>,
tonic::Status,
> {
self.inner
@@ -198,31 +207,30 @@ pub mod table_structure_service_server {
#[async_trait]
pub trait TableStructureService: std::marker::Send + std::marker::Sync + 'static {
/// Return the physical column list (name, normalized data_type,
/// nullability, primary key flag) for a table in a profile.
/// nullability, primary key flag) for one or more tables in a profile.
///
/// Behavior:
/// - NOT_FOUND if profile doesn't exist in `schemas`
/// - NOT_FOUND if table not defined for that profile in `table_definitions`
/// - NOT_FOUND if any table is not defined for that profile in `table_definitions`
/// - Queries information_schema.columns ordered by ordinal position
/// - Normalizes data_type text (details under TableColumn.data_type)
/// - Returns an empty list if the table is validated but has no visible
/// columns in information_schema (e.g., physical table missing)
/// - Returns an error if any validated table has no visible columns in
/// information_schema (e.g., physical table missing)
async fn get_table_structure(
&self,
request: tonic::Request<super::GetTableStructureRequest>,
) -> std::result::Result<
tonic::Response<super::TableStructureResponse>,
tonic::Response<super::GetTableStructureResponse>,
tonic::Status,
>;
}
/// Introspects the physical PostgreSQL table for a given logical table
/// (defined in table_definitions) and returns its column structure.
/// Introspects the physical PostgreSQL tables for one or more logical tables
/// (defined in table_definitions) and returns their column structures.
/// The server validates that:
/// - The profile (schema) exists in `schemas`
/// - The table is defined for that profile in `table_definitions`
/// It then queries information_schema for the physical table and returns
/// normalized column metadata. If the physical table is missing despite
/// a definition, the response may contain an empty `columns` list.
/// - Every table is defined for that profile in `table_definitions`
/// It then queries information_schema for the physical tables and returns
/// normalized column metadata.
#[derive(Debug)]
pub struct TableStructureServiceServer<T> {
inner: Arc<T>,
@@ -307,7 +315,7 @@ pub mod table_structure_service_server {
T: TableStructureService,
> tonic::server::UnaryService<super::GetTableStructureRequest>
for GetTableStructureSvc<T> {
type Response = super::TableStructureResponse;
type Response = super::GetTableStructureResponse;
type Future = BoxFuture<
tonic::Response<Self::Response>,
tonic::Status,

File diff suppressed because it is too large Load Diff

View File

@@ -1,75 +1,178 @@
// common/src/search.rs
use std::path::{Path, PathBuf};
use tantivy::schema::*;
use tantivy::tokenizer::*;
use tantivy::schema::{
Field, IndexRecordOption, JsonObjectOptions, Schema, Term, TextFieldIndexing, INDEXED, STORED,
STRING,
};
use tantivy::tokenizer::{
AsciiFoldingFilter, LowerCaser, NgramTokenizer, RawTokenizer, RemoveLongFilter,
SimpleTokenizer, TextAnalyzer, TokenStream,
};
use tantivy::Index;
/// Creates a hybrid Slovak search schema with optimized prefix fields.
pub const F_PG_ID: &str = "pg_id";
pub const F_TABLE_NAME: &str = "table_name";
pub const F_ROW_KEY: &str = "row_key";
pub const F_DATA_WORD: &str = "data_word";
pub const F_DATA_NGRAM: &str = "data_ngram";
pub const F_DATA_EXACT: &str = "data_exact";
pub const TOK_WORD: &str = "kw_word";
pub const TOK_NGRAM: &str = "kw_ngram";
pub const TOK_EXACT: &str = "kw_exact";
/// Returns the on-disk path for a profile search index.
///
/// Each profile's index lives in its own directory directly under `root`,
/// named after the profile.
pub fn search_index_path(root: &Path, profile_name: &str) -> PathBuf {
    let mut path = root.to_path_buf();
    path.push(profile_name);
    path
}
/// Returns the unique index key for one table row inside a profile index.
///
/// The key has the shape `<table_name>:<row_id>`.
pub fn search_row_key(table_name: &str, row_id: i64) -> String {
    let mut key = String::with_capacity(table_name.len() + 21);
    key.push_str(table_name);
    key.push(':');
    key.push_str(&row_id.to_string());
    key
}
/// Normalizes user-entered values for exact-mode terms.
///
/// Runs the trimmed input through the same analyzer that indexes the
/// `data_exact` field, so query-time terms match index-time terms.
pub fn normalize_exact(input: &str) -> String {
    let value = input.trim();
    if value.is_empty() {
        return String::new();
    }
    let mut analyzer = exact_analyzer();
    let mut tokens = analyzer.token_stream(value);
    let mut normalized = String::with_capacity(value.len());
    while let Some(token) = tokens.next() {
        normalized.push_str(&token.text);
    }
    normalized
}
/// Normalizes a column name to the JSON-key form used at index time.
///
/// Only ASCII letters are lowercased; all other characters pass through
/// unchanged.
pub fn normalize_column_name(column: &str) -> String {
    column.chars().map(|ch| ch.to_ascii_lowercase()).collect()
}
/// Creates the column-aware search schema.
pub fn create_search_schema() -> Schema {
let mut schema_builder = Schema::builder();
schema_builder.add_u64_field("pg_id", INDEXED | STORED);
schema_builder.add_u64_field(F_PG_ID, INDEXED | STORED);
schema_builder.add_text_field(F_TABLE_NAME, STRING | STORED);
schema_builder.add_text_field(F_ROW_KEY, STRING | STORED);
// FIELD 1: For prefixes (1-4 chars).
let short_prefix_indexing = TextFieldIndexing::default()
.set_tokenizer("slovak_prefix_edge")
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
let short_prefix_options = TextOptions::default()
.set_indexing_options(short_prefix_indexing)
.set_stored();
schema_builder.add_text_field("prefix_edge", short_prefix_options);
// FIELD 2: For the full word.
let full_word_indexing = TextFieldIndexing::default()
.set_tokenizer("slovak_prefix_full")
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
let full_word_options = TextOptions::default()
.set_indexing_options(full_word_indexing)
.set_stored();
schema_builder.add_text_field("prefix_full", full_word_options);
// NGRAM FIELD: For substring matching.
let ngram_field_indexing = TextFieldIndexing::default()
.set_tokenizer("slovak_ngram")
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
let ngram_options = TextOptions::default()
.set_indexing_options(ngram_field_indexing)
.set_stored();
schema_builder.add_text_field("text_ngram", ngram_options);
schema_builder.add_json_field(F_DATA_WORD, json_options(TOK_WORD, true, false));
schema_builder.add_json_field(F_DATA_NGRAM, json_options(TOK_NGRAM, true, false));
schema_builder.add_json_field(F_DATA_EXACT, json_options(TOK_EXACT, false, false));
schema_builder.build()
}
/// Registers all necessary Slovak tokenizers with the index.
///
/// This must be called by ANY process that opens the index
/// to ensure the tokenizers are loaded into memory.
pub fn register_slovak_tokenizers(index: &Index) -> tantivy::Result<()> {
/// Builds `JsonObjectOptions` for a JSON field indexed with the named
/// tokenizer; positions and stored-ness are opted in by the callers.
fn json_options(tokenizer_name: &str, with_positions: bool, stored: bool) -> JsonObjectOptions {
    let indexing = TextFieldIndexing::default()
        .set_tokenizer(tokenizer_name)
        .set_index_option(if with_positions {
            IndexRecordOption::WithFreqsAndPositions
        } else {
            IndexRecordOption::Basic
        });
    let base = JsonObjectOptions::default().set_indexing_options(indexing);
    if stored { base.set_stored() } else { base }
}
/// Registers all required tokenizers with the index.
pub fn register_tokenizers(index: &Index) -> tantivy::Result<()> {
let tokenizer_manager = index.tokenizers();
// TOKENIZER for `prefix_edge`: Edge N-gram (1-4 chars)
let edge_tokenizer = TextAnalyzer::builder(NgramTokenizer::new(1, 4, true)?)
.filter(RemoveLongFilter::limit(40))
.filter(LowerCaser)
.filter(AsciiFoldingFilter)
.build();
tokenizer_manager.register("slovak_prefix_edge", edge_tokenizer);
// TOKENIZER for `prefix_full`: Simple word tokenizer
let full_tokenizer = TextAnalyzer::builder(SimpleTokenizer::default())
.filter(RemoveLongFilter::limit(40))
.filter(LowerCaser)
.filter(AsciiFoldingFilter)
.build();
tokenizer_manager.register("slovak_prefix_full", full_tokenizer);
// NGRAM TOKENIZER: For substring matching.
let ngram_tokenizer = TextAnalyzer::builder(NgramTokenizer::new(3, 3, false)?)
.filter(RemoveLongFilter::limit(40))
.filter(LowerCaser)
.filter(AsciiFoldingFilter)
.build();
tokenizer_manager.register("slovak_ngram", ngram_tokenizer);
tokenizer_manager.register(TOK_WORD, word_analyzer());
tokenizer_manager.register(TOK_NGRAM, ngram_analyzer()?);
tokenizer_manager.register(TOK_EXACT, exact_analyzer());
Ok(())
}
/// Analyzer for the `data_word` field: whitespace/punctuation word split,
/// long-token removal, lowercasing, ASCII folding.
fn word_analyzer() -> TextAnalyzer {
    let builder = TextAnalyzer::builder(SimpleTokenizer::default())
        .filter(RemoveLongFilter::limit(80))
        .filter(LowerCaser)
        .filter(AsciiFoldingFilter);
    builder.build()
}
/// Analyzer for the `data_ngram` field: 3-gram tokenization with the same
/// lowercasing and ASCII folding as the word analyzer.
fn ngram_analyzer() -> tantivy::Result<TextAnalyzer> {
    let tokenizer = NgramTokenizer::new(3, 3, false)?;
    let analyzer = TextAnalyzer::builder(tokenizer)
        .filter(RemoveLongFilter::limit(80))
        .filter(LowerCaser)
        .filter(AsciiFoldingFilter)
        .build();
    Ok(analyzer)
}
/// Analyzer for the `data_exact` field: the whole value is one raw token,
/// lowercased and ASCII-folded.
fn exact_analyzer() -> TextAnalyzer {
    let raw = RawTokenizer::default();
    TextAnalyzer::builder(raw)
        .filter(LowerCaser)
        .filter(AsciiFoldingFilter)
        .build()
}
/// Tokenizes text the same way `data_word` is indexed.
pub fn tokenize_word(text: &str) -> Vec<String> {
    let analyzer = word_analyzer();
    tokenize_with(analyzer, text)
}
/// Tokenizes text the same way `data_ngram` is indexed.
///
/// Returns an empty vector if the n-gram analyzer cannot be built
/// (deliberate best-effort; callers treat it as "no tokens").
pub fn tokenize_ngram(text: &str) -> Vec<String> {
    ngram_analyzer()
        .map(|analyzer| tokenize_with(analyzer, text))
        .unwrap_or_default()
}
/// Runs `text` through `analyzer` and collects every token's text.
fn tokenize_with(mut analyzer: TextAnalyzer, text: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut stream = analyzer.token_stream(text);
    // `TokenStream` is not an `Iterator`, so drain it manually.
    while let Some(token) = stream.next() {
        tokens.push(token.text.clone());
    }
    tokens
}
/// Builds a term scoped to a specific JSON path within a JSON field.
pub fn json_path_term(field: Field, column: &str, text: &str) -> Term {
let mut term = Term::from_field_json_path(field, column, false);
term.append_type_and_str(text);
term
}
/// Resolved handles to every schema field the search code needs.
///
/// Construct via [`SchemaFields::from`], which fails loudly if the index
/// schema is missing any field (i.e. the index predates the current schema).
pub struct SchemaFields {
    pub pg_id: Field,
    pub table_name: Field,
    pub row_key: Field,
    pub data_word: Field,
    pub data_ngram: Field,
    pub data_exact: Field,
}
impl SchemaFields {
    /// Resolves every required field from `schema`, failing with a
    /// descriptive error if any is missing.
    pub fn from(schema: &Schema) -> tantivy::Result<Self> {
        let pg_id = get_field(schema, F_PG_ID)?;
        let table_name = get_field(schema, F_TABLE_NAME)?;
        let row_key = get_field(schema, F_ROW_KEY)?;
        let data_word = get_field(schema, F_DATA_WORD)?;
        let data_ngram = get_field(schema, F_DATA_NGRAM)?;
        let data_exact = get_field(schema, F_DATA_EXACT)?;
        Ok(Self {
            pg_id,
            table_name,
            row_key,
            data_word,
            data_ngram,
            data_exact,
        })
    }
}
/// Looks up a field by name, wrapping a miss in a `SchemaError` that names
/// the missing field.
fn get_field(schema: &Schema, name: &str) -> tantivy::Result<Field> {
    match schema.get_field(name) {
        Ok(field) => Ok(field),
        Err(e) => Err(tantivy::TantivyError::SchemaError(format!(
            "schema is missing field '{name}': {e}"
        ))),
    }
}

1
search/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
.codex

View File

@@ -1,279 +1,436 @@
// src/lib.rs
mod query_builder;
use std::collections::HashMap;
use std::path::Path;
use tantivy::collector::TopDocs;
use tantivy::query::{
BooleanQuery, BoostQuery, FuzzyTermQuery, Occur, Query, QueryParser, TermQuery,
};
use tantivy::schema::{IndexRecordOption, Value};
use tantivy::{Index, TantivyDocument, Term};
use tonic::{Request, Response, Status};
use std::sync::{Arc, Mutex};
use common::proto::komp_ac::search::searcher_server::Searcher;
pub use common::proto::komp_ac::search::searcher_server::SearcherServer;
use common::proto::komp_ac::search::{search_response::Hit, SearchRequest, SearchResponse};
use common::search::register_slovak_tokenizers;
use common::search::{register_tokenizers, search_index_path, SchemaFields};
use query_builder::{build_master_query, ConstraintMode, SearchConstraint};
use sqlx::{PgPool, Row};
use tantivy::collector::TopDocs;
use tantivy::schema::Value;
use tantivy::{Index, IndexReader, ReloadPolicy, TantivyDocument};
use tonic::{Request, Response, Status};
use tracing::info;
// We need to hold the database pool in our service struct.
const INDEX_ROOT: &str = "./tantivy_indexes";
const DEFAULT_RESULT_LIMIT: usize = 25;
const HARD_RESULT_LIMIT: usize = 200;
const DEFAULT_LIST_LIMIT: usize = 5;
pub struct SearcherService {
pub pool: PgPool,
profiles: Mutex<HashMap<String, Arc<ProfileIndex>>>,
}
/// Strips diacritics from Slovak (and common Czech) letters, mapping each
/// accented character to its ASCII base letter.
///
/// Characters without a mapping (ASCII, digits, punctuation, other scripts)
/// pass through unchanged, so the output has the same number of `char`s as
/// the input.
fn normalize_slovak_text(text: &str) -> String {
    text.chars()
        .map(|c| match c {
            'á' | 'à' | 'â' | 'ä' | 'ă' | 'ā' => 'a',
            'Á' | 'À' | 'Â' | 'Ä' | 'Ă' | 'Ā' => 'A',
            'é' | 'è' | 'ê' | 'ë' | 'ě' | 'ē' => 'e',
            'É' | 'È' | 'Ê' | 'Ë' | 'Ě' | 'Ē' => 'E',
            'í' | 'ì' | 'î' | 'ï' | 'ī' => 'i',
            'Í' | 'Ì' | 'Î' | 'Ï' | 'Ī' => 'I',
            'ó' | 'ò' | 'ô' | 'ö' | 'ō' | 'ő' => 'o',
            'Ó' | 'Ò' | 'Ô' | 'Ö' | 'Ō' | 'Ő' => 'O',
            'ú' | 'ù' | 'û' | 'ü' | 'ū' | 'ű' => 'u',
            'Ú' | 'Ù' | 'Û' | 'Ü' | 'Ū' | 'Ű' => 'U',
            'ý' | 'ỳ' | 'ŷ' | 'ÿ' => 'y',
            'Ý' | 'Ỳ' | 'Ŷ' | 'Ÿ' => 'Y',
            'č' => 'c',
            'Č' => 'C',
            'ď' => 'd',
            'Ď' => 'D',
            // FIX: Slovak ĺ/Ĺ and ŕ/Ŕ were previously missing from a
            // function that claims to normalize Slovak text.
            'ĺ' | 'ľ' => 'l',
            'Ĺ' | 'Ľ' => 'L',
            'ň' => 'n',
            'Ň' => 'N',
            'ŕ' | 'ř' => 'r',
            'Ŕ' | 'Ř' => 'R',
            'š' => 's',
            'Š' => 'S',
            'ť' => 't',
            'Ť' => 'T',
            'ž' => 'z',
            'Ž' => 'Z',
            _ => c,
        })
        .collect()
}
impl SearcherService {
    /// Creates a searcher backed by `pool`, starting with an empty
    /// per-profile index cache (profile indexes are opened lazily on
    /// first use).
    pub fn new(pool: PgPool) -> Self {
        Self {
            pool,
            profiles: Mutex::new(HashMap::new()),
        }
    }
#[tonic::async_trait]
impl Searcher for SearcherService {
async fn search_table(
async fn run_rpc(
&self,
request: Request<SearchRequest>,
) -> Result<Response<SearchResponse>, Status> {
let req = request.into_inner();
let table_name = req.table_name;
let query_str = req.query;
let normalized = normalize_request(req)?;
if !profile_exists(&self.pool, &normalized.profile_name).await? {
return Err(Status::not_found(format!(
"Profile '{}' was not found",
normalized.profile_name
)));
}
if let Some(table_name) = normalized.table_name.as_deref() {
if !table_exists(&self.pool, &normalized.profile_name, table_name).await? {
return Err(Status::not_found(format!(
"Table '{}' was not found in profile '{}'",
table_name, normalized.profile_name
)));
}
}
if !normalized.has_input() {
let Some(table_name) = normalized.table_name.as_deref() else {
return Err(Status::invalid_argument(
"table_name is required when query is empty",
));
};
let hits = fetch_latest_rows(
&self.pool,
&normalized.profile_name,
table_name,
normalized.limit.unwrap_or(DEFAULT_LIST_LIMIT),
)
.await?;
return Ok(Response::new(SearchResponse { hits }));
}
let index_path = search_index_path(Path::new(INDEX_ROOT), &normalized.profile_name);
if !index_path.exists() {
return Err(Status::not_found(format!(
"No search index found for profile '{}'",
normalized.profile_name
)));
}
let profile = profile_index(&self.profiles, &normalized.profile_name, &index_path)?;
let mut hits = run_search(
&self.pool,
&profile,
&normalized.profile_name,
normalized.table_name.as_deref(),
&normalized.free_query,
&normalized.must,
normalized.limit.unwrap_or(DEFAULT_RESULT_LIMIT),
)
.await?;
hits.sort_by(|left, right| right.score.total_cmp(&left.score));
if let Some(limit) = normalized.limit {
if hits.len() > limit {
hits.truncate(limit);
}
}
// --- MODIFIED LOGIC ---
// If the query is empty, fetch the 5 most recent records.
if query_str.trim().is_empty() {
info!(
"Empty query for table '{}'. Fetching default results.",
table_name
);
let qualified_table = format!("gen.\"{}\"", table_name);
let sql = format!(
"SELECT id, to_jsonb(t) AS data FROM {} t ORDER BY id DESC LIMIT 5",
qualified_table
"search: profile={} table={:?} free='{}' constraints={} hits={}",
normalized.profile_name,
normalized.table_name,
normalized.free_query,
normalized.must.len(),
hits.len()
);
let rows = sqlx::query(&sql).fetch_all(&self.pool).await.map_err(|e| {
Status::internal(format!("DB query for default results failed: {}", e))
Ok(Response::new(SearchResponse { hits }))
}
}
/// Ready-to-search handles for one profile's on-disk index: the opened
/// index, a live reader, and the resolved schema fields. Cached per
/// profile in `SearcherService::profiles`.
struct ProfileIndex {
    index: Index,
    reader: IndexReader,
    fields: SchemaFields,
}
impl ProfileIndex {
fn open(path: &Path) -> Result<Self, Status> {
let index = Index::open_in_dir(path)
.map_err(|e| Status::internal(format!("Failed to open index: {}", e)))?;
register_tokenizers(&index)
.map_err(|e| Status::internal(format!("Failed to register tokenizers: {}", e)))?;
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::OnCommitWithDelay)
.try_into()
.map_err(|e| Status::internal(format!("Failed to build index reader: {}", e)))?;
let fields = SchemaFields::from(&index.schema()).map_err(|e| {
Status::internal(format!(
"Search index schema mismatch. Reindex required: {}",
e
))
})?;
let hits: Vec<Hit> = rows
Ok(Self {
index,
reader,
fields,
})
}
}
/// A `SearchRequest` after validation: trimmed identifiers, an optional
/// table filter, cleaned free-text query, validated column constraints,
/// and a limit already clamped to the hard cap.
#[derive(Debug)]
struct NormalizedSearchRequest {
    profile_name: String,
    // `None` means "search the whole profile".
    table_name: Option<String>,
    free_query: String,
    // Constraints that must all match (AND semantics assumed from the
    // field name — confirm in query_builder).
    must: Vec<SearchConstraint>,
    // `None` means "use the caller-side default limit".
    limit: Option<usize>,
}
impl NormalizedSearchRequest {
    /// True when the request carries anything to search for: a non-empty
    /// free-text query or at least one column constraint.
    fn has_input(&self) -> bool {
        let has_free_text = !self.free_query.is_empty();
        let has_constraints = !self.must.is_empty();
        has_free_text || has_constraints
    }
}
/// Returns the cached `ProfileIndex` for `profile_name`, opening and
/// caching it on first use.
///
/// Double-checked locking: the (slow) `ProfileIndex::open` runs outside
/// the mutex; if two callers race, the first insert wins and the loser's
/// freshly opened index is dropped.
fn profile_index(
    cache: &Mutex<HashMap<String, Arc<ProfileIndex>>>,
    profile_name: &str,
    path: &Path,
) -> Result<Arc<ProfileIndex>, Status> {
    // Fast path: already cached.
    {
        let cache_guard = cache
            .lock()
            .map_err(|_| Status::internal("Profile index cache lock poisoned"))?;
        if let Some(index) = cache_guard.get(profile_name) {
            return Ok(index.clone());
        }
    }
    // Open the index without holding the lock; this can be slow.
    let opened = Arc::new(ProfileIndex::open(path)?);
    let mut cache_guard = cache
        .lock()
        .map_err(|_| Status::internal("Profile index cache lock poisoned"))?;
    // Re-check under the lock: another caller may have inserted while we
    // were opening. The entry API does the lookup and conditional insert
    // in one pass instead of the previous `get` followed by `insert`.
    Ok(cache_guard
        .entry(profile_name.to_string())
        .or_insert(opened)
        .clone())
}
/// Ensures `value` is a safe SQL-style identifier: the first character is
/// an ASCII letter or `_`, the rest are ASCII alphanumerics or `_`.
fn validate_identifier(value: &str, field_name: &str) -> Result<(), Status> {
    let mut chars = value.chars();
    let first = match chars.next() {
        Some(ch) => ch,
        None => {
            return Err(Status::invalid_argument(format!(
                "{field_name} must not be empty"
            )));
        }
    };
    let head_ok = first.is_ascii_alphabetic() || first == '_';
    let tail_ok = chars.all(|ch| ch.is_ascii_alphanumeric() || ch == '_');
    if head_ok && tail_ok {
        Ok(())
    } else {
        Err(Status::invalid_argument(format!(
            "{field_name} contains invalid characters"
        )))
    }
}
/// Builds a double-quoted, schema-qualified table reference for SQL.
/// Inputs are expected to be pre-validated via `validate_identifier`.
fn qualify_profile_table(profile_name: &str, table_name: &str) -> String {
    format!("\"{profile_name}\".\"{table_name}\"")
}
/// Returns whether a profile (schema) row exists in the `schemas` table.
async fn profile_exists(pool: &PgPool, profile_name: &str) -> Result<bool, Status> {
    let sql = "SELECT EXISTS(SELECT 1 FROM schemas WHERE name = $1)";
    sqlx::query_scalar::<_, bool>(sql)
        .bind(profile_name)
        .fetch_one(pool)
        .await
        .map_err(|e| Status::internal(format!("Profile lookup failed: {}", e)))
}
/// Returns whether `table_name` is defined for `profile_name` in
/// `table_definitions` (joined through `schemas`).
async fn table_exists(pool: &PgPool, profile_name: &str, table_name: &str) -> Result<bool, Status> {
    let sql = r#"
        SELECT EXISTS(
            SELECT 1
            FROM table_definitions td
            JOIN schemas s ON td.schema_id = s.id
            WHERE s.name = $1 AND td.table_name = $2
        )
        "#;
    sqlx::query_scalar::<_, bool>(sql)
        .bind(profile_name)
        .bind(table_name)
        .fetch_one(pool)
        .await
        .map_err(|e| Status::internal(format!("Table lookup failed: {}", e)))
}
/// Trims and validates an incoming `SearchRequest`, producing the normalized
/// form used by the rest of the search pipeline.
///
/// Rejects empty or malformed identifiers (profile, table, constraint
/// columns) and empty constraint queries, and clamps any requested limit to
/// `HARD_RESULT_LIMIT`.
fn normalize_request(req: SearchRequest) -> Result<NormalizedSearchRequest, Status> {
    let profile_name = req.profile_name.trim();
    if profile_name.is_empty() {
        return Err(Status::invalid_argument("profile_name is required"));
    }
    validate_identifier(profile_name, "profile_name")?;

    // An absent or all-whitespace table name means "search every table".
    let table_name = req
        .table_name
        .as_deref()
        .map(str::trim)
        .filter(|name| !name.is_empty());
    if let Some(name) = table_name {
        validate_identifier(name, "table_name")?;
    }

    let must = req
        .must
        .into_iter()
        .map(|constraint| {
            let column = constraint.column.trim();
            if column.is_empty() {
                return Err(Status::invalid_argument(
                    "constraint.column must not be empty",
                ));
            }
            validate_identifier(column, "constraint.column")?;
            let query = constraint.query.trim();
            if query.is_empty() {
                return Err(Status::invalid_argument(
                    "constraint.query must not be empty",
                ));
            }
            Ok(SearchConstraint {
                column: column.to_string(),
                query: query.to_string(),
                mode: constraint_mode_from_proto(constraint.mode),
            })
        })
        .collect::<Result<Vec<_>, Status>>()?;

    Ok(NormalizedSearchRequest {
        profile_name: profile_name.to_string(),
        table_name: table_name.map(str::to_string),
        free_query: req.free_query.trim().to_string(),
        must,
        limit: req
            .limit
            .map(|value| (value as usize).min(HARD_RESULT_LIMIT)),
    })
}
/// Maps the raw proto enum value onto `ConstraintMode`. Wire value 2 means
/// exact matching; every other value (including unknown future values)
/// falls back to fuzzy matching.
fn constraint_mode_from_proto(raw_mode: i32) -> ConstraintMode {
    if raw_mode == 2 {
        ConstraintMode::Exact
    } else {
        ConstraintMode::Fuzzy
    }
}
async fn fetch_latest_rows(
pool: &PgPool,
profile_name: &str,
table_name: &str,
limit: usize,
) -> Result<Vec<Hit>, Status> {
let sql = format!(
"SELECT id, to_jsonb(t) AS data FROM {} t WHERE deleted = FALSE ORDER BY id DESC LIMIT $1",
qualify_profile_table(profile_name, table_name)
);
let rows = sqlx::query(&sql)
.bind(limit as i64)
.fetch_all(pool)
.await
.map_err(|e| Status::internal(format!("DB query for default results failed: {}", e)))?;
Ok(rows
.into_iter()
.map(|row| {
let id: i64 = row.try_get("id").unwrap_or_default();
let json_data: serde_json::Value = row.try_get("data").unwrap_or_default();
Hit {
id,
// Score is 0.0 as this is not a relevance-ranked search
score: 0.0,
content_json: json_data.to_string(),
table_name: table_name.to_string(),
}
})
.collect();
.collect())
}
info!(
"--- SERVER: Successfully processed empty query. Returning {} default hits. ---",
hits.len()
);
return Ok(Response::new(SearchResponse { hits }));
}
// --- END OF MODIFIED LOGIC ---
let index_path = Path::new("./tantivy_indexes").join(&table_name);
if !index_path.exists() {
return Err(Status::not_found(format!(
"No search index found for table '{}'",
table_name
)));
}
let index = Index::open_in_dir(&index_path)
.map_err(|e| Status::internal(format!("Failed to open index: {}", e)))?;
register_slovak_tokenizers(&index).map_err(|e| {
Status::internal(format!("Failed to register Slovak tokenizers: {}", e))
})?;
let reader = index
.reader()
.map_err(|e| Status::internal(format!("Failed to create index reader: {}", e)))?;
let searcher = reader.searcher();
let schema = index.schema();
let pg_id_field = schema
.get_field("pg_id")
.map_err(|_| Status::internal("Schema is missing the 'pg_id' field."))?;
// --- Query Building Logic (no changes here) ---
let prefix_edge_field = schema.get_field("prefix_edge").unwrap();
let prefix_full_field = schema.get_field("prefix_full").unwrap();
let text_ngram_field = schema.get_field("text_ngram").unwrap();
let normalized_query = normalize_slovak_text(&query_str);
let words: Vec<&str> = normalized_query.split_whitespace().collect();
if words.is_empty() {
return Ok(Response::new(SearchResponse { hits: vec![] }));
}
let mut query_layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();
// ... all your query building layers remain exactly the same ...
// ===============================
// LAYER 1: PREFIX MATCHING (HIGHEST PRIORITY, Boost: 4.0)
// ===============================
{
let mut must_clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();
for word in &words {
let edge_term = Term::from_field_text(prefix_edge_field, word);
let full_term = Term::from_field_text(prefix_full_field, word);
let per_word_query = BooleanQuery::new(vec![
(
Occur::Should,
Box::new(TermQuery::new(edge_term, IndexRecordOption::Basic)),
),
(
Occur::Should,
Box::new(TermQuery::new(full_term, IndexRecordOption::Basic)),
),
]);
must_clauses.push((Occur::Must, Box::new(per_word_query) as Box<dyn Query>));
}
if !must_clauses.is_empty() {
let prefix_query = BooleanQuery::new(must_clauses);
let boosted_query = BoostQuery::new(Box::new(prefix_query), 4.0);
query_layers.push((Occur::Should, Box::new(boosted_query)));
}
}
// ===============================
// LAYER 2: FUZZY MATCHING (HIGH PRIORITY, Boost: 3.0)
// ===============================
{
let last_word = words.last().unwrap();
let fuzzy_term = Term::from_field_text(prefix_full_field, last_word);
let fuzzy_query = FuzzyTermQuery::new(fuzzy_term, 2, true);
let boosted_query = BoostQuery::new(Box::new(fuzzy_query), 3.0);
query_layers.push((Occur::Should, Box::new(boosted_query)));
}
// ===============================
// LAYER 3: PHRASE MATCHING WITH SLOP (MEDIUM PRIORITY, Boost: 2.0)
// ===============================
if words.len() > 1 {
let slop_parser = QueryParser::for_index(&index, vec![prefix_full_field]);
let slop_query_str = format!("\"{}\"~3", normalized_query);
if let Ok(slop_query) = slop_parser.parse_query(&slop_query_str) {
let boosted_query = BoostQuery::new(slop_query, 2.0);
query_layers.push((Occur::Should, Box::new(boosted_query)));
}
}
// ===============================
// LAYER 4: NGRAM SUBSTRING MATCHING (LOWEST PRIORITY, Boost: 1.0)
// ===============================
{
let ngram_parser = QueryParser::for_index(&index, vec![text_ngram_field]);
if let Ok(ngram_query) = ngram_parser.parse_query(&normalized_query) {
let boosted_query = BoostQuery::new(ngram_query, 1.0);
query_layers.push((Occur::Should, Box::new(boosted_query)));
}
}
let master_query = BooleanQuery::new(query_layers);
// --- End of Query Building Logic ---
async fn run_search(
pool: &PgPool,
profile: &ProfileIndex,
profile_name: &str,
table_filter: Option<&str>,
free_query: &str,
must: &[SearchConstraint],
limit: usize,
) -> Result<Vec<Hit>, Status> {
let master_query = build_master_query(
&profile.index,
&profile.fields,
free_query,
must,
table_filter,
)?;
let searcher = profile.reader.searcher();
let top_docs = searcher
.search(&master_query, &TopDocs::with_limit(100))
.search(&*master_query, &TopDocs::with_limit(limit))
.map_err(|e| Status::internal(format!("Search failed: {}", e)))?;
if top_docs.is_empty() {
return Ok(Response::new(SearchResponse { hits: vec![] }));
return Ok(vec![]);
}
// --- NEW LOGIC: Fetch from DB and combine results ---
// Step 1: Extract (score, pg_id) from Tantivy results.
let mut scored_ids: Vec<(f32, u64)> = Vec::new();
let mut candidates: Vec<(f32, i64, String)> = Vec::with_capacity(top_docs.len());
for (score, doc_address) in top_docs {
let doc: TantivyDocument = searcher
.doc(doc_address)
.map_err(|e| Status::internal(format!("Failed to retrieve document: {}", e)))?;
if let Some(pg_id_value) = doc.get_first(pg_id_field) {
if let Some(pg_id) = pg_id_value.as_u64() {
scored_ids.push((score, pg_id));
}
}
let Some(pg_id) = doc
.get_first(profile.fields.pg_id)
.and_then(|value| value.as_u64())
else {
continue;
};
let Some(table_name) = doc
.get_first(profile.fields.table_name)
.and_then(|value| value.as_str())
else {
continue;
};
candidates.push((score, pg_id as i64, table_name.to_string()));
}
// Step 2: Fetch all corresponding rows from Postgres in a single query.
let pg_ids: Vec<i64> = scored_ids.iter().map(|(_, id)| *id as i64).collect();
let qualified_table = format!("gen.\"{}\"", table_name);
let query_str = format!(
"SELECT id, to_jsonb(t) AS data FROM {} t WHERE id = ANY($1)",
qualified_table
if candidates.is_empty() {
return Ok(vec![]);
}
let mut ids_by_table: HashMap<String, Vec<i64>> = HashMap::new();
for (_, pg_id, table_name) in &candidates {
ids_by_table
.entry(table_name.clone())
.or_default()
.push(*pg_id);
}
let mut content_map: HashMap<(String, i64), String> = HashMap::new();
for (table_name, pg_ids) in ids_by_table {
validate_identifier(&table_name, "table_name")?;
let sql = format!(
"SELECT id, to_jsonb(t) AS data FROM {} t WHERE deleted = FALSE AND id = ANY($1)",
qualify_profile_table(profile_name, &table_name)
);
let rows = sqlx::query(&query_str)
let rows = sqlx::query(&sql)
.bind(&pg_ids)
.fetch_all(&self.pool)
.fetch_all(pool)
.await
.map_err(|e| Status::internal(format!("Database query failed: {}", e)))?;
// Step 3: Map the database results by ID for quick lookup.
let mut content_map: HashMap<i64, String> = HashMap::new();
for row in rows {
let id: i64 = row.try_get("id").unwrap_or(0);
let json_data: serde_json::Value =
row.try_get("data").unwrap_or(serde_json::Value::Null);
content_map.insert(id, json_data.to_string());
let id: i64 = row.try_get("id").unwrap_or_default();
let json_data: serde_json::Value = row.try_get("data").unwrap_or_default();
content_map.insert((table_name.clone(), id), json_data.to_string());
}
}
// Step 4: Build the final response, combining Tantivy scores with PG content.
let hits: Vec<Hit> = scored_ids
Ok(candidates
.into_iter()
.filter_map(|(score, pg_id)| {
content_map.get(&(pg_id as i64)).map(|content_json| Hit {
id: pg_id as i64,
.filter_map(|(score, pg_id, table_name)| {
content_map
.get(&(table_name.clone(), pg_id))
.map(|content_json| Hit {
id: pg_id,
score,
content_json: content_json.clone(),
table_name,
})
})
.collect();
.collect())
}
info!(
"--- SERVER: Successfully processed search. Returning {} hits. ---",
hits.len()
);
let response = SearchResponse { hits };
Ok(Response::new(response))
/// gRPC entry point: the generated `Searcher` service trait delegates
/// directly to the service's `run_rpc` implementation.
#[tonic::async_trait]
impl Searcher for SearcherService {
    async fn search(
        &self,
        request: Request<SearchRequest>,
    ) -> Result<Response<SearchResponse>, Status> {
        self.run_rpc(request).await
    }
}

239
search/src/query_builder.rs Normal file
View File

@@ -0,0 +1,239 @@
use common::search::{
json_path_term, normalize_exact, tokenize_ngram, tokenize_word, SchemaFields,
};
use tantivy::query::{
BooleanQuery, BoostQuery, EmptyQuery, FuzzyTermQuery, Occur, PhraseQuery, Query, QueryParser,
TermQuery,
};
use tantivy::schema::{IndexRecordOption, Term};
use tantivy::Index;
use tonic::Status;
/// How a per-column constraint matches: tolerant token matching (`Fuzzy`)
/// or normalized exact-value matching (`Exact`).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ConstraintMode {
    Fuzzy,
    Exact,
}
/// One column-scoped search predicate: `column` must match `query` using
/// the given `mode`.
#[derive(Clone, Debug)]
pub struct SearchConstraint {
    pub column: String,
    pub query: String,
    pub mode: ConstraintMode,
}
/// Assembles the top-level Tantivy query for one search request.
///
/// Every `must` constraint becomes a `Must` clause, the free-text query (if
/// it tokenizes to anything) a `Should` clause, and an optional table filter
/// a `Must` term. When neither a constraint nor free text was supplied,
/// `EmptyQuery` is returned so a bare table filter can never match the whole
/// index.
pub fn build_master_query(
    index: &Index,
    fields: &SchemaFields,
    free_query: &str,
    must: &[SearchConstraint],
    table_filter: Option<&str>,
) -> Result<Box<dyn Query>, Status> {
    let mut clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();

    for constraint in must {
        let predicate = match constraint.mode {
            ConstraintMode::Exact => {
                exact_predicate(fields, &constraint.column, &constraint.query)?
            }
            ConstraintMode::Fuzzy => {
                fuzzy_predicate_scoped(fields, &constraint.column, &constraint.query)?
            }
        };
        clauses.push((Occur::Must, predicate));
    }

    let free_words = tokenize_word(free_query);
    if !free_words.is_empty() {
        clauses.push((
            Occur::Should,
            fuzzy_predicate_unscoped(index, fields, &free_words)?,
        ));
    }

    // Everything pushed so far is a real search clause; the table filter
    // below only narrows results and must not count as one.
    let has_search_clause = !clauses.is_empty();

    if let Some(table_name) = table_filter {
        let filter_term = Term::from_field_text(fields.table_name, table_name);
        clauses.push((
            Occur::Must,
            Box::new(TermQuery::new(filter_term, IndexRecordOption::Basic)),
        ));
    }

    if has_search_clause {
        Ok(Box::new(BooleanQuery::new(clauses)))
    } else {
        Ok(Box::new(EmptyQuery))
    }
}
/// Builds the predicate for an exact-match constraint: the query is run
/// through the same normalization used at indexing time and looked up as a
/// single term under the column's JSON path.
fn exact_predicate(
    fields: &SchemaFields,
    column: &str,
    query: &str,
) -> Result<Box<dyn Query>, Status> {
    let normalized = normalize_exact(query);
    if normalized.is_empty() {
        return Err(Status::invalid_argument(
            "exact query is empty after normalization",
        ));
    }
    let term = json_path_term(fields.data_exact, column, &normalized);
    let term_query = TermQuery::new(term, IndexRecordOption::Basic);
    Ok(Box::new(term_query))
}
/// Builds the column-scoped fuzzy predicate for a `Fuzzy` constraint.
///
/// The result is a `Should`-combined set of boosted layers so that better
/// matches rank higher while any layer is enough to match:
/// - per-word layer (every word must match as exact term 4.0, prefix 3.0,
///   or edit-distance fuzzy 2.0),
/// - a slop-3 phrase layer (2.0) for multi-word queries,
/// - an all-ngrams substring layer (1.0).
///
/// Errors if the query produces no searchable tokens.
fn fuzzy_predicate_scoped(
    fields: &SchemaFields,
    column: &str,
    query: &str,
) -> Result<Box<dyn Query>, Status> {
    let words = tokenize_word(query);
    if words.is_empty() {
        return Err(Status::invalid_argument(
            "fuzzy query has no searchable tokens",
        ));
    }
    let mut layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();
    // Layer 1: every word must match via one of three boosted alternates.
    let mut per_word_clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();
    for word in &words {
        let term = json_path_term(fields.data_word, column, word);
        let mut alternates: Vec<(Occur, Box<dyn Query>)> = Vec::new();
        // Exact word match — strongest signal (boost 4.0).
        alternates.push((
            Occur::Should,
            Box::new(BoostQuery::new(
                Box::new(TermQuery::new(term.clone(), IndexRecordOption::WithFreqs)),
                4.0,
            )),
        ));
        // Prefix match (distance 0) — supports search-as-you-type (boost 3.0).
        alternates.push((
            Occur::Should,
            Box::new(BoostQuery::new(
                Box::new(FuzzyTermQuery::new_prefix(term.clone(), 0, false)),
                3.0,
            )),
        ));
        // Edit-distance fuzzy match, only for words long enough to tolerate
        // typos (see `fuzzy_distance`); boost 2.0.
        if let Some(distance) = fuzzy_distance(word.chars().count()) {
            alternates.push((
                Occur::Should,
                Box::new(BoostQuery::new(
                    Box::new(FuzzyTermQuery::new(term.clone(), distance, true)),
                    2.0,
                )),
            ));
        }
        per_word_clauses.push((Occur::Must, Box::new(BooleanQuery::new(alternates))));
    }
    layers.push((Occur::Should, Box::new(BooleanQuery::new(per_word_clauses))));
    // Layer 2: phrase proximity with slop 3 for multi-word queries (boost 2.0).
    if words.len() > 1 {
        let phrase_terms: Vec<(usize, Term)> = words
            .iter()
            .enumerate()
            .map(|(offset, word)| (offset, json_path_term(fields.data_word, column, word)))
            .collect();
        let phrase = PhraseQuery::new_with_offset_and_slop(phrase_terms, 3);
        layers.push((
            Occur::Should,
            Box::new(BoostQuery::new(Box::new(phrase), 2.0)),
        ));
    }
    // Layer 3: substring matching — all ngrams of the query must appear
    // (boost 1.0, the weakest signal).
    let ngrams = tokenize_ngram(query);
    if !ngrams.is_empty() {
        let ngram_clauses: Vec<(Occur, Box<dyn Query>)> = ngrams
            .into_iter()
            .map(|gram| {
                let term = json_path_term(fields.data_ngram, column, &gram);
                (
                    Occur::Must,
                    Box::new(TermQuery::new(term, IndexRecordOption::Basic)) as Box<dyn Query>,
                )
            })
            .collect();
        layers.push((
            Occur::Should,
            Box::new(BoostQuery::new(
                Box::new(BooleanQuery::new(ngram_clauses)),
                1.0,
            )),
        ));
    }
    Ok(Box::new(BooleanQuery::new(layers)))
}
/// Builds the free-text predicate that searches across all indexed columns
/// (no column scope), using Tantivy's `QueryParser` syntax.
///
/// Four boosted `Should` layers: required prefix terms (`+word*`, 4.0),
/// required fuzzy terms (`+word~d`, 2.0), a slop-3 phrase for multi-word
/// input (2.0), and required ngram terms (1.0). Layers whose query string
/// fails to parse are silently skipped; if all are skipped, `EmptyQuery`
/// is returned.
fn fuzzy_predicate_unscoped(
    index: &Index,
    fields: &SchemaFields,
    words: &[String],
) -> Result<Box<dyn Query>, Status> {
    let mut layers: Vec<(Occur, Box<dyn Query>)> = Vec::new();
    // Layer 1: every word must match as a prefix (boost 4.0).
    {
        let parser = QueryParser::for_index(index, vec![fields.data_word]);
        let query_string = words
            .iter()
            .map(|word| format!("+{}*", word))
            .collect::<Vec<_>>()
            .join(" ");
        if let Ok(query) = parser.parse_query(&query_string) {
            layers.push((Occur::Should, Box::new(BoostQuery::new(query, 4.0))));
        }
    }
    // Layer 2: every word must match within its length-based edit distance
    // (boost 2.0); short words fall back to exact terms.
    {
        let parser = QueryParser::for_index(index, vec![fields.data_word]);
        let query_string = words
            .iter()
            .map(|word| match fuzzy_distance(word.chars().count()) {
                Some(distance) => format!("+{}~{}", word, distance),
                None => format!("+{}", word),
            })
            .collect::<Vec<_>>()
            .join(" ");
        if let Ok(query) = parser.parse_query(&query_string) {
            layers.push((Occur::Should, Box::new(BoostQuery::new(query, 2.0))));
        }
    }
    // Layer 3: phrase proximity with slop 3 for multi-word input (boost 2.0).
    if words.len() > 1 {
        let parser = QueryParser::for_index(index, vec![fields.data_word]);
        let query_string = format!("\"{}\"~3", words.join(" "));
        if let Ok(query) = parser.parse_query(&query_string) {
            layers.push((Occur::Should, Box::new(BoostQuery::new(query, 2.0))));
        }
    }
    // Layer 4: substring matching via the ngram field (boost 1.0).
    {
        let parser = QueryParser::for_index(index, vec![fields.data_ngram]);
        let query_string = words
            .iter()
            .map(|word| format!("+{}", word))
            .collect::<Vec<_>>()
            .join(" ");
        if let Ok(query) = parser.parse_query(&query_string) {
            layers.push((Occur::Should, Box::new(BoostQuery::new(query, 1.0))));
        }
    }
    if layers.is_empty() {
        return Ok(Box::new(EmptyQuery));
    }
    Ok(Box::new(BooleanQuery::new(layers)))
}
/// Chooses the Levenshtein edit distance allowed for a fuzzy term based on
/// word length: short words (<= 3 chars) get none, medium words (4-6) one
/// edit, longer words two.
fn fuzzy_distance(word_len: usize) -> Option<u8> {
    if word_len <= 3 {
        None
    } else if word_len <= 6 {
        Some(1)
    } else {
        Some(2)
    }
}

2
server

Submodule server updated: 6b0c3e63b4...b178fce273

View File

@@ -0,0 +1,18 @@
[package]
name = "validation-core"
version.workspace = true
edition.workspace = true
license.workspace = true
authors.workspace = true
description = "Shared validation primitives, rules, and sets."
repository.workspace = true
[dependencies]
serde = { workspace = true }
thiserror = { workspace = true }
unicode-width = { workspace = true }
regex = { workspace = true, optional = true }
[features]
default = []
regex = ["dep:regex"]

View File

@@ -0,0 +1,493 @@
# Validation
This document is the frontend guide for the validation system.
The important idea: reusable validation is built from **rules** and **sets**.
The frontend creates and manages those. When a set is applied to a table field,
the server resolves it into the existing `FieldValidation` shape, and the form
runtime continues to work through the normal table-validation flow.
## Ownership
```mermaid
flowchart LR
Core[validation-core<br/>validation meaning<br/>rule/set merge rules]
Server[server<br/>stores rules/sets<br/>applies sets<br/>enforces writes]
Common[common/proto<br/>gRPC contract]
Client[client/frontend<br/>rule/set UI<br/>calls gRPC]
Canvas[canvas<br/>field editing<br/>mask display<br/>local feedback]
Server --> Core
Canvas --> Core
Client --> Common
Server --> Common
Client --> Canvas
```
`server` stores simple serializable settings. `validation-core` owns how those
settings combine. `canvas` uses resolved field validation to guide editing.
## Terms
| Term | Meaning |
| --- | --- |
| `FieldValidation` | Existing per-column validation config from `common/proto/table_validation.proto`. This is what forms/canvas already consume. |
| `ValidationRule` | One named reusable fragment, for example `digits-only`, `phone-length`, or `required`. Stored by the server as a `FieldValidation` fragment with no meaningful `dataKey`. |
| `ValidationSet` | Ordered collection of rule names, for example `phone = [required, phone-length, digits-only, phone-mask]`. |
| Applied validation | A resolved snapshot of a set written to `table_validation_rules` for a concrete `(table, dataKey)`. |
| Snapshot | Applying a set copies the resolved config to a field. Later edits to the set do not automatically update fields that were already applied. |
## What Backend Enforces
Backend write validation enforces only server-relevant parts:
| FieldValidation part | Backend | Canvas/frontend |
| --- | --- | --- |
| `required` | Yes | Yes |
| `limits` | Yes | Yes |
| `pattern` | Yes | Yes |
| `allowed_values` | Yes | Yes |
| `mask` | Partly: raw value length/literals | Yes: display/editing mask |
| `formatter` | No | Yes |
| `external_validation_enabled` | No | Yes/UI hint |
`mask` is visual metadata, but the backend still uses it to reject incorrectly
submitted raw values. Example: if the mask is `(###) ###-####`, the backend
expects the stored value to be raw digits, not `(123) 456-7890`.
## Main User Flow
```mermaid
sequenceDiagram
participant UI as Frontend UI
participant API as TableValidationService
participant DB as Server DB
participant Form as Existing Form Runtime
UI->>API: UpsertValidationRule(required)
UI->>API: UpsertValidationRule(digits-only)
UI->>API: UpsertValidationRule(phone-length)
UI->>API: UpsertValidationSet(phone: [required, phone-length, digits-only])
UI->>API: ApplyValidationSet(profile, table, dataKey, phone)
API->>DB: write resolved FieldValidation snapshot
Form->>API: GetTableValidation(profile, table)
API->>Form: resolved FieldValidation for dataKey
```
After `ApplyValidationSet`, the existing form code does not need to know that a
set was used. It receives normal `FieldValidation`.
## API
All APIs live on `TableValidationService`.
### Rules
Create or update one reusable rule:
```text
UpsertValidationRule(UpsertValidationRuleRequest)
```
Request shape:
```text
profileName: string
rule:
name: string
description: optional string
validation: FieldValidation
```
Frontend rules:
- `rule.name` is required and unique inside a profile.
- `rule.validation.dataKey` is ignored by the server.
- A rule should usually configure one logical fragment.
- Examples: `required`, `phone-length`, `digits-only`, `phone-mask`.
List rules:
```text
ListValidationRules({ profileName })
```
Delete rule:
```text
DeleteValidationRule({ profileName, name })
```
Deleting a rule removes it from future reusable composition. Already applied
field snapshots are not changed.
### Sets
Create or update one reusable set:
```text
UpsertValidationSet(UpsertValidationSetRequest)
```
Request shape:
```text
profileName: string
set:
name: string
description: optional string
ruleNames: repeated string
```
Frontend rules:
- `set.name` is required and unique inside a profile.
- `ruleNames` must contain at least one rule.
- `ruleNames` are ordered.
- Every rule name must already exist.
- Duplicate rule names in the same set are rejected.
- Conflicting singleton fragments are rejected.
Singleton fragments are:
```text
limits
allowed_values
mask
formatter
```
That means a set cannot currently contain two rules that both define the same singleton fragment — for example, two rules that both set `limits`.
Pattern rules are additive: multiple rules with `pattern` are merged into one
combined pattern.
List sets:
```text
ListValidationSets({ profileName })
```
Response includes each set plus `resolvedValidation`, so the frontend can show
what the set expands to.
Delete set:
```text
DeleteValidationSet({ profileName, name })
```
Deleting a set does not change already applied fields.
### Apply Set To Field
Apply a reusable set to one field:
```text
ApplyValidationSet(ApplyValidationSetRequest)
```
Request shape:
```text
profileName: string
tableName: string
dataKey: string
setName: string
```
Server behavior:
1. Loads the set.
2. Loads its ordered rules.
3. Resolves/merges them through `validation-core`.
4. Validates that `dataKey` exists in the table definition.
5. Writes the resolved config into existing `table_validation_rules`.
This is a snapshot. If the user later edits the `phone` set, fields that already
used `phone` keep their old resolved config until the set is applied again.
## FieldValidation Guide
Rules and direct field validation both use `FieldValidation`.
### Required
```text
required: true
```
Backend rejects missing or empty values.
### Limits
```text
limits:
min: 10
max: 10
warnAt: optional
countMode: CHARS | BYTES | DISPLAY_WIDTH
```
Backend enforces `min` and `max`. `warnAt` is mainly UI feedback.
### Pattern
Pattern rules validate characters at positions.
Example digits-only:
```text
pattern:
rules:
- position:
kind: PATTERN_POSITION_FROM
start: 0
constraint:
kind: CHARACTER_CONSTRAINT_NUMERIC
```
Useful constraints:
```text
CHARACTER_CONSTRAINT_ALPHABETIC
CHARACTER_CONSTRAINT_NUMERIC
CHARACTER_CONSTRAINT_ALPHANUMERIC
CHARACTER_CONSTRAINT_EXACT
CHARACTER_CONSTRAINT_ONE_OF
CHARACTER_CONSTRAINT_REGEX
```
Pattern fragments from multiple rules are merged.
### Allowed Values
```text
allowed_values:
values: ["open", "closed"]
allow_empty: false
case_insensitive: true
```
Backend rejects values not in the list.
### Mask
```text
mask:
pattern: "(###) ###-####"
input_char: "#"
template_char: "_"
```
Canvas uses this for display/editing. Backend expects raw values without mask
literals.
### Formatter
```text
formatter:
type: "PhoneFormatter"
options: []
description: optional
```
Formatter is resolved client-side. Backend stores it but does not execute it.
### External Validation
```text
external_validation_enabled: true
```
This is a frontend/UI hint. Backend stores it but does not perform external
validation.
## Recommended Frontend Screens
### Rule List
Show all rules for a profile.
Actions:
```text
create rule
edit rule
delete rule
preview rule config
```
### Rule Editor
Build a `ValidationRuleDefinition`.
Recommended UI:
```text
name
description
required toggle
limits section
pattern section
allowed values section
mask section
formatter section
external validation toggle
```
For v1, encourage one fragment per rule. Example: create `phone-length` and
`digits-only` separately, instead of one huge rule.
### Set List
Show all sets for a profile.
Use `ListValidationSets`, because it returns `resolvedValidation`.
Actions:
```text
create set
edit set
delete set
preview resolved validation
```
### Set Editor
Build a `ValidationSetDefinition`.
Recommended UI:
```text
name
description
ordered rule picker
resolved preview
```
When rule ordering changes, call `UpsertValidationSet` and then refresh
`ListValidationSets`.
### Apply Set
On the table/field validation screen, add:
```text
Apply validation set
```
Flow:
1. Load sets with `ListValidationSets`.
2. User selects a set.
3. Call `ApplyValidationSet(profileName, tableName, dataKey, setName)`.
4. Refresh `GetTableValidation(profileName, tableName)`.
The field should now behave exactly like a directly configured field validation.
## Example: Phone
Create rule `required`:
```text
validation:
required: true
```
Create rule `phone-length`:
```text
validation:
limits:
min: 10
max: 10
countMode: CHARS
```
Create rule `digits-only`:
```text
validation:
pattern:
rules:
- position:
kind: PATTERN_POSITION_FROM
start: 0
constraint:
kind: CHARACTER_CONSTRAINT_NUMERIC
```
Create rule `phone-mask`:
```text
validation:
mask:
pattern: "(###) ###-####"
input_char: "#"
```
Create set `phone`:
```text
ruleNames:
- required
- phone-length
- digits-only
- phone-mask
```
Apply set:
```text
profileName: "default"
tableName: "customers"
dataKey: "customer_phone"
setName: "phone"
```
Then refresh:
```text
GetTableValidation(default, customers)
```
The response contains a normal `FieldValidation` for `customer_phone`.
## Important UX Notes
- Applying a set is not a live link.
- Editing a rule or set does not mutate fields where it was already applied.
- To update a field after set changes, apply the set again.
- If a set has conflicting singleton rules, the server rejects it.
- For now, the system does not store field metadata like `sourceSetName` on
applied fields. The field only stores the resolved validation snapshot.
## Files
Core model:
```text
validation-core/src/set.rs
validation-core/src/config.rs
```
Wire contract:
```text
common/proto/table_validation.proto
```
Server implementation:
```text
server/src/table_validation/get/service.rs
server/src/table_validation/post/repo.rs
server/src/table_validation/config.rs
```
Storage:
```text
server/migrations/20260506170000_create_validation_rules_and_sets.sql
```

View File

@@ -0,0 +1,311 @@
use crate::rules::{
CharacterFilter, CharacterLimits, DisplayMask, PatternFilters, PositionFilter, PositionRange,
};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use thiserror::Error;
/// Whitelist validation: a value must equal one of `values`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AllowedValues {
    // The permitted values.
    pub values: Vec<String>,
    // Whether the empty string bypasses the whitelist check.
    pub allow_empty: bool,
    // Compare with `eq_ignore_ascii_case` (ASCII-only folding) instead of
    // exact equality.
    pub case_insensitive: bool,
}
impl AllowedValues {
    /// Creates a whitelist that permits empty values and compares
    /// case-sensitively by default.
    pub fn new(values: Vec<String>) -> Self {
        Self {
            values,
            allow_empty: true,
            case_insensitive: false,
        }
    }

    /// Builder-style setter for `allow_empty`.
    pub fn allow_empty(mut self, allow_empty: bool) -> Self {
        self.allow_empty = allow_empty;
        self
    }

    /// Builder-style setter for `case_insensitive`.
    pub fn case_insensitive(mut self, case_insensitive: bool) -> Self {
        self.case_insensitive = case_insensitive;
        self
    }

    /// Returns true when `text` equals one of the allowed values. Note the
    /// case-insensitive comparison is ASCII-only (`eq_ignore_ascii_case`).
    pub fn matches(&self, text: &str) -> bool {
        let is_match: fn(&String, &str) -> bool = if self.case_insensitive {
            |allowed, candidate| allowed.eq_ignore_ascii_case(candidate)
        } else {
            |allowed, candidate| allowed == candidate
        };
        self.values.iter().any(|allowed| is_match(allowed, text))
    }
}
/// Client-side formatter metadata. Per the validation docs, the backend
/// stores this but never executes it; resolution happens in the frontend.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FormatterSettings {
    // Identifier of the formatter implementation, e.g. "PhoneFormatter".
    pub formatter_type: String,
    pub options: Vec<FormatterOption>,
    pub description: Option<String>,
}
/// One key/value option passed through to a formatter.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FormatterOption {
    pub key: String,
    pub value: String,
}
/// Serializable description of a single-character constraint; turned into a
/// runtime `CharacterFilter` via `resolve`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CharacterFilterSettings {
    Alphabetic,
    Numeric,
    Alphanumeric,
    // Exactly this character.
    Exact(char),
    // Any one of these characters.
    OneOf(Vec<char>),
    // A regex applied per character (effective only with the `regex` feature).
    Regex(String),
}
impl CharacterFilterSettings {
    /// Converts the serializable setting into a runtime `CharacterFilter`.
    ///
    /// `Regex` resolution depends on the optional `regex` cargo feature:
    /// with the feature enabled, an invalid pattern degrades to a filter
    /// that rejects every character; without the feature, regex filters
    /// always reject.
    pub fn resolve(&self) -> CharacterFilter {
        match self {
            Self::Alphabetic => CharacterFilter::Alphabetic,
            Self::Numeric => CharacterFilter::Numeric,
            Self::Alphanumeric => CharacterFilter::Alphanumeric,
            Self::Exact(ch) => CharacterFilter::Exact(*ch),
            Self::OneOf(chars) => CharacterFilter::OneOf(chars.clone()),
            Self::Regex(pattern) => {
                #[cfg(feature = "regex")]
                {
                    // The compiled regex is captured by the closure and
                    // applied to each character in isolation.
                    match regex::Regex::new(pattern) {
                        Ok(regex) => CharacterFilter::Custom(Arc::new(move |ch| {
                            regex.is_match(&ch.to_string())
                        })),
                        Err(_) => CharacterFilter::Custom(Arc::new(|_| false)),
                    }
                }
                #[cfg(not(feature = "regex"))]
                {
                    // Feature disabled: silence the unused binding and
                    // reject everything.
                    let _ = pattern;
                    CharacterFilter::Custom(Arc::new(|_| false))
                }
            }
        }
    }
}
/// Serializable form of a positional character filter: which positions the
/// constraint applies to, and which character class is allowed there.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PositionFilterSettings {
    pub positions: PositionRange,
    pub filter: CharacterFilterSettings,
}
impl PositionFilterSettings {
    /// Materializes the runtime `PositionFilter` from these settings.
    pub fn resolve(&self) -> PositionFilter {
        PositionFilter::new(self.positions.clone(), self.filter.resolve())
    }
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct PatternSettings {
pub filters: Vec<PositionFilterSettings>,
pub description: Option<String>,
}
impl PatternSettings {
pub fn resolve(&self) -> PatternFilters {
PatternFilters::new().add_filters(
self.filters
.iter()
.map(PositionFilterSettings::resolve)
.collect(),
)
}
}
/// Serializable validation fragment as stored by the server. Reusable rules
/// and directly-configured field validation both use this shape; `resolve`
/// turns it into the runtime `ValidationConfig`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ValidationSettings {
    // Reject missing/empty values.
    pub required: bool,
    pub character_limits: Option<CharacterLimits>,
    pub pattern: Option<PatternSettings>,
    pub allowed_values: Option<AllowedValues>,
    // Display/editing mask; per the docs, raw values are stored unmasked.
    pub display_mask: Option<DisplayMask>,
    // Client-side formatter; stored, not executed, by the backend.
    pub formatter: Option<FormatterSettings>,
    // UI hint only; the backend performs no external validation.
    pub external_validation_enabled: bool,
}
impl ValidationSettings {
    /// Materializes the runtime `ValidationConfig` from these serializable
    /// settings (patterns are compiled into `PatternFilters`).
    pub fn resolve(&self) -> ValidationConfig {
        ValidationConfig {
            required: self.required,
            character_limits: self.character_limits.clone(),
            pattern_filters: self.pattern.as_ref().map(PatternSettings::resolve),
            allowed_values: self.allowed_values.clone(),
            display_mask: self.display_mask.clone(),
            formatter: self.formatter.clone(),
            external_validation_enabled: self.external_validation_enabled,
        }
    }

    /// Folds an ordered list of rules into one merged settings value,
    /// failing on the first singleton conflict.
    pub fn merge_rules<'a>(
        rules: impl IntoIterator<Item = &'a ValidationSettings>,
    ) -> Result<Self, ValidationMergeError> {
        rules
            .into_iter()
            .try_fold(ValidationSettings::default(), |mut merged, rule| {
                merged.merge_rule(rule)?;
                Ok(merged)
            })
    }

    /// Merges one rule into `self`. Boolean flags are OR-ed; singleton
    /// fragments (limits, allowed values, mask, formatter) may be set by at
    /// most one rule; pattern filters accumulate across rules.
    pub fn merge_rule(&mut self, rule: &ValidationSettings) -> Result<(), ValidationMergeError> {
        self.required |= rule.required;
        self.external_validation_enabled |= rule.external_validation_enabled;
        merge_singleton(
            "character_limits",
            &mut self.character_limits,
            &rule.character_limits,
        )?;
        merge_singleton(
            "allowed_values",
            &mut self.allowed_values,
            &rule.allowed_values,
        )?;
        merge_singleton("display_mask", &mut self.display_mask, &rule.display_mask)?;
        merge_singleton("formatter", &mut self.formatter, &rule.formatter)?;
        if let Some(incoming) = &rule.pattern {
            match self.pattern.as_mut() {
                Some(existing) => {
                    // Pattern fragments are additive across rules.
                    existing.filters.extend(incoming.filters.clone());
                    // Keep the first description encountered.
                    if existing.description.is_none() {
                        existing.description = incoming.description.clone();
                    }
                }
                None => self.pattern = Some(incoming.clone()),
            }
        }
        Ok(())
    }
}
/// Copies `source` into `target` when set, erroring if `target` was already
/// populated by an earlier rule — singleton fragments may appear in at most
/// one rule of a set.
fn merge_singleton<T: Clone>(
    field_name: &'static str,
    target: &mut Option<T>,
    source: &Option<T>,
) -> Result<(), ValidationMergeError> {
    match (target.is_some(), source) {
        (true, Some(_)) => Err(ValidationMergeError::DuplicateSingleton { field_name }),
        (false, Some(value)) => {
            *target = Some(value.clone());
            Ok(())
        }
        (_, None) => Ok(()),
    }
}
/// Errors produced while merging validation rules into a set.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum ValidationMergeError {
    // Two rules in one set configured the same singleton fragment.
    #[error("validation set contains more than one rule configuring {field_name}")]
    DuplicateSingleton { field_name: &'static str },
}
/// Runtime validation configuration produced by `ValidationSettings::resolve`.
/// Unlike the serializable settings form, `pattern_filters` holds the
/// materialized filters.
#[derive(Debug, Clone, Default)]
pub struct ValidationConfig {
    pub required: bool,
    pub character_limits: Option<CharacterLimits>,
    pub pattern_filters: Option<PatternFilters>,
    pub allowed_values: Option<AllowedValues>,
    // Visual metadata for editing; not checked by `validate_content`.
    pub display_mask: Option<DisplayMask>,
    // Client-side formatter; not executed here.
    pub formatter: Option<FormatterSettings>,
    pub external_validation_enabled: bool,
}
impl ValidationConfig {
pub fn validate_content(&self, text: &str) -> ValidationResult {
if text.is_empty() {
if self.required {
return ValidationResult::error("Value required");
}
if let Some(allowed_values) = &self.allowed_values {
if !allowed_values.allow_empty {
return ValidationResult::error("Empty value is not allowed");
}
}
return ValidationResult::Valid;
}
if let Some(limits) = &self.character_limits {
if let Some(result) = limits.validate_content(text) {
if !result.is_acceptable() {
return result;
}
}
}
if let Some(pattern_filters) = &self.pattern_filters {
if let Err(message) = pattern_filters.validate_text(text) {
return ValidationResult::error(message);
}
}
if let Some(allowed_values) = &self.allowed_values {
if !allowed_values.matches(text) {
return ValidationResult::error("Value must be one of the allowed options");
}
}
ValidationResult::Valid
}
pub fn has_validation(&self) -> bool {
self.required
|| self.character_limits.is_some()
|| self.pattern_filters.is_some()
|| self.allowed_values.is_some()
|| self.display_mask.is_some()
|| self.formatter.is_some()
|| self.external_validation_enabled
}
}
/// Outcome of validating a field value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationResult {
    Valid,
    Warning { message: String },
    Error { message: String },
}
impl ValidationResult {
    /// Build a warning-level result from any string-like message.
    pub fn warning(message: impl Into<String>) -> Self {
        ValidationResult::Warning {
            message: message.into(),
        }
    }
    /// Build an error-level result from any string-like message.
    pub fn error(message: impl Into<String>) -> Self {
        ValidationResult::Error {
            message: message.into(),
        }
    }
    /// `Valid` and `Warning` both let the value through; only `Error` blocks.
    pub fn is_acceptable(&self) -> bool {
        !self.is_error()
    }
    /// True only for the `Error` variant.
    pub fn is_error(&self) -> bool {
        matches!(self, ValidationResult::Error { .. })
    }
    /// The attached message, if any (`Valid` carries none).
    pub fn message(&self) -> Option<&str> {
        match self {
            ValidationResult::Valid => None,
            ValidationResult::Warning { message } => Some(message.as_str()),
            ValidationResult::Error { message } => Some(message.as_str()),
        }
    }
}

View File

@@ -0,0 +1,14 @@
pub mod config;
pub mod rules;
pub mod set;
pub use config::{
AllowedValues, CharacterFilterSettings, FormatterOption, FormatterSettings, PatternSettings,
PositionFilterSettings, ValidationConfig, ValidationMergeError, ValidationResult,
ValidationSettings,
};
pub use rules::{
count_text, CharacterFilter, CharacterLimits, CountMode, DisplayMask, LimitCheckResult,
MaskDisplayMode, PatternFilters, PositionFilter, PositionRange,
};
pub use set::{AppliedValidation, ValidationRule, ValidationSet};

View File

@@ -0,0 +1,452 @@
// src/validation/limits.rs
//! Character limits validation implementation
use crate::ValidationResult;
use serde::{Deserialize, Serialize};
use unicode_width::UnicodeWidthStr;
/// Character limits configuration for a field
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CharacterLimits {
    /// Maximum number of characters allowed (None = unlimited)
    max_length: Option<usize>,
    /// Minimum number of characters required (None = no minimum)
    min_length: Option<usize>,
    /// Warning threshold (warn when approaching max limit)
    /// NOTE(review): the threshold is only consulted when `max_length` is
    /// also set (see `validate_content`/`check_limits`), so a value built
    /// via `new_warning` alone never warns — confirm that is intended.
    warning_threshold: Option<usize>,
    /// Count mode: characters vs display width
    count_mode: CountMode,
}
/// How to count characters for limit checking
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
pub enum CountMode {
    /// Count actual characters (default)
    /// (Unicode scalar values, i.e. `str::chars().count()`)
    #[default]
    Characters,
    /// Count display width (useful for CJK characters)
    /// (computed via the `unicode-width` crate)
    DisplayWidth,
    /// Count bytes (rarely used, but available)
    /// (raw UTF-8 byte length, i.e. `str::len()`)
    Bytes,
}
/// Result of a character limit check
///
/// `current`/`max`/`min` are expressed in the units of the configured
/// `CountMode` (characters, display columns, or bytes).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LimitCheckResult {
    /// Within limits
    Ok,
    /// Approaching limit (warning)
    Warning { current: usize, max: usize },
    /// At or exceeding limit (error)
    Exceeded { current: usize, max: usize },
    /// Below minimum length
    TooShort { current: usize, min: usize },
}
impl CharacterLimits {
    /// Create new character limits with just max length
    pub fn new(max_length: usize) -> Self {
        Self {
            max_length: Some(max_length),
            min_length: None,
            warning_threshold: None,
            count_mode: CountMode::default(),
        }
    }
    /// Create new character limits with min and max
    pub fn new_range(min_length: usize, max_length: usize) -> Self {
        Self {
            max_length: Some(max_length),
            min_length: Some(min_length),
            warning_threshold: None,
            count_mode: CountMode::default(),
        }
    }
    /// Create new character limits with just minimum length
    pub fn new_min(min_length: usize) -> Self {
        Self {
            max_length: None,
            min_length: Some(min_length),
            warning_threshold: None,
            count_mode: CountMode::default(),
        }
    }
    /// Create new character limits with only a warning threshold.
    ///
    /// NOTE(review): `validate_insertion`, `validate_content` and
    /// `check_limits` only look at the threshold inside their
    /// `if let Some(max) = self.max_length` branch, so limits built by this
    /// constructor alone never produce a warning — confirm intended.
    pub fn new_warning(threshold: usize) -> Self {
        Self {
            max_length: None,
            min_length: None,
            warning_threshold: Some(threshold),
            count_mode: CountMode::default(),
        }
    }
    /// Set warning threshold (when to show warning before hitting limit)
    pub fn with_warning_threshold(mut self, threshold: usize) -> Self {
        self.warning_threshold = Some(threshold);
        self
    }
    /// Set count mode (characters vs display width vs bytes)
    pub fn with_count_mode(mut self, mode: CountMode) -> Self {
        self.count_mode = mode;
        self
    }
    /// Get maximum length
    pub fn max_length(&self) -> Option<usize> {
        self.max_length
    }
    /// Get minimum length
    pub fn min_length(&self) -> Option<usize> {
        self.min_length
    }
    /// Get warning threshold
    pub fn warning_threshold(&self) -> Option<usize> {
        self.warning_threshold
    }
    /// Get count mode
    pub fn count_mode(&self) -> CountMode {
        self.count_mode
    }
    /// Count characters/width/bytes according to the configured mode
    ///
    /// NOTE(review): duplicates the free function `count_text`; keeping the
    /// two in sync is manual — consider delegating.
    fn count(&self, text: &str) -> usize {
        match self.count_mode {
            CountMode::Characters => text.chars().count(),
            CountMode::DisplayWidth => text.width(),
            CountMode::Bytes => text.len(),
        }
    }
    /// Check if inserting a character would exceed limits
    ///
    /// Simulates inserting `character` at character position `position`
    /// (clamped to the end of the text) and re-counts. Returns an error
    /// when the insertion would exceed `max_length`, a warning only when
    /// the insertion *crosses* the warning threshold (so the warning fires
    /// once), and `None` otherwise.
    pub fn validate_insertion(
        &self,
        current_text: &str,
        position: usize,
        character: char,
    ) -> Option<ValidationResult> {
        let mut new_text = String::with_capacity(current_text.len() + character.len_utf8());
        let mut chars = current_text.chars();
        let clamped_pos = position.min(current_text.chars().count());
        for _ in 0..clamped_pos {
            if let Some(ch) = chars.next() {
                new_text.push(ch);
            }
        }
        new_text.push(character);
        for ch in chars {
            new_text.push(ch);
        }
        let new_count = self.count(&new_text);
        let current_count = self.count(current_text);
        if let Some(max) = self.max_length {
            if new_count > max {
                return Some(ValidationResult::error(format!(
                    "Character limit exceeded: {new_count}/{max}"
                )));
            }
            if let Some(warning_threshold) = self.warning_threshold {
                // Warn only on the transition across the threshold.
                if new_count >= warning_threshold && current_count < warning_threshold {
                    return Some(ValidationResult::warning(format!(
                        "Approaching character limit: {new_count}/{max}"
                    )));
                }
            }
        }
        None // No validation issues
    }
    /// Validate the current content
    ///
    /// Below-minimum content yields a *warning* (typing in progress),
    /// above-maximum content yields an *error*, and content at or past the
    /// warning threshold (when a max is set) yields a warning.
    pub fn validate_content(&self, text: &str) -> Option<ValidationResult> {
        let count = self.count(text);
        if let Some(min) = self.min_length {
            if count < min {
                return Some(ValidationResult::warning(format!(
                    "Minimum length not met: {count}/{min}"
                )));
            }
        }
        if let Some(max) = self.max_length {
            if count > max {
                return Some(ValidationResult::error(format!(
                    "Character limit exceeded: {count}/{max}"
                )));
            }
            if let Some(warning_threshold) = self.warning_threshold {
                if count >= warning_threshold {
                    return Some(ValidationResult::warning(format!(
                        "Approaching character limit: {count}/{max}"
                    )));
                }
            }
        }
        None // No validation issues
    }
    /// Get the current status of the text against limits
    ///
    /// Max-related states take precedence over the minimum check.
    pub fn check_limits(&self, text: &str) -> LimitCheckResult {
        let count = self.count(text);
        if let Some(max) = self.max_length {
            if count > max {
                return LimitCheckResult::Exceeded {
                    current: count,
                    max,
                };
            }
            if let Some(warning_threshold) = self.warning_threshold {
                if count >= warning_threshold {
                    return LimitCheckResult::Warning {
                        current: count,
                        max,
                    };
                }
            }
        }
        // Check min length
        if let Some(min) = self.min_length {
            if count < min {
                return LimitCheckResult::TooShort {
                    current: count,
                    min,
                };
            }
        }
        LimitCheckResult::Ok
    }
    /// Get a human-readable status string
    ///
    /// Returns `None` only in the `Ok` state when no max limit is set.
    pub fn status_text(&self, text: &str) -> Option<String> {
        match self.check_limits(text) {
            LimitCheckResult::Ok => {
                // Show current/max if we have a max limit
                self.max_length
                    .map(|max| format!("{}/{}", self.count(text), max))
            }
            LimitCheckResult::Warning { current, max } => {
                Some(format!("{current}/{max} (approaching limit)"))
            }
            LimitCheckResult::Exceeded { current, max } => {
                Some(format!("{current}/{max} (exceeded)"))
            }
            LimitCheckResult::TooShort { current, min } => Some(format!("{current}/{min} minimum")),
        }
    }
    /// Whether leaving this field is allowed given its current content.
    /// Only the minimum length can block a switch; exceeding the maximum
    /// shows an error but does not block (see tests).
    pub fn allows_field_switch(&self, text: &str) -> bool {
        if let Some(min) = self.min_length {
            let count = self.count(text);
            // Allow switching if field is empty OR meets minimum requirement
            count == 0 || count >= min
        } else {
            true // No minimum requirement, always allow switching
        }
    }
    /// Get reason why field switching is not allowed (if any)
    pub fn field_switch_block_reason(&self, text: &str) -> Option<String> {
        if let Some(min) = self.min_length {
            let count = self.count(text);
            if count > 0 && count < min {
                return Some(format!(
                    "Field must be empty or have at least {min} characters (currently: {count})"
                ));
            }
        }
        None
    }
}
/// Count `text` according to `mode`: Unicode scalar values, terminal
/// display columns (via `unicode-width`), or raw UTF-8 bytes.
pub fn count_text(text: &str, mode: CountMode) -> usize {
    match mode {
        CountMode::Bytes => text.len(),
        CountMode::DisplayWidth => text.width(),
        CountMode::Characters => text.chars().count(),
    }
}
impl Default for CharacterLimits {
fn default() -> Self {
Self {
max_length: Some(30), // Default 30 character limit as specified
min_length: None,
warning_threshold: None,
count_mode: CountMode::default(),
}
}
}
// Unit tests for `CharacterLimits`: construction, counting modes, insertion
// and content validation, warning thresholds, status text, and the
// field-switch gating rules.
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_character_limits_creation() {
        let limits = CharacterLimits::new(10);
        assert_eq!(limits.max_length(), Some(10));
        assert_eq!(limits.min_length(), None);
        let range_limits = CharacterLimits::new_range(5, 15);
        assert_eq!(range_limits.min_length(), Some(5));
        assert_eq!(range_limits.max_length(), Some(15));
    }
    #[test]
    fn test_default_limits() {
        let limits = CharacterLimits::default();
        assert_eq!(limits.max_length(), Some(30));
    }
    #[test]
    fn test_character_counting() {
        let limits = CharacterLimits::new(5);
        // Test character mode (default)
        assert_eq!(limits.count("hello"), 5);
        assert_eq!(limits.count("héllo"), 5); // Accented character counts as 1
        // Test display width mode
        let limits = limits.with_count_mode(CountMode::DisplayWidth);
        assert_eq!(limits.count("hello"), 5);
        // Test bytes mode
        let limits = limits.with_count_mode(CountMode::Bytes);
        assert_eq!(limits.count("hello"), 5);
        assert_eq!(limits.count("héllo"), 6); // é takes 2 bytes in UTF-8
    }
    #[test]
    fn test_insertion_validation() {
        let limits = CharacterLimits::new(5);
        // Valid insertion
        let result = limits.validate_insertion("test", 4, 'x');
        assert!(result.is_none()); // No validation issues
        // Invalid insertion (would exceed limit)
        let result = limits.validate_insertion("tests", 5, 'x');
        assert!(result.is_some());
        assert!(!result.unwrap().is_acceptable());
    }
    #[test]
    fn test_content_validation() {
        let limits = CharacterLimits::new_range(3, 10);
        // Too short
        let result = limits.validate_content("hi");
        assert!(result.is_some());
        assert!(result.unwrap().is_acceptable()); // Warning, not error
        // Just right
        let result = limits.validate_content("hello");
        assert!(result.is_none());
        // Too long
        let result = limits.validate_content("hello world!");
        assert!(result.is_some());
        assert!(!result.unwrap().is_acceptable()); // Error
    }
    #[test]
    fn test_warning_threshold() {
        // The warning fires only on the insertion that CROSSES the threshold.
        let limits = CharacterLimits::new(10).with_warning_threshold(8);
        // Below warning threshold
        let result = limits.validate_insertion("123456", 6, 'x');
        assert!(result.is_none());
        // At warning threshold
        let result = limits.validate_insertion("1234567", 7, 'x');
        assert!(result.is_some()); // This brings us to 8 chars
        assert!(result.unwrap().is_acceptable()); // Warning, not error
        let result = limits.validate_insertion("12345678", 8, 'x');
        assert!(result.is_none());
    }
    #[test]
    fn test_status_text() {
        let limits = CharacterLimits::new(10);
        assert_eq!(limits.status_text("hello"), Some("5/10".to_string()));
        let limits = limits.with_warning_threshold(8);
        assert_eq!(
            limits.status_text("12345678"),
            Some("8/10 (approaching limit)".to_string())
        );
        assert_eq!(
            limits.status_text("1234567890x"),
            Some("11/10 (exceeded)".to_string())
        );
    }
    #[test]
    fn test_field_switch_blocking() {
        let limits = CharacterLimits::new_range(3, 10);
        // Empty field: should allow switching
        assert!(limits.allows_field_switch(""));
        assert!(limits.field_switch_block_reason("").is_none());
        // Field with content below minimum: should block switching
        assert!(!limits.allows_field_switch("hi"));
        assert!(limits.field_switch_block_reason("hi").is_some());
        assert!(limits
            .field_switch_block_reason("hi")
            .unwrap()
            .contains("at least 3 characters"));
        // Field meeting minimum: should allow switching
        assert!(limits.allows_field_switch("hello"));
        assert!(limits.field_switch_block_reason("hello").is_none());
        // Field exceeding maximum: should still allow switching (validation shows error but doesn't block)
        assert!(limits.allows_field_switch("this is way too long"));
        assert!(limits
            .field_switch_block_reason("this is way too long")
            .is_none());
    }
    #[test]
    fn test_field_switch_no_minimum() {
        let limits = CharacterLimits::new(10); // Only max, no minimum
        // Should always allow switching when there's no minimum
        assert!(limits.allows_field_switch(""));
        assert!(limits.allows_field_switch("a"));
        assert!(limits.allows_field_switch("hello"));
        assert!(limits.field_switch_block_reason("").is_none());
        assert!(limits.field_switch_block_reason("a").is_none());
    }
}

View File

@@ -0,0 +1,348 @@
// src/validation/mask.rs
//! Pure display mask system - user-defined patterns only
use serde::{Deserialize, Serialize};
/// How a `DisplayMask` renders separators and not-yet-typed positions.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum MaskDisplayMode {
    /// Only show separators as user types
    /// Example: "" → "", "123" → "123", "12345" → "(123) 45"
    #[default]
    Dynamic,
    /// Show full template with placeholders from start
    /// Example: "" → "(___) ___-____", "123" → "(123) ___-____"
    Template {
        /// Character to use as placeholder for empty input positions
        placeholder: char,
    },
}
/// A user-defined, purely visual input mask (performs no validation itself).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DisplayMask {
    /// Mask pattern like "##-##-####" where # = input position, others are visual separators
    pattern: String,
    /// Character used to represent input positions (usually '#')
    input_char: char,
    /// How to display the mask (dynamic vs template)
    display_mode: MaskDisplayMode,
}
impl DisplayMask {
    /// Create a new display mask with dynamic mode (current behavior)
    ///
    /// # Arguments
    /// * `pattern` - The mask pattern (e.g., "##-##-####", "(###) ###-####")
    /// * `input_char` - Character representing input positions (usually '#')
    ///
    /// # Examples
    /// ```
    /// use validation_core::DisplayMask;
    ///
    /// // Phone number format
    /// let phone_mask = DisplayMask::new("(###) ###-####", '#');
    ///
    /// // Date format
    /// let date_mask = DisplayMask::new("##/##/####", '#');
    ///
    /// // Custom business format
    /// let employee_id = DisplayMask::new("EMP-####-##", '#');
    /// ```
    pub fn new(pattern: impl Into<String>, input_char: char) -> Self {
        Self {
            pattern: pattern.into(),
            input_char,
            display_mode: MaskDisplayMode::Dynamic,
        }
    }
    /// Set the display mode for this mask
    ///
    /// # Examples
    /// ```
    /// use validation_core::{DisplayMask, MaskDisplayMode};
    ///
    /// let dynamic_mask = DisplayMask::new("##-##", '#')
    ///     .with_mode(MaskDisplayMode::Dynamic);
    ///
    /// let template_mask = DisplayMask::new("##-##", '#')
    ///     .with_mode(MaskDisplayMode::Template { placeholder: '_' });
    /// ```
    pub fn with_mode(mut self, mode: MaskDisplayMode) -> Self {
        self.display_mode = mode;
        self
    }
    /// Set template mode with custom placeholder
    ///
    /// # Examples
    /// ```
    /// use validation_core::DisplayMask;
    ///
    /// let phone_template = DisplayMask::new("(###) ###-####", '#')
    ///     .with_template('_'); // Shows "(___) ___-____" when empty
    ///
    /// let date_dots = DisplayMask::new("##/##/####", '#')
    ///     .with_template('•'); // Shows "••/••/••••" when empty
    /// ```
    pub fn with_template(self, placeholder: char) -> Self {
        self.with_mode(MaskDisplayMode::Template { placeholder })
    }
    /// Apply mask to raw input, showing visual separators and handling display mode
    pub fn apply_to_display(&self, raw_input: &str) -> String {
        match &self.display_mode {
            MaskDisplayMode::Dynamic => self.apply_dynamic(raw_input),
            MaskDisplayMode::Template { placeholder } => {
                self.apply_template(raw_input, *placeholder)
            }
        }
    }
    /// Dynamic mode - only show separators as user types
    fn apply_dynamic(&self, raw_input: &str) -> String {
        if raw_input.is_empty() {
            return String::new();
        }
        let mut result = String::new();
        let mut raw_chars = raw_input.chars();
        for pattern_char in self.pattern.chars() {
            if pattern_char == self.input_char {
                // Input position - take from raw input
                if let Some(input_char) = raw_chars.next() {
                    result.push(input_char);
                } else {
                    // No more input - stop here in dynamic mode
                    break;
                }
            } else {
                // Visual separator - always show
                result.push(pattern_char);
            }
        }
        // Append any remaining raw characters that don't fit the pattern
        for remaining_char in raw_chars {
            result.push(remaining_char);
        }
        result
    }
    /// Template mode - show full pattern with placeholders
    fn apply_template(&self, raw_input: &str, placeholder: char) -> String {
        let mut result = String::new();
        let mut raw_chars = raw_input.chars().peekable();
        for pattern_char in self.pattern.chars() {
            if pattern_char == self.input_char {
                // Input position - take from raw input or use placeholder
                if let Some(input_char) = raw_chars.next() {
                    result.push(input_char);
                } else {
                    // No more input - use placeholder to show template
                    result.push(placeholder);
                }
            } else {
                // Visual separator - always show in template mode
                result.push(pattern_char);
            }
        }
        // In template mode, we don't append extra characters beyond the pattern
        // This keeps the template consistent
        result
    }
    /// Check if a display position should accept cursor/input
    ///
    /// `display_position` is a character (not byte) index into the pattern.
    pub fn is_input_position(&self, display_position: usize) -> bool {
        self.pattern
            .chars()
            .nth(display_position)
            .map(|c| c == self.input_char)
            .unwrap_or(true) // Beyond pattern = accept input
    }
    /// Map display position to raw position
    pub fn display_pos_to_raw_pos(&self, display_pos: usize) -> usize {
        let mut raw_pos = 0;
        for (i, pattern_char) in self.pattern.chars().enumerate() {
            if i >= display_pos {
                break;
            }
            if pattern_char == self.input_char {
                raw_pos += 1;
            }
        }
        raw_pos
    }
    /// Map raw position to display position
    ///
    /// All positions in this type are *character* positions, so the
    /// past-the-pattern case must use the pattern's character count rather
    /// than `len()` (UTF-8 byte length), which diverges for multi-byte
    /// separators such as '•'.
    pub fn raw_pos_to_display_pos(&self, raw_pos: usize) -> usize {
        let mut input_positions_seen = 0;
        for (display_pos, pattern_char) in self.pattern.chars().enumerate() {
            if pattern_char == self.input_char {
                if input_positions_seen == raw_pos {
                    return display_pos;
                }
                input_positions_seen += 1;
            }
        }
        // Beyond pattern, return position after pattern (in characters)
        self.pattern.chars().count() + (raw_pos - input_positions_seen)
    }
    /// Find next input position at or after the given display position
    pub fn next_input_position(&self, display_pos: usize) -> usize {
        for (i, pattern_char) in self.pattern.chars().enumerate().skip(display_pos) {
            if pattern_char == self.input_char {
                return i;
            }
        }
        // Beyond pattern = all positions are input positions.
        // Clamp with the character count (not byte length) of the pattern.
        display_pos.max(self.pattern.chars().count())
    }
    /// Find previous input position at or before the given display position
    pub fn prev_input_position(&self, display_pos: usize) -> Option<usize> {
        // Walk positions 0..=display_pos and keep the last input position
        // seen; avoids materializing the whole pattern into a Vec.
        self.pattern
            .chars()
            .enumerate()
            .take(display_pos.saturating_add(1))
            .filter(|&(_, pattern_char)| pattern_char == self.input_char)
            .map(|(i, _)| i)
            .last()
    }
    /// Get the display mode
    pub fn display_mode(&self) -> &MaskDisplayMode {
        &self.display_mode
    }
    /// Check if this mask uses template mode
    pub fn is_template_mode(&self) -> bool {
        matches!(self.display_mode, MaskDisplayMode::Template { .. })
    }
    /// Get the pattern string
    pub fn pattern(&self) -> &str {
        &self.pattern
    }
    /// Get the input placeholder character
    pub fn input_char(&self) -> char {
        self.input_char
    }
    /// Get the position of the first input character in the pattern
    ///
    /// Returns 0 both for a pattern starting with an input character and
    /// for a pattern containing none at all (historic sentinel behavior).
    pub fn first_input_position(&self) -> usize {
        self.pattern
            .chars()
            .position(|ch| ch == self.input_char)
            .unwrap_or(0)
    }
}
impl Default for DisplayMask {
fn default() -> Self {
Self::new("", '#')
}
}
// Unit tests for `DisplayMask`: user-defined phone/date/business patterns,
// custom input characters and placeholders, and position mapping.
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_user_defined_phone_mask() {
        // User creates their own phone mask
        let dynamic = DisplayMask::new("(###) ###-####", '#');
        let template = DisplayMask::new("(###) ###-####", '#').with_template('_');
        // Dynamic mode
        assert_eq!(dynamic.apply_to_display(""), "");
        assert_eq!(dynamic.apply_to_display("1234567890"), "(123) 456-7890");
        // Template mode
        assert_eq!(template.apply_to_display(""), "(___) ___-____");
        assert_eq!(template.apply_to_display("123"), "(123) ___-____");
    }
    #[test]
    fn test_user_defined_date_mask() {
        // User creates their own date formats
        let us_date = DisplayMask::new("##/##/####", '#');
        let eu_date = DisplayMask::new("##.##.####", '#');
        let iso_date = DisplayMask::new("####-##-##", '#');
        assert_eq!(us_date.apply_to_display("12252024"), "12/25/2024");
        assert_eq!(eu_date.apply_to_display("25122024"), "25.12.2024");
        assert_eq!(iso_date.apply_to_display("20241225"), "2024-12-25");
    }
    #[test]
    fn test_user_defined_business_formats() {
        // User creates custom business formats
        let employee_id = DisplayMask::new("EMP-####-##", '#');
        let product_code = DisplayMask::new("###-###-###", '#');
        let invoice = DisplayMask::new("INV####/##", '#');
        assert_eq!(employee_id.apply_to_display("123456"), "EMP-1234-56");
        assert_eq!(product_code.apply_to_display("123456789"), "123-456-789");
        assert_eq!(invoice.apply_to_display("123456"), "INV1234/56");
    }
    #[test]
    fn test_custom_input_characters() {
        // User can define their own input character
        let mask_with_x = DisplayMask::new("XXX-XX-XXXX", 'X');
        let mask_with_hash = DisplayMask::new("###-##-####", '#');
        let mask_with_n = DisplayMask::new("NNN-NN-NNNN", 'N');
        assert_eq!(mask_with_x.apply_to_display("123456789"), "123-45-6789");
        assert_eq!(mask_with_hash.apply_to_display("123456789"), "123-45-6789");
        assert_eq!(mask_with_n.apply_to_display("123456789"), "123-45-6789");
    }
    #[test]
    fn test_custom_placeholders() {
        // User can define custom placeholder characters
        let underscores = DisplayMask::new("##-##", '#').with_template('_');
        let dots = DisplayMask::new("##-##", '#').with_template('•');
        let dashes = DisplayMask::new("##-##", '#').with_template('-');
        assert_eq!(underscores.apply_to_display(""), "__-__");
        assert_eq!(dots.apply_to_display(""), "••-••");
        assert_eq!(dashes.apply_to_display(""), "-----"); // Note: dashes blend with separator
    }
    #[test]
    fn test_position_mapping_user_patterns() {
        let custom = DisplayMask::new("ABC-###-XYZ", '#');
        // Position mapping should work correctly with any pattern
        assert_eq!(custom.raw_pos_to_display_pos(0), 4); // First # at position 4
        assert_eq!(custom.raw_pos_to_display_pos(1), 5); // Second # at position 5
        assert_eq!(custom.raw_pos_to_display_pos(2), 6); // Third # at position 6
        assert_eq!(custom.display_pos_to_raw_pos(4), 0); // Position 4 -> first input
        assert_eq!(custom.display_pos_to_raw_pos(5), 1); // Position 5 -> second input
        assert_eq!(custom.display_pos_to_raw_pos(6), 2); // Position 6 -> third input
        assert!(!custom.is_input_position(0)); // A
        assert!(!custom.is_input_position(3)); // -
        assert!(custom.is_input_position(4)); // #
        assert!(!custom.is_input_position(8)); // Y
    }
}

View File

@@ -0,0 +1,7 @@
pub mod character_limits;
pub mod display_mask;
pub mod pattern_rules;
pub use character_limits::{count_text, CharacterLimits, CountMode, LimitCheckResult};
pub use display_mask::{DisplayMask, MaskDisplayMode};
pub use pattern_rules::{CharacterFilter, PatternFilters, PositionFilter, PositionRange};

View File

@@ -0,0 +1,330 @@
// src/validation/patterns.rs
//! Position-based pattern filtering for validation
use serde::{Deserialize, Serialize};
use std::sync::Arc;
/// A filter that applies to specific character positions in a field
///
/// Positions are 0-based character indices within the field's text.
#[derive(Debug, Clone)]
pub struct PositionFilter {
    /// Which positions this filter applies to
    pub positions: PositionRange,
    /// What type of character filter to apply
    pub filter: CharacterFilter,
}
/// Defines which character positions a filter applies to
///
/// All bounds are inclusive and refer to 0-based character positions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PositionRange {
    /// Single position (e.g., position 3 only)
    Single(usize),
    /// Range of positions (e.g., positions 0-2, inclusive)
    Range(usize, usize),
    /// From position onwards (e.g., position 4 and beyond)
    From(usize),
    /// Multiple specific positions (e.g., positions 0, 2, 5)
    Multiple(Vec<usize>),
}
/// Types of character filters that can be applied
///
/// No derives here: `Custom` wraps a function, so `Debug`/`Clone` are
/// implemented manually below and serde support is not possible.
pub enum CharacterFilter {
    /// Allow only alphabetic characters (a-z, A-Z)
    Alphabetic,
    /// Allow only numeric characters (0-9)
    Numeric,
    /// Allow alphanumeric characters (a-z, A-Z, 0-9)
    Alphanumeric,
    /// Allow only exact character match
    Exact(char),
    /// Allow any character from the provided set
    OneOf(Vec<char>),
    /// Custom user-defined filter function
    Custom(Arc<dyn Fn(char) -> bool + Send + Sync>),
}
// Manual implementations for Debug and Clone
impl std::fmt::Debug for CharacterFilter {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
CharacterFilter::Alphabetic => write!(f, "Alphabetic"),
CharacterFilter::Numeric => write!(f, "Numeric"),
CharacterFilter::Alphanumeric => write!(f, "Alphanumeric"),
CharacterFilter::Exact(ch) => write!(f, "Exact('{ch}')"),
CharacterFilter::OneOf(chars) => write!(f, "OneOf({chars:?})"),
CharacterFilter::Custom(_) => write!(f, "Custom(<function>)"),
}
}
}
impl Clone for CharacterFilter {
fn clone(&self) -> Self {
match self {
CharacterFilter::Alphabetic => CharacterFilter::Alphabetic,
CharacterFilter::Numeric => CharacterFilter::Numeric,
CharacterFilter::Alphanumeric => CharacterFilter::Alphanumeric,
CharacterFilter::Exact(ch) => CharacterFilter::Exact(*ch),
CharacterFilter::OneOf(chars) => CharacterFilter::OneOf(chars.clone()),
CharacterFilter::Custom(func) => CharacterFilter::Custom(Arc::clone(func)),
}
}
}
impl PositionRange {
    /// Check if a position is included in this range
    pub fn contains(&self, position: usize) -> bool {
        match self {
            PositionRange::Single(pos) => position == *pos,
            PositionRange::Range(start, end) => position >= *start && position <= *end,
            PositionRange::From(start) => position >= *start,
            PositionRange::Multiple(positions) => positions.contains(&position),
        }
    }
    /// Get all positions up to a given length that this range covers
    ///
    /// Only positions strictly below `max_length` are returned; an empty
    /// field (`max_length == 0`) therefore yields no positions for any
    /// variant.
    pub fn positions_up_to(&self, max_length: usize) -> Vec<usize> {
        match self {
            PositionRange::Single(pos) => {
                if *pos < max_length {
                    vec![*pos]
                } else {
                    vec![]
                }
            }
            PositionRange::Range(start, end) => {
                // Guard max_length == 0 explicitly: the previous
                // `saturating_sub(1)` clamp made Range(0, _) incorrectly
                // report position 0 for an empty field.
                if max_length == 0 {
                    return vec![];
                }
                let actual_end = (*end).min(max_length - 1);
                if *start <= actual_end {
                    (*start..=actual_end).collect()
                } else {
                    vec![]
                }
            }
            PositionRange::From(start) => {
                if *start < max_length {
                    (*start..max_length).collect()
                } else {
                    vec![]
                }
            }
            PositionRange::Multiple(positions) => positions
                .iter()
                .filter(|&&pos| pos < max_length)
                .copied()
                .collect(),
        }
    }
}
impl CharacterFilter {
/// Test if a character passes this filter
pub fn accepts(&self, ch: char) -> bool {
match self {
CharacterFilter::Alphabetic => ch.is_alphabetic(),
CharacterFilter::Numeric => ch.is_numeric(),
CharacterFilter::Alphanumeric => ch.is_alphanumeric(),
CharacterFilter::Exact(expected) => ch == *expected,
CharacterFilter::OneOf(chars) => chars.contains(&ch),
CharacterFilter::Custom(func) => func(ch),
}
}
/// Get a human-readable description of this filter
pub fn description(&self) -> String {
match self {
CharacterFilter::Alphabetic => "alphabetic characters (a-z, A-Z)".to_string(),
CharacterFilter::Numeric => "numeric characters (0-9)".to_string(),
CharacterFilter::Alphanumeric => "alphanumeric characters (a-z, A-Z, 0-9)".to_string(),
CharacterFilter::Exact(ch) => format!("exactly '{ch}'"),
CharacterFilter::OneOf(chars) => {
let char_list: String = chars.iter().collect();
format!("one of: {char_list}")
}
CharacterFilter::Custom(_) => "custom filter".to_string(),
}
}
}
impl PositionFilter {
    /// Create a new position filter
    pub fn new(positions: PositionRange, filter: CharacterFilter) -> Self {
        Self { positions, filter }
    }
    /// Validate a character at a specific position.
    ///
    /// Positions outside this filter's range always pass.
    pub fn validate_position(&self, position: usize, character: char) -> bool {
        !self.positions.contains(position) || self.filter.accepts(character)
    }
    /// Build the error message for an invalid character at `position`,
    /// or `None` when this filter does not reject it.
    pub fn error_message(&self, position: usize, character: char) -> Option<String> {
        let rejected = self.positions.contains(position) && !self.filter.accepts(character);
        rejected.then(|| {
            format!(
                "Position {} requires {} but got '{}'",
                position,
                self.filter.description(),
                character
            )
        })
    }
}
/// A collection of position filters for a field
///
/// Filters are checked in insertion order; the first rejection wins.
#[derive(Debug, Clone, Default)]
pub struct PatternFilters {
    // Every filter must accept a character for it to be considered valid.
    filters: Vec<PositionFilter>,
}
impl PatternFilters {
    /// Create empty pattern filters
    pub fn new() -> Self {
        Self::default()
    }
    /// Add a position filter (builder style)
    pub fn add_filter(mut self, filter: PositionFilter) -> Self {
        self.filters.push(filter);
        self
    }
    /// Add multiple filters (builder style)
    pub fn add_filters(mut self, filters: Vec<PositionFilter>) -> Self {
        self.filters.extend(filters);
        self
    }
    /// Validate a character at a specific position against all applicable filters
    ///
    /// Returns the first rejecting filter's error message, or `Ok(())` when
    /// every filter accepts the character (or none covers the position).
    pub fn validate_char_at_position(
        &self,
        position: usize,
        character: char,
    ) -> Result<(), String> {
        for filter in &self.filters {
            if let Some(error) = filter.error_message(position, character) {
                return Err(error);
            }
        }
        Ok(())
    }
    /// Validate entire text against all filters
    ///
    /// Positions are *character* positions. The previous implementation used
    /// `char_indices()`, whose offsets are UTF-8 byte positions and drift
    /// from the filters' character positions on multi-byte input.
    pub fn validate_text(&self, text: &str) -> Result<(), String> {
        for (position, character) in text.chars().enumerate() {
            self.validate_char_at_position(position, character)?;
        }
        Ok(())
    }
    /// Check if any filters are configured
    pub fn has_filters(&self) -> bool {
        !self.filters.is_empty()
    }
    /// Get all configured filters
    pub fn filters(&self) -> &[PositionFilter] {
        &self.filters
    }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_position_range_contains() {
assert!(PositionRange::Single(3).contains(3));
assert!(!PositionRange::Single(3).contains(2));
assert!(PositionRange::Range(1, 4).contains(3));
assert!(!PositionRange::Range(1, 4).contains(5));
assert!(PositionRange::From(2).contains(5));
assert!(!PositionRange::From(2).contains(1));
assert!(PositionRange::Multiple(vec![0, 2, 5]).contains(2));
assert!(!PositionRange::Multiple(vec![0, 2, 5]).contains(3));
}
#[test]
fn test_position_range_positions_up_to() {
assert_eq!(PositionRange::Single(3).positions_up_to(5), vec![3]);
assert_eq!(PositionRange::Single(5).positions_up_to(3), vec![]);
assert_eq!(PositionRange::Range(1, 3).positions_up_to(5), vec![1, 2, 3]);
assert_eq!(PositionRange::Range(1, 5).positions_up_to(3), vec![1, 2]);
assert_eq!(PositionRange::From(2).positions_up_to(5), vec![2, 3, 4]);
assert_eq!(
PositionRange::Multiple(vec![0, 2, 5]).positions_up_to(4),
vec![0, 2]
);
}
#[test]
fn test_character_filter_accepts() {
assert!(CharacterFilter::Alphabetic.accepts('a'));
assert!(CharacterFilter::Alphabetic.accepts('Z'));
assert!(!CharacterFilter::Alphabetic.accepts('1'));
assert!(CharacterFilter::Numeric.accepts('5'));
assert!(!CharacterFilter::Numeric.accepts('a'));
assert!(CharacterFilter::Alphanumeric.accepts('a'));
assert!(CharacterFilter::Alphanumeric.accepts('5'));
assert!(!CharacterFilter::Alphanumeric.accepts('-'));
assert!(CharacterFilter::Exact('x').accepts('x'));
assert!(!CharacterFilter::Exact('x').accepts('y'));
assert!(CharacterFilter::OneOf(vec!['a', 'b', 'c']).accepts('b'));
assert!(!CharacterFilter::OneOf(vec!['a', 'b', 'c']).accepts('d'));
}
#[test]
fn test_position_filter_validation() {
let filter = PositionFilter::new(PositionRange::Range(0, 1), CharacterFilter::Alphabetic);
assert!(filter.validate_position(0, 'A'));
assert!(filter.validate_position(1, 'b'));
assert!(!filter.validate_position(0, '1'));
assert!(filter.validate_position(2, '1')); // Position 2 not covered, allow anything
}
#[test]
fn test_pattern_filters_validation() {
    // Pattern: positions 0-1 alphabetic, positions 2-4 numeric (e.g. "AB123").
    let patterns = PatternFilters::new()
        .add_filter(PositionFilter::new(
            PositionRange::Range(0, 1),
            CharacterFilter::Alphabetic,
        ))
        .add_filter(PositionFilter::new(
            PositionRange::Range(2, 4),
            CharacterFilter::Numeric,
        ));
    // "A1123" puts a digit in an alphabetic slot; "AB1A3" a letter in a numeric slot.
    for (text, accepted) in [("AB123", true), ("A1123", false), ("AB1A3", false)] {
        assert_eq!(patterns.validate_text(text).is_ok(), accepted);
    }
}
#[test]
fn test_custom_filter() {
    // A user-supplied predicate: every position must hold a lowercase character.
    let lowercase_only = PatternFilters::new().add_filter(PositionFilter::new(
        PositionRange::From(0),
        CharacterFilter::Custom(Arc::new(|ch| ch.is_lowercase())),
    ));
    assert!(lowercase_only.validate_text("hello").is_ok());
    // The leading uppercase 'H' violates the predicate.
    assert!(lowercase_only.validate_text("Hello").is_err());
}
}

118
validation-core/src/set.rs Normal file
View File

@@ -0,0 +1,118 @@
use crate::{ValidationConfig, ValidationMergeError, ValidationSettings};
use serde::{Deserialize, Serialize};
/// A named validation fragment: a label plus the `ValidationSettings` it contributes
/// when its containing set is merged.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationRule {
    /// Identifier for this rule (e.g. "phone-length").
    pub name: String,
    /// Optional human-readable explanation of what the rule enforces.
    pub description: Option<String>,
    /// The settings fragment this rule contributes.
    pub settings: ValidationSettings,
}
impl ValidationRule {
pub fn resolve(&self) -> ValidationConfig {
self.settings.resolve()
}
}
/// A named collection of `ValidationRule`s whose settings fragments are merged
/// into a single `ValidationSettings` bundle.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationSet {
    /// Identifier for this set (e.g. "phone").
    pub name: String,
    /// Optional human-readable summary of the set's intent.
    pub description: Option<String>,
    /// Rule fragments merged, in order, by `resolve_settings`.
    pub rules: Vec<ValidationRule>,
}
impl ValidationSet {
    /// Merges every rule's settings fragment into a single `ValidationSettings`.
    ///
    /// Returns a `ValidationMergeError` when the fragments cannot be combined
    /// (e.g. two rules both set the same singleton field).
    pub fn resolve_settings(&self) -> Result<ValidationSettings, ValidationMergeError> {
        let fragments = self.rules.iter().map(|rule| &rule.settings);
        ValidationSettings::merge_rules(fragments)
    }

    /// Merges the rules and resolves the result into a concrete `ValidationConfig`.
    pub fn resolve(&self) -> Result<ValidationConfig, ValidationMergeError> {
        self.resolve_settings().map(|settings| settings.resolve())
    }
}
/// Validation settings as applied at a use site, optionally tagged with the name
/// of the `ValidationSet` they came from.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AppliedValidation {
    /// Name of the originating set, if these settings came from one.
    pub set_name: Option<String>,
    /// The settings snapshot; `resolve` turns it into a `ValidationConfig`.
    pub settings: ValidationSettings,
}
impl AppliedValidation {
pub fn resolve(&self) -> ValidationConfig {
self.settings.resolve()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        CharacterFilterSettings, CharacterLimits, PatternSettings, PositionFilterSettings,
        PositionRange,
    };

    /// Builds a rule with the given name and settings fragment (no description).
    fn rule(name: &str, settings: ValidationSettings) -> ValidationRule {
        ValidationRule {
            name: name.to_string(),
            description: None,
            settings,
        }
    }

    /// Builds a set with the given name and rules (no description).
    fn set(name: &str, rules: Vec<ValidationRule>) -> ValidationSet {
        ValidationSet {
            name: name.to_string(),
            description: None,
            rules,
        }
    }

    #[test]
    fn validation_set_merges_rule_fragments() {
        // Two complementary fragments: one constrains length, the other the pattern.
        let length_rule = rule(
            "phone-length",
            ValidationSettings {
                character_limits: Some(CharacterLimits::new_range(10, 15)),
                ..ValidationSettings::default()
            },
        );
        let digits_rule = rule(
            "digits-only",
            ValidationSettings {
                pattern: Some(PatternSettings {
                    filters: vec![PositionFilterSettings {
                        positions: PositionRange::From(0),
                        filter: CharacterFilterSettings::Numeric,
                    }],
                    description: None,
                }),
                ..ValidationSettings::default()
            },
        );
        let phone = set("phone", vec![length_rule, digits_rule]);

        // Both fragments survive the merge.
        let settings = phone.resolve_settings().expect("set should resolve");
        assert!(settings.character_limits.is_some());
        assert_eq!(settings.pattern.expect("pattern").filters.len(), 1);
    }

    #[test]
    fn validation_set_rejects_duplicate_singleton_rules() {
        // Both rules set `character_limits`, which may only be contributed once.
        let short_rule = rule(
            "short",
            ValidationSettings {
                character_limits: Some(CharacterLimits::new(10)),
                ..ValidationSettings::default()
            },
        );
        let long_rule = rule(
            "long",
            ValidationSettings {
                character_limits: Some(CharacterLimits::new(20)),
                ..ValidationSettings::default()
            },
        );
        let conflict = set("conflict", vec![short_rule, long_rule]);

        assert!(conflict.resolve_settings().is_err());
    }
}