diff --git a/typify-impl/src/convert.rs b/typify-impl/src/convert.rs index f97b31b9..b435a5d7 100644 --- a/typify-impl/src/convert.rs +++ b/typify-impl/src/convert.rs @@ -1,4 +1,4 @@ -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company use std::collections::BTreeSet; @@ -801,41 +801,6 @@ impl TypeSpace { validation: Option<&StringValidation>, ) -> Result<(TypeEntry, &'a Option>)> { match format.as_ref().map(String::as_str) { - None => match validation { - // It should not be possible for the StringValidation to be - // Some, but all its fields to be None, but... just to be sure. - None - | Some(schemars::schema::StringValidation { - max_length: None, - min_length: None, - pattern: None, - }) => Ok((TypeEntryDetails::String.into(), metadata)), - - Some(validation) => { - if let Some(pattern) = &validation.pattern { - let _ = regress::Regex::new(pattern).map_err(|e| Error::InvalidSchema { - type_name: type_name.clone().into_option(), - reason: format!("invalid pattern '{}' {}", pattern, e), - })?; - self.uses_regress = true; - } - - let string = TypeEntryDetails::String.into(); - let type_id = self.assign_type(string); - Ok(( - TypeEntryNewtype::from_metadata_with_string_validation( - self, - type_name, - metadata, - type_id, - validation, - original_schema.clone(), - ), - metadata, - )) - } - }, - Some("uuid") => { self.uses_uuid = true; Ok(( @@ -890,9 +855,49 @@ impl TypeSpace { metadata, )), - Some(unhandled) => { - info!("treating a string format '{}' as a String", unhandled); - Ok((TypeEntryDetails::String.into(), metadata)) + // Apply constraints when there is no format or the format isn't + // one of the recognized values above. + other => { + if let Some(unhandled) = other { + debug!("treating a string format '{}' as a String", unhandled); + } + + match validation { + // It should not be possible for the StringValidation to be + // Some, but all its fields to be None, but... just to be + // sure.
+ None + | Some(schemars::schema::StringValidation { + max_length: None, + min_length: None, + pattern: None, + }) => Ok((TypeEntryDetails::String.into(), metadata)), + + Some(validation) => { + if let Some(pattern) = &validation.pattern { + let _ = + regress::Regex::new(pattern).map_err(|e| Error::InvalidSchema { + type_name: type_name.clone().into_option(), + reason: format!("invalid pattern '{}' {}", pattern, e), + })?; + self.uses_regress = true; + } + + let string = TypeEntryDetails::String.into(); + let type_id = self.assign_type(string); + Ok(( + TypeEntryNewtype::from_metadata_with_string_validation( + self, + type_name, + metadata, + type_id, + validation, + original_schema.clone(), + ), + metadata, + )) + } + } } } } diff --git a/typify-impl/src/merge.rs b/typify-impl/src/merge.rs index 5f1319e9..d127dc1c 100644 --- a/typify-impl/src/merge.rs +++ b/typify-impl/src/merge.rs @@ -1,4 +1,4 @@ -// Copyright 2024 Oxide Computer Company +// Copyright 2026 Oxide Computer Company use std::{ collections::{BTreeMap, BTreeSet}, @@ -783,7 +783,12 @@ fn merge_so_string( let pattern = match (&a.pattern, &b.pattern) { (None, v) | (v, None) => v.clone(), (Some(x), Some(y)) if x == y => Some(x.clone()), - _ => unimplemented!("merging distinct patterns is impractical"), + // Combine distinct patterns using lookaheads so the merged + // string must satisfy all constraints. If x is already a + // sequence of lookaheads (produced by a prior merge), append + // rather than re-wrap nested lookaheads. + (Some(x), Some(y)) if x.starts_with("(?=") => Some(format!("{x}(?={y})")), + (Some(x), Some(y)) => Some(format!("(?={x})(?={y})")), }; if let (Some(min), Some(max)) = (min_length, max_length) { @@ -1885,4 +1890,34 @@ mod tests { serde_json::to_string_pretty(&merged).unwrap(), ) } + + #[test] + fn test_merge_multiple_patterns() { + // Multiple schemas with distinct string patterns that must all be + // satisfied. 
The merged pattern should be a flat sequence of + // lookaheads: (?=p1)(?=p2)(?=p3). + let schemas: Vec = [ + json!({"type": "string", "pattern": "^[a-z]+$"}), + json!({"type": "string", "pattern": "^.+[0-9].+$"}), + json!({"type": "string", "pattern": ".+[A-Z]$"}), + ] + .into_iter() + .map(|v| serde_json::from_value(v).unwrap()) + .collect(); + + let merged = super::merge_all(&schemas, &BTreeMap::default()); + + let expected: schemars::schema::Schema = serde_json::from_value(json!({ + "type": "string", + "pattern": "(?=^[a-z]+$)(?=^.+[0-9].+$)(?=.+[A-Z]$)" + })) + .unwrap(); + + assert_eq!( + merged, + expected, + "{}", + serde_json::to_string_pretty(&merged).unwrap(), + ); + } } diff --git a/typify-test/Cargo.toml b/typify-test/Cargo.toml index 8e134288..e9e60c6c 100644 --- a/typify-test/Cargo.toml +++ b/typify-test/Cargo.toml @@ -15,4 +15,5 @@ ipnetwork = { workspace = true } prettyplease = { workspace = true } schemars = { workspace = true } serde = { workspace = true } +serde_json = { workspace = true } syn = { workspace = true } diff --git a/typify-test/build.rs b/typify-test/build.rs index f5c77373..9288a29a 100644 --- a/typify-test/build.rs +++ b/typify-test/build.rs @@ -1,4 +1,4 @@ -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company use std::collections::{HashMap, HashSet}; use std::{env, fs, path::Path}; @@ -125,6 +125,49 @@ struct UnknownFormat { pancakes: Pancakes, } +struct TriplePattern; +impl JsonSchema for TriplePattern { + fn schema_name() -> String { + "TriplePattern".to_string() + } + + fn json_schema(_: &mut schemars::gen::SchemaGenerator) -> Schema { + schemars::schema::SchemaObject { + subschemas: Some(Box::new(schemars::schema::SubschemaValidation { + all_of: Some(vec![ + schemars::schema::SchemaObject { + string: Some(Box::new(schemars::schema::StringValidation { + pattern: Some("^[a-z].+$".to_string()), + ..Default::default() + })), + ..Default::default() + } + .into(), + schemars::schema::SchemaObject { + 
string: Some(Box::new(schemars::schema::StringValidation { + pattern: Some("^.{4,8}$".to_string()), + ..Default::default() + })), + ..Default::default() + } + .into(), + schemars::schema::SchemaObject { + string: Some(Box::new(schemars::schema::StringValidation { + pattern: Some(".+[a-z]$".to_string()), + ..Default::default() + })), + ..Default::default() + } + .into(), + ]), + ..Default::default() + })), + ..Default::default() + } + .into() + } +} + fn main() { let mut type_space = TypeSpace::default(); @@ -133,6 +176,7 @@ fn main() { NonAsciiChars::add(&mut type_space); UnknownFormat::add(&mut type_space); ipnetwork::IpNetwork::add(&mut type_space); + TriplePattern::add(&mut type_space); let contents = prettyplease::unparse(&syn::parse2::(type_space.to_stream()).unwrap()); diff --git a/typify-test/src/main.rs b/typify-test/src/main.rs index 3ae1bc88..722096ae 100644 --- a/typify-test/src/main.rs +++ b/typify-test/src/main.rs @@ -1,4 +1,4 @@ -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company // Include the generated code to make sure it compiles. include!(concat!(env!("OUT_DIR"), "/codegen.rs")); @@ -56,6 +56,28 @@ fn test_unknown_format() { }; } +#[test] +fn test_triple_pattern() { + // Must satisfy all three patterns simultaneously: + // 1. ^[a-z].+$ — starts with lowercase + // 2. ^.{4,8}$ — 4–8 characters long + // 3. 
.+[a-z]$ — ends with lowercase + + // Valid: 4 lowercase letters + assert!(TriplePattern::try_from("abcd").is_ok()); + // Valid: 6 lowercase letters + assert!(TriplePattern::try_from("abcdef").is_ok()); + + // Fails: starts with uppercase + assert!(TriplePattern::try_from("Abcd").is_err()); + // Fails: ends with uppercase + assert!(TriplePattern::try_from("abcD").is_err()); + // Fails: too short + assert!(TriplePattern::try_from("abc").is_err()); + // Fails: too long + assert!(TriplePattern::try_from("abcdefghijkl").is_err()); +} + mod hashmap { #![allow(dead_code)] diff --git a/typify/tests/schemas/merged-schemas.json b/typify/tests/schemas/merged-schemas.json index e4b49b97..cffd2565 100644 --- a/typify/tests/schemas/merged-schemas.json +++ b/typify/tests/schemas/merged-schemas.json @@ -552,6 +552,23 @@ } } ] + }, + "TriplePattern": { + "allOf": [ + { + "type": "string", + "pattern": "^[a-z].+$", + "format": "custom-id" + }, + { + "type": "string", + "pattern": "^.{4,8}$" + }, + { + "type": "string", + "pattern": ".+[a-z]$" + } + ] } } } diff --git a/typify/tests/schemas/merged-schemas.rs b/typify/tests/schemas/merged-schemas.rs index 33e2b3ea..7f317516 100644 --- a/typify/tests/schemas/merged-schemas.rs +++ b/typify/tests/schemas/merged-schemas.rs @@ -918,6 +918,91 @@ impl TrimFat { Default::default() } } +#[doc = "`TriplePattern`"] +#[doc = r""] +#[doc = r"
JSON schema"] +#[doc = r""] +#[doc = r" ```json"] +#[doc = "{"] +#[doc = " \"allOf\": ["] +#[doc = " {"] +#[doc = " \"type\": \"string\","] +#[doc = " \"format\": \"custom-id\","] +#[doc = " \"pattern\": \"^[a-z].+$\""] +#[doc = " },"] +#[doc = " {"] +#[doc = " \"type\": \"string\","] +#[doc = " \"pattern\": \"^.{4,8}$\""] +#[doc = " },"] +#[doc = " {"] +#[doc = " \"type\": \"string\","] +#[doc = " \"pattern\": \".+[a-z]$\""] +#[doc = " }"] +#[doc = " ]"] +#[doc = "}"] +#[doc = r" ```"] +#[doc = r"
"] +#[derive(:: serde :: Serialize, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +#[serde(transparent)] +pub struct TriplePattern(::std::string::String); +impl ::std::ops::Deref for TriplePattern { + type Target = ::std::string::String; + fn deref(&self) -> &::std::string::String { + &self.0 + } +} +impl ::std::convert::From for ::std::string::String { + fn from(value: TriplePattern) -> Self { + value.0 + } +} +impl ::std::str::FromStr for TriplePattern { + type Err = self::error::ConversionError; + fn from_str(value: &str) -> ::std::result::Result { + static PATTERN: ::std::sync::LazyLock<::regress::Regex> = + ::std::sync::LazyLock::new(|| { + ::regress::Regex::new("(?=^[a-z].+$)(?=^.{4,8}$)(?=.+[a-z]$)").unwrap() + }); + if PATTERN.find(value).is_none() { + return Err("doesn't match pattern \"(?=^[a-z].+$)(?=^.{4,8}$)(?=.+[a-z]$)\"".into()); + } + Ok(Self(value.to_string())) + } +} +impl ::std::convert::TryFrom<&str> for TriplePattern { + type Error = self::error::ConversionError; + fn try_from(value: &str) -> ::std::result::Result { + value.parse() + } +} +impl ::std::convert::TryFrom<&::std::string::String> for TriplePattern { + type Error = self::error::ConversionError; + fn try_from( + value: &::std::string::String, + ) -> ::std::result::Result { + value.parse() + } +} +impl ::std::convert::TryFrom<::std::string::String> for TriplePattern { + type Error = self::error::ConversionError; + fn try_from( + value: ::std::string::String, + ) -> ::std::result::Result { + value.parse() + } +} +impl<'de> ::serde::Deserialize<'de> for TriplePattern { + fn deserialize(deserializer: D) -> ::std::result::Result + where + D: ::serde::Deserializer<'de>, + { + ::std::string::String::deserialize(deserializer)? + .parse() + .map_err(|e: self::error::ConversionError| { + ::custom(e.to_string()) + }) + } +} #[doc = "`UnchangedByMerge`"] #[doc = r""] #[doc = r"
JSON schema"]