diff --git a/benchmarks/java/src/main/java/org/apache/fory/benchmark/SchemaEvolutionSuite.java b/benchmarks/java/src/main/java/org/apache/fory/benchmark/SchemaEvolutionSuite.java new file mode 100644 index 0000000000..6b24d77344 --- /dev/null +++ b/benchmarks/java/src/main/java/org/apache/fory/benchmark/SchemaEvolutionSuite.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.benchmark; + +import java.util.Arrays; +import org.apache.fory.format.annotation.ForyVersion; +import org.apache.fory.format.encoder.Encoders; +import org.apache.fory.format.encoder.RowEncoder; +import org.apache.fory.logging.Logger; +import org.apache.fory.logging.LoggerFactory; +import org.openjdk.jmh.Main; +import org.openjdk.jmh.annotations.Benchmark; + +/** + * Row-codec schema-evolution throughput and allocation. Pair with the JMH gc profiler ({@code -prof + * gc}) to read {@code gc.alloc.rate.norm} (bytes per op). 
Two comparisons matter: {@code + * currentDecode} vs {@code olderDecode} shows that decoding an older payload through a projection + * codec allocates no more than decoding the current schema, because each projection holds its + * historical schema's row layout (no per-decode rebuild); and the {@code *NoEvolution} benchmarks + * vs their evolution-on counterparts show the steady-state cost of enabling {@code + * withSchemaEvolution()} when reading and writing current-version data. + */ +public class SchemaEvolutionSuite { + private static final Logger LOG = LoggerFactory.getLogger(SchemaEvolutionSuite.class); + + public static class PersonV1 { + String name; + int age; + } + + public static class PersonV2 { + String name; + int age; + + @ForyVersion(since = 2) + String email; + } + + // Evolution-enabled codecs for the current (V2) schema; the V1 codec only produces a payload + // whose hash routes the V2 reader onto its projection path. Both standard and compact formats + // are measured: compact is where a per-projection cached row layout matters, so olderDecode vs + // currentDecode there is the parity check. + private static final RowEncoder v1Codec = + Encoders.buildBeanCodec(PersonV1.class).withSchemaEvolution().build().get(); + private static final RowEncoder v2Codec = + Encoders.buildBeanCodec(PersonV2.class).withSchemaEvolution().build().get(); + private static final RowEncoder v1CompactCodec = + Encoders.buildBeanCodec(PersonV1.class).compactEncoding().withSchemaEvolution().build().get(); + private static final RowEncoder v2CompactCodec = + Encoders.buildBeanCodec(PersonV2.class).compactEncoding().withSchemaEvolution().build().get(); + + // Evolution-disabled codecs for the same current (V2) schema. 
Comparing the *NoEvolution + // benchmarks against their evolution-on counterparts isolates the steady-state cost of the + // withSchemaEvolution() flag on the common path (reading and writing current-version data): the + // 8-byte hash slot the evolution wire format adds, plus the hash compare on decode. + private static final RowEncoder v2PlainCodec = + Encoders.buildBeanCodec(PersonV2.class).build().get(); + private static final RowEncoder v2PlainCompactCodec = + Encoders.buildBeanCodec(PersonV2.class).compactEncoding().build().get(); + + private static final PersonV2 person = newPerson(); + private static final byte[] currentBytes = v2Codec.encode(person); + private static final byte[] olderBytes = v1Codec.encode(newPersonV1()); + private static final byte[] currentCompactBytes = v2CompactCodec.encode(person); + private static final byte[] olderCompactBytes = v1CompactCodec.encode(newPersonV1()); + private static final byte[] plainBytes = v2PlainCodec.encode(person); + private static final byte[] plainCompactBytes = v2PlainCompactCodec.encode(person); + + private static PersonV2 newPerson() { + PersonV2 p = new PersonV2(); + p.name = "Ada Lovelace"; + p.age = 36; + p.email = "ada@example.com"; + return p; + } + + private static PersonV1 newPersonV1() { + PersonV1 p = new PersonV1(); + p.name = "Ada Lovelace"; + p.age = 36; + return p; + } + + @Benchmark + public Object encode() { + return v2Codec.encode(person); + } + + @Benchmark + public Object currentDecode() { + return v2Codec.decode(currentBytes); + } + + @Benchmark + public Object olderDecode() { + return v2Codec.decode(olderBytes); + } + + @Benchmark + public Object compactEncode() { + return v2CompactCodec.encode(person); + } + + @Benchmark + public Object compactCurrentDecode() { + return v2CompactCodec.decode(currentCompactBytes); + } + + @Benchmark + public Object compactOlderDecode() { + return v2CompactCodec.decode(olderCompactBytes); + } + + // Evolution-off baselines for the current path. 
Pair each with its evolution-on counterpart + // (encode/currentDecode and the compact variants) to read the flag's overhead. + @Benchmark + public Object encodeNoEvolution() { + return v2PlainCodec.encode(person); + } + + @Benchmark + public Object currentDecodeNoEvolution() { + return v2PlainCodec.decode(plainBytes); + } + + @Benchmark + public Object compactEncodeNoEvolution() { + return v2PlainCompactCodec.encode(person); + } + + @Benchmark + public Object compactCurrentDecodeNoEvolution() { + return v2PlainCompactCodec.decode(plainCompactBytes); + } + + public static void main(String[] args) throws Exception { + if (args.length == 0) { + String commandLine = + "org.apache.fory.*SchemaEvolutionSuite.* -f 3 -wi 3 -i 3 -t 1 -w 2s -r 2s -prof gc -rf csv"; + args = commandLine.split(" "); + } + LOG.info("command line: {}", Arrays.toString(args)); + Main.main(args); + } +} diff --git a/docs/guide/java/row-format.md b/docs/guide/java/row-format.md index 477f9ec136..75b5bc533e 100644 --- a/docs/guide/java/row-format.md +++ b/docs/guide/java/row-format.md @@ -187,6 +187,116 @@ std::string str = bar10->get_string(0); | Memory usage | Full object graph in memory | Only accessed fields | | Suitable for | Small objects, full access | Large objects, selective access | +## Schema evolution + +Enable `.withSchemaEvolution()` on a row, array, or map codec builder to read payloads written +by older versions of the same bean. Writing always uses the current version; reading detects +the payload's version from a strict hash at the head of the payload. Java only. + +Annotate fields added after v1 with `@ForyVersion(since = N)`: + +```java +@Data +public class Person { + String name; + int age; + + @ForyVersion(since = 2) + String email; +} +``` + +A v1 payload (with `name` and `age` only) decodes to a `Person` whose `email` is `null`. +Primitive fields added later default to `0`, `0.0`, or `false`. 
Unannotated fields are treated +as present from the first version, so a class can adopt versioning by annotating only the fields +added after v1. + +For a record, the absent component's default is passed to the canonical constructor, so a +constructor that rejects `null` for a reference component added in a later version throws when +decoding an older payload. Let the constructor tolerate the missing value, for example by +normalizing `null` to a default: + +```java +public record Person(String name, @ForyVersion(since = 2) String email) { + public Person { + if (email == null) { + email = ""; + } + } +} +``` + +Remove a field by deleting the Java member and declaring it on a nested history interface as a +method annotated with `@ForyVersion(until = N)`. The method's return type carries any parameterized +type information from the original field. + +```java +@Data +@ForySchema(removedFields = Person.History.class) +public class Person { + String name; + + @ForyVersion(since = 2) + String email; + + interface History { + @ForyVersion(until = 3) + int age(); + + @ForyVersion(until = 5) + List<String> tags(); + } +} +``` + +The history method name matches the original live descriptor name. For field-backed beans +(Lombok `@Data`, records, or plain classes with a backing field) that is the field name +(`age`, `tags`). For interface beans, where the live member is a getter with no backing field, +it is the method name (`getAge`). + +### Wire format and limitations + +Producers and consumers must agree on the `withSchemaEvolution()` flag — they are not +wire-compatible otherwise. Row payloads always carry an 8-byte hash slot; under evolution its +value is the strict hash (which includes field name and nullability), so a flag-mismatched +peer fails loudly with `ClassNotCompatibleException`.
Arrays and maps of bean elements prepend +an 8-byte strict-hash prefix under evolution and no prefix otherwise; an evolution-on consumer +reading evolution-off bytes also fails with `ClassNotCompatibleException`, but the reverse +direction (evolution-off consumer, evolution-on bytes) is undefined. + +To adopt the flag on an existing deployment, enable `withSchemaEvolution()` on both sides in a +release that changes no schema, then start evolving schemas only once every peer is on the +evolution-enabled build. Turning the flag on and changing a schema in the same release strands +any peer that has not yet upgraded. + +Cross-language consumers (Python, C++) cannot read evolution-enabled payloads. + +A reader selects the matching layout from the 8-byte strict hash on the payload. The hash includes +field names and nullability and is checked for collisions across a bean's own versions when the +codec is built, but it is still a 64-bit value: a payload whose hash coincides with one of the +reader's historical layouts is decoded against that layout. This is the same hash-based dispatch +the row format has always used, so feeding a codec bytes it was not built for has undefined results +whether or not evolution is enabled. Only hand a codec payloads produced for the same bean. + +Nested evolution works to arbitrary depth and places no restriction on shape: a versioned bean +may contain versioned beans that themselves contain versioned beans, the same versioned bean +class may back more than one field, and fields typed as a non-evolving bean, a list, or a map are +unrestricted. Each nesting level is routed to the correct historical layout. A versioned bean may +be used as a map key as well as a map value, and the key and value evolve independently. 
This +holds wherever the map appears: as the codec's top-level type, nested inside a bean field, or +reached through a top-level array or map (such as `List<Map<K, V>>`), and a single +map may evolve more than one distinct bean class across its key and value. A top-level map carries +its own hash identifying both layouts together; a map nested inside an array, another map, or a +bean field has its layouts folded into the enclosing payload's hash. + +When a versioned bean contains other versioned beans, the reader can read one projection layout per +combination of versions across the composition. A reader compiles a combination's codec the first +time it decodes a payload at that combination, so the cost tracks the historical versions you +actually receive, not the number you could in principle define. A map whose key and value both +evolve combines their versions the same way. Retiring an entry from a bean's `History` interface +once you no longer read payloads from that range stops the reader from accepting those payloads; it +is purely a read-side decision, and the writer always uses the current schema. + ## Related Topics - [Xlang Serialization](xlang-serialization.md) - xlang mode diff --git a/docs/specification/row_format_spec.md b/docs/specification/row_format_spec.md index de1e8b50da..139bd08fd2 100644 --- a/docs/specification/row_format_spec.md +++ b/docs/specification/row_format_spec.md @@ -343,6 +343,16 @@ if (fixed_width % 8 == 0): --- +## Schema Evolution (Java Only) + +Schema evolution lets a codec read payloads written by older versions of the same bean. It is implemented in Java only and does not change the cross-language wire contract above; producer and consumer must agree on whether it is enabled. + +The Java encoder frames a row payload with a leading 8-byte schema-hash word. When evolution is enabled, that word holds a stricter hash that also distinguishes field names and nullability; otherwise it holds the format's default schema hash.
Array and map payloads carry no hash word otherwise, so under evolution they gain an 8-byte strict-hash prefix. A map's prefix is a single hash that identifies the key and value layouts together, so a map key and value evolve independently while the payload still carries one hash. + +See the [Java row format guide](../guide/java/row-format.md#schema-evolution) for usage, annotations, and limitations. + +--- + ## Common Specifications The following specifications apply to both standard and compact formats. diff --git a/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java b/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java index 91405ce194..740e85a560 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java @@ -966,7 +966,11 @@ private static boolean isSupported(TypeRef typeRef, TypeResolutionContext ctx return false; } Tuple2, TypeRef> mapKeyValueType = getMapKeyValueType(typeRef); - return isSupported(mapKeyValueType.f0) && isSupported(mapKeyValueType.f1); + // Thread ctx through both key and value, matching the iterable branch above. The single-arg + // isSupported overload builds a fresh context with synthesizeInterfaces=false and the empty + // custom-type registry, which would reject an interface bean used as a map key or value even + // though the same type is supported as a direct field or list element. 
+ return isSupported(mapKeyValueType.f0, ctx) && isSupported(mapKeyValueType.f1, ctx); } else if (cls.isEnum()) { return true; } else { diff --git a/java/fory-format/pom.xml b/java/fory-format/pom.xml index 34886797d7..ae410d51c9 100644 --- a/java/fory-format/pom.xml +++ b/java/fory-format/pom.xml @@ -99,6 +99,22 @@ + + + org.apache.maven.plugins + maven-compiler-plugin + + 11 + + + + org.apache.maven.plugins maven-jar-plugin diff --git a/java/fory-format/src/main/java/org/apache/fory/format/annotation/ForySchema.java b/java/fory-format/src/main/java/org/apache/fory/format/annotation/ForySchema.java new file mode 100644 index 0000000000..b7d4bb856f --- /dev/null +++ b/java/fory-format/src/main/java/org/apache/fory/format/annotation/ForySchema.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.format.annotation; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Class-level row-codec schema metadata used when the codec builder enables schema evolution. + * + *

<p>Live fields without a {@link ForyVersion} annotation are treated as present from the first + * version, so a class can adopt versioning by annotating only the fields added later. + * + *

<p>{@link #removedFields()} points at a class (conventionally a nested {@code interface}) whose + * accessor methods describe fields that have been removed from this bean but still appear on the + * wire in older payloads. Each method's return type is the original Java type of the removed field; + * each method must carry a {@link ForyVersion} annotation with {@code until} set, since removed + * fields have a known end-of-life version. + * + *

<p>Example: + * + *
<pre>{@code
+ * @Data
+ * @ForySchema(removedFields = MyBean.History.class)
+ * public class MyBean {
+ *   private String name;
+ *
+ *   interface History {
+ *     @ForyVersion(until = 3)
+ *     List<String> tags();
+ *
+ *     @ForyVersion(since = 2, until = 5)
+ *     Map<String, Long> counters();
+ *   }
+ * }
+ * }</pre>
+ */ +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.TYPE) +public @interface ForySchema { + /** + * A class whose accessor methods describe historically-present-but-now-removed fields. Default + * {@code void.class} means there are no removed fields. The class is never instantiated; the + * codec reads its method signatures and annotations. + */ + Class removedFields() default void.class; +} diff --git a/java/fory-format/src/main/java/org/apache/fory/format/annotation/ForyVersion.java b/java/fory-format/src/main/java/org/apache/fory/format/annotation/ForyVersion.java new file mode 100644 index 0000000000..3143eab87e --- /dev/null +++ b/java/fory-format/src/main/java/org/apache/fory/format/annotation/ForyVersion.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.format.annotation; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Declares the version window in which a row-codec field is logically present. 
The window is + * inclusive on the left and exclusive on the right, so {@code since=2, until=5} means versions 2, + * 3, and 4. + * + *

<p>Only effective when the codec builder is configured with {@code withSchemaEvolution()}; + * otherwise the annotation is ignored and the field is treated as always present. + * + *

<p>May be placed on a field or an accessor method, which also covers a record component. Record + * components are covered by {@code FIELD} and {@code METHOD} rather than {@code + * ElementType.RECORD_COMPONENT}: the compiler propagates a record-component annotation to the + * backing field and the accessor method (the targets it declares), and the codec reads the + * annotation from those elements. {@code RECORD_COMPONENT} is a JDK 16 enum constant and would + * break this Java 11 module at runtime, so it is intentionally omitted. + */ +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.FIELD, ElementType.METHOD}) +public @interface ForyVersion { + /** First version (inclusive) that contains this field. Defaults to the class base version. */ + int since() default 1; + + /** + * First version (exclusive) that no longer contains this field. The default {@link + * Integer#MAX_VALUE} means the field has no upper bound and is present in every version from + * {@link #since()} onward. + */ + int until() default Integer.MAX_VALUE; +} diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayCodecBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayCodecBuilder.java index 6e6c6d3645..75a91da71b 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayCodecBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayCodecBuilder.java @@ -22,16 +22,18 @@ import static org.apache.fory.type.TypeUtils.getRawType; import java.lang.invoke.MethodHandle; -import java.lang.invoke.MethodHandles; -import java.lang.invoke.MethodType; import java.util.Collection; import java.util.HashSet; +import java.util.Map; import java.util.Set; import java.util.function.Function; import java.util.function.Supplier; +import org.apache.fory.Fory; +import org.apache.fory.collection.LongMap; import org.apache.fory.format.row.binary.writer.BinaryArrayWriter; import
org.apache.fory.format.type.DataTypes; import org.apache.fory.format.type.Field; +import org.apache.fory.format.type.SchemaHistory; import org.apache.fory.format.type.TypeInference; import org.apache.fory.reflect.TypeRef; import org.apache.fory.type.TypeUtils; @@ -63,17 +65,120 @@ public ArrayEncoder get() { Function> buildWithWriter() { loadArrayInnerCodecs(); - final Function generatedEncoderFactory = + final TypeRef elementType = TypeUtils.getElementType(collectionType); + final Class elementClass = schemaEvolution ? evolutionBean(elementType) : null; + if (elementClass == null) { + final Function generatedEncoderFactory = + generatedEncoderFactory(); + return new Function>() { + @Override + public ArrayEncoder apply(final BinaryArrayWriter writer) { + return new BinaryArrayEncoder<>( + writer, generatedEncoderFactory.apply(writer), sizeEmbedded); + } + }; + } + return buildVersionedWithWriter(elementType, elementClass); + } + + /** + * Whether this array takes the evolution path, and a representative bean for naming the generated + * codec. A directly-typed bean (versioned or not) takes the path so the strict-hash prefix is + * always present and an evolution-on consumer can detect a flag-mismatched producer cleanly; a + * bean nested inside a list/map/array element is found by descending the wrapper. Null when the + * element carries no bean. The per-version enumeration over every reachable bean is done by + * {@link #buildElementSchemaHistory}. + */ + private Class evolutionBean(final TypeRef elementType) { + return SchemaHistory.evolutionBean(elementType, typeCtx()); + } + + private Function> buildVersionedWithWriter( + final TypeRef elementType, final Class elementClass) { + // Enumerate the element field over every versioned bean reachable through its wrappers, so an + // element like Map evolves both the key bean and the value bean. 
The element + // schema's strict hash identifies the whole combination, and each combination's chosen versions + // are carried in vs.nestedBeanSchemas(), which ProjectionRouting uses to generate a projection + // row codec for every nested class (not just one). + SchemaHistory history = buildElementSchemaHistory(elementField.name(), elementType); + SchemaHistory.VersionedSchema current = history.current(); + + // Index of hash → deferred projection source per non-current combination. Building it compiles + // nothing: a combination's row and array codec classes are generated the first time a payload + // with that hash is decoded. The suffix encodes each chosen inner-bean version so distinct + // cross-product entries do not collide on a single generated class. + // + // Keyed by the raw strict hash straight from SchemaHistory, which already proves these hashes + // are unique across versions() and distinct from the current schema, so no builder-side + // collision check is needed here (unlike the map codec's combined (key, value) hash). + LongMap projectionSources = new LongMap<>(); + String elementName = elementField.name(); + for (SchemaHistory.VersionedSchema vs : history.versions()) { + if (vs == current) { + continue; + } + projectionSources.put(vs.strictHash(), new ProjectionSource(elementClass, elementName, vs)); + } + final Function currentFactory = generatedEncoderFactory(); + long currentHash = current.strictHash(); return new Function>() { @Override public ArrayEncoder apply(final BinaryArrayWriter writer) { return new BinaryArrayEncoder<>( - writer, generatedEncoderFactory.apply(writer), sizeEmbedded); + writer, + currentFactory.apply(writer), + sizeEmbedded, + currentHash, + projectionSources, + fory); } }; } + /** + * Deferred projection codec for one historical element version. 
Holds only the inputs to generate + * the codec; the row and array codec classes are compiled on the first {@link #compile} call (the + * first decode of this version's hash), not at build time. + */ + private final class ProjectionSource implements BinaryArrayEncoder.ProjectionSource { + private final Class elementClass; + private final String elementName; + private final SchemaHistory.VersionedSchema version; + + ProjectionSource( + Class elementClass, String elementName, SchemaHistory.VersionedSchema version) { + this.elementClass = elementClass; + this.elementName = elementName; + this.version = version; + } + + @Override + public BinaryArrayEncoder.ProjectionArrayCodec compile(Fory fory) { + String suffix = ProjectionRouting.projectionSuffix(version); + // Generates the projection row codec for every nested versioned bean class in this + // combination, both map key and value, so the array codec's references all resolve. + Map, String> nestedSuffixes = + ProjectionRouting.nestedSuffixesFor(version, codecFormat); + Class arrayClass = + Encoders.loadOrGenProjectionArrayCodecClass( + collectionType, TypeRef.of(elementClass), codecFormat, suffix, nestedSuffixes); + MethodHandle ctor = Encoders.constructorHandleFor(arrayClass, GeneratedArrayEncoder.class); + // forElement substitutes each chosen historical struct into its leaf, so the element field at + // this combination is simply the single field of vs.schema(); wrap it back in the list field. 
+ Field histListField = + DataTypes.arrayField(elementName, DataTypes.fieldOfSchema(version.schema(), 0)); + try { + BinaryArrayWriter projWriter = codecFormat.newArrayWriter(histListField); + Object[] references = {histListField, projWriter, fory}; + GeneratedArrayEncoder codec = (GeneratedArrayEncoder) ctor.invokeExact(references); + return new BinaryArrayEncoder.ProjectionArrayCodec(projWriter, codec); + } catch (Throwable e) { + throw ExceptionUtils.throwException(e); + } + } + } + private void loadArrayInnerCodecs() { final Set> set = new HashSet<>(); Encoders.findBeanToken(collectionType, set); @@ -90,30 +195,15 @@ Function generatedEncoderFactory() { final TypeRef elementType = TypeUtils.getElementType(collectionType); final Class arrayCodecClass = Encoders.loadOrGenArrayCodecClass(collectionType, elementType, codecFormat); - - final MethodHandle constructorHandle; - try { - final var constructor = - arrayCodecClass.asSubclass(GeneratedArrayEncoder.class).getConstructor(Object[].class); - constructorHandle = - MethodHandles.lookup() - .unreflectConstructor(constructor) - .asType(MethodType.methodType(GeneratedArrayEncoder.class, Object[].class)); - } catch (final NoSuchMethodException | IllegalAccessException e) { - throw new EncoderException( - "Failed to construct array codec for " - + collectionType - + " with element class " - + elementType, - e); - } + final MethodHandle constructorHandle = + Encoders.constructorHandleFor(arrayCodecClass, GeneratedArrayEncoder.class); return new Function() { @Override public GeneratedArrayEncoder apply(final BinaryArrayWriter writer) { final Object[] references = {writer.getField(), writer, fory}; try { return (GeneratedArrayEncoder) constructorHandle.invokeExact(references); - } catch (final Throwable t) { + } catch (Throwable t) { throw ExceptionUtils.throwException(t); } } diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayEncoderBuilder.java 
b/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayEncoderBuilder.java index a5ec4715b2..58e650358b 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayEncoderBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayEncoderBuilder.java @@ -24,6 +24,7 @@ import java.lang.reflect.Array; import java.util.HashSet; +import java.util.Map; import org.apache.fory.Fory; import org.apache.fory.codegen.CodeGenerator; import org.apache.fory.codegen.CodegenContext; @@ -54,10 +55,35 @@ public ArrayEncoderBuilder(Class arrayCls, Class beanClass) { } public ArrayEncoderBuilder(TypeRef clsType, TypeRef beanType) { + this(clsType, beanType, (String) null); + } + + /** + * Construct a projection array codec builder. {@code classSuffix} names this codec's own class + * (encoding the whole nested-bean version combination); {@code nestedSuffixes} routes each nested + * versioned bean class to its own projection row codec, so an element wrapping more than one bean + * (such as {@code Map}) embeds the right historical codec for each. + */ + ArrayEncoderBuilder( + TypeRef clsType, + TypeRef beanType, + String classSuffix, + Map, String> nestedSuffixes) { + this(clsType, beanType, classSuffix); + this.nestedClassSuffixes = nestedSuffixes; + } + + /** + * Construct an array codec builder that embeds row codec class references for its element bean + * with the supplied suffix. Used by schema-evolution code to point per-version array codecs at + * per-version row codecs. + */ + ArrayEncoderBuilder(TypeRef clsType, TypeRef beanType, String rowCodecSuffix) { // A top-level collection has no enclosing bean, so scope element-codec resolution to Object to // match TypeInference's empty-path enclosing type; beanType still names the element type for // class naming and the empty-array template below. 
super(new CodegenContext(), beanType, Object.class); + this.rowCodecSuffixForBeans = rowCodecSuffix; arrayToken = clsType; ctx.reserveName(ROOT_ARRAY_WRITER_NAME); ctx.reserveName(ROOT_ARRAY_NAME); @@ -86,7 +112,9 @@ public ArrayEncoderBuilder(TypeRef clsType, TypeRef beanType) { @Override public String genCode() { ctx.setPackage(CodeGenerator.getPackage(beanClass)); - String className = codecClassName(beanClass, TypeInference.inferTypeName(arrayToken)); + String className = + codecClassName(beanClass, TypeInference.inferTypeName(arrayToken)) + + (rowCodecSuffixForBeans == null ? "" : rowCodecSuffixForBeans); ctx.setClassName(className); // don't addImport(arrayClass), because user class may name collide. // janino don't support generics, so GeneratedCodec has no generics @@ -126,8 +154,8 @@ public String genCode() { long startTime = System.nanoTime(); String code = ctx.genCode(); - long durationMs = (System.nanoTime() - startTime) / 1000_000; - LOG.info("Generate array codec for class {} take {} us", beanClass, durationMs); + long durationUs = (System.nanoTime() - startTime) / 1000; + LOG.info("Generate array codec for class {} take {} us", beanClass, durationUs); return code; } diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseBinaryEncoderBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseBinaryEncoderBuilder.java index 530bf5a931..cee5baa7c5 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseBinaryEncoderBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseBinaryEncoderBuilder.java @@ -96,12 +96,36 @@ public abstract class BaseBinaryEncoderBuilder extends CodecBuilder { protected static TypeRef binaryArrayTypeToken = TypeRef.of(BinaryArray.class); protected final Map, Reference> arrayWriterMap = new HashMap<>(); - protected final Map, Reference> beanEncoderMap = new HashMap<>(); + + // Keyed by beanCodecKey(typeRef) rather than the raw typeRef 
so a single builder can hold two + // codecs for one bean class when position matters: a map decodes its key bean at the current + // schema while its value bean may project to a historical one, and the two would otherwise + // collide on the same type key. + protected final Map beanEncoderMap = new HashMap<>(); + + /** + * When non-null, nested bean codec class references generated by this builder will be suffixed + * with this string. Used by schema-evolution code paths to direct generated array/map codecs to + * the projection variant of an element bean's row codec. + */ + protected String rowCodecSuffixForBeans; + + /** + * Per-class projection suffix for nested bean codecs, used when one generated codec embeds more + * than one distinct versioned bean class at independent versions — for example an element typed + * {@code Map}, whose key and value beans each project to their own historical row + * codec. Looked up by raw class in {@link #nestedBeanSuffix}. When this map is null, the uniform + * {@link #rowCodecSuffixForBeans} applies instead; when it is non-null, a class absent from it + * gets an empty suffix, i.e. it is read at its current schema rather than at this codec's own + * suffix. Routing by raw class is exact because + * a writer writes one definition of a class, so every occurrence of a class in one payload is at + * the same version. + */ + protected Map, String> nestedClassSuffixes; + // We need to call beanEncoder's rowWriter.reset() before write a corresponding nested bean every // time. // Outermost beanEncoder's rowWriter.reset() should be called outside generated code before // writer an outermost bean every time. 
- protected final Map, Reference> rowWriterMap = new HashMap<>(); + protected final Map rowWriterMap = new HashMap<>(); protected final CustomTypeHandler customTypeHandler = CustomTypeEncoderRegistry.customTypeHandler(); protected final TypeResolutionContext typeCtx; @@ -511,34 +535,10 @@ protected Expression serializeForBean( Field fieldIfKnown, TypeRef typeRef, Expression structField) { - Class rawType = getRawType(typeRef); - Reference rowWriter; - Reference beanEncoder = beanEncoderMap.get(typeRef); - if (beanEncoder == null) { - // janino generics don't add cast, so this `<${type}>` is only for generated code readability - Expression schema = createSchemaFromStructField(structField); - String rowWriterName = - ctx.newName(StringUtils.uncapitalize(rawType.getSimpleName() + "RowWriter")); - NewInstance newRowWriter = new NewInstance(rowWriterType(), schema, writer); - ctx.addField(ctx.type(rowWriterType()), rowWriterName, newRowWriter); - - Preconditions.checkArgument(!codecClassName(rawType).contains(".")); - String encoderName = ctx.newName(StringUtils.uncapitalize(codecClassName(rawType))); - String encoderClass = codecQualifiedClassName(rawType); - TypeRef codecTypeRef = TypeRef.of(GeneratedRowEncoder.class); - NewInstance newEncoder = - new NewInstance( - codecTypeRef, - encoderClass, - ExpressionUtils.newObjectArray(schema, newRowWriter, foryRef)); - ctx.addField(encoderClass, encoderName, newEncoder); - - rowWriter = new Reference(rowWriterName, rowWriterType()); - rowWriterMap.put(typeRef, rowWriter); - beanEncoder = new Reference(encoderName, codecTypeRef); - beanEncoderMap.put(typeRef, beanEncoder); - } - rowWriter = rowWriterMap.get(typeRef); + registerBeanCodec(writer, typeRef, structField); + Object codecKey = beanCodecKey(typeRef); + Reference rowWriter = rowWriterMap.get(codecKey); + Reference beanEncoder = beanEncoderMap.get(codecKey); Expression expression = serializeForNotNullBean(ordinal, writer, inputObject, fieldIfKnown, rowWriter, 
beanEncoder); @@ -547,6 +547,68 @@ protected Expression serializeForBean( new Expression.IsNull(inputObject), new Invoke(writer, "setNullAt", ordinal), expression); } + /** + * Idempotently add the nested-bean row writer and row encoder as fields on the generated codec + * class and register them in {@link #beanEncoderMap} and {@link #rowWriterMap}. Used both by + * {@link #serializeForBean} and by decode-only projection codegen, where the encode pass is + * skipped but the decode pass still needs the bean encoder reference. + */ + protected void registerBeanCodec(Expression writer, TypeRef typeRef, Expression structField) { + Object codecKey = beanCodecKey(typeRef); + if (beanEncoderMap.containsKey(codecKey)) { + return; + } + Class rawType = getRawType(typeRef); + Expression schema = createSchemaFromStructField(structField); + String rowWriterName = + ctx.newName(StringUtils.uncapitalize(rawType.getSimpleName() + "RowWriter")); + NewInstance newRowWriter = new NewInstance(rowWriterType(), schema, writer); + ctx.addField(ctx.type(rowWriterType()), rowWriterName, newRowWriter); + + Preconditions.checkArgument(!codecClassName(rawType).contains(".")); + String encoderName = ctx.newName(StringUtils.uncapitalize(codecClassName(rawType))); + String encoderClass = codecQualifiedClassName(rawType) + nestedBeanSuffix(typeRef); + TypeRef codecTypeRef = TypeRef.of(GeneratedRowEncoder.class); + NewInstance newEncoder = + new NewInstance( + codecTypeRef, + encoderClass, + ExpressionUtils.newObjectArray(schema, newRowWriter, foryRef)); + ctx.addField(encoderClass, encoderName, newEncoder); + + rowWriterMap.put(codecKey, new Reference(rowWriterName, rowWriterType())); + beanEncoderMap.put(codecKey, new Reference(encoderName, codecTypeRef)); + } + + /** + * Registration key for a nested bean's row writer and codec in {@link #beanEncoderMap} and {@link + * #rowWriterMap}. Defaults to the type itself, so each bean class maps to a single codec. 
+ * Subclasses where one class can appear in two positions that need different codecs (such as a + * map key versus its value) override this to keep those registrations distinct. + */ + protected Object beanCodecKey(TypeRef typeRef) { + return typeRef; + } + + /** + * Suffix to append to a nested bean's codec class name when emitting a reference. When this + * builder embeds several distinct versioned bean classes at independent versions, {@link + * #nestedClassSuffixes} routes each class to its own projection codec; otherwise the single + * uniform {@link #rowCodecSuffixForBeans} (or empty) applies. Subclasses with positional routing + * (a map key versus its value) override this. + */ + protected String nestedBeanSuffix(TypeRef typeRef) { + if (nestedClassSuffixes != null) { + // The suffix map is keyed from the same field walk as this descent, and + // SchemaHistory.collectNestedSites sites every bean with more than one version. So an absent + // class is necessarily single-version: read it at its current schema (empty suffix), not this + // codec's own suffix. Keep this silent rather than fail-loud; a versioned bean cannot miss. + String suffix = nestedClassSuffixes.get(getRawType(typeRef)); + return suffix == null ? "" : suffix; + } + return rowCodecSuffixForBeans == null ? "" : rowCodecSuffixForBeans; + } + protected Expression createSchemaFromStructField(Expression structField) { return new StaticInvoke( DataTypes.class, "schemaFromStructField", "schema", SCHEMA_TYPE, false, structField); @@ -712,9 +774,19 @@ protected Expression deserializeFor( * . */ protected Expression deserializeForBean(Expression row, TypeRef typeRef) { - Reference beanEncoder = beanEncoderMap.get(typeRef); + // beanCodecKey(typeRef) must resolve to the same position (key vs value) here as it did when + // registerBeanCodec() ran. A miss means this decode is reached outside the position scope that + // registered the codec -- e.g. 
a nested key bean built outside MapEncoderBuilder.keyScoped / + // KeyPositionScope -- so the lookup falls back to the wrong key. Fail loud rather than + // mis-route. + Reference beanEncoder = beanEncoderMap.get(beanCodecKey(typeRef)); if (beanEncoder == null) { - throw new IllegalStateException("beanEncoder should have be added in serializeForBean()"); + throw new IllegalStateException( + "No bean codec registered for " + + typeRef + + " under key " + + beanCodecKey(typeRef) + + "; registerBeanCodec() must run in the same key/value position as this decode"); } Invoke beanObj = new Invoke(beanEncoder, "fromRow", TypeUtils.OBJECT_TYPE, false, row); return new Cast(beanObj, typeRef, "bean"); diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseCodecBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseCodecBuilder.java index 81f78ca247..5c9a77e787 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseCodecBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseCodecBuilder.java @@ -19,17 +19,24 @@ package org.apache.fory.format.encoder; +import java.util.function.UnaryOperator; import org.apache.fory.Fory; import org.apache.fory.format.row.binary.CompactBinaryRow; import org.apache.fory.format.row.binary.writer.CompactBinaryRowWriter; +import org.apache.fory.format.type.CustomTypeEncoderRegistry; import org.apache.fory.format.type.Schema; +import org.apache.fory.format.type.SchemaHistory; +import org.apache.fory.reflect.TypeRef; +import org.apache.fory.type.TypeResolutionContext; public class BaseCodecBuilder> { + protected Schema schema; protected int initialBufferSize = 16; protected boolean sizeEmbedded = true; protected Fory fory; protected Encoding codecFormat = DefaultCodecFormat.INSTANCE; + protected boolean schemaEvolution = false; BaseCodecBuilder(final Schema schema) { this.schema = schema; @@ -58,6 +65,27 @@ public B withSizeEmbedded(final boolean 
sizeEmbedded) { return castThis(); } + /** + * Enable schema evolution. The codec accepts payloads written by older versions of the same bean, + * using the {@link org.apache.fory.format.annotation.ForyVersion} and {@link + * org.apache.fory.format.annotation.ForySchema} annotations to reconstruct historical schemas. + * Writing always uses the current version. + * + *

This flag is part of the wire contract: producers and consumers must agree on it, and it + * cannot be flipped on an existing dataset without rewriting. For array and map codecs it adds an + * 8-byte strict-hash prefix to the payload. Because those codecs have no header otherwise, an + * evolution-off reader has no way to tell that prefix from valid body bytes, so reading + * evolution-on bytes with evolution off mis-decodes silently rather than failing. The reverse + * (an evolution-on reader given evolution-off bytes) takes the body's first 8 bytes as the hash + * and is rejected unless they happen to equal a known schema hash. Row payloads already carry an 8-byte hash slot, so a flag mismatch there is detected + * and rejected; but the slot is computed with a stricter hash under evolution (it also + * distinguishes field names and nullability), so flipping the flag rejects reads of previously + * written rows even when the field layout is byte-identical. + */ + public B withSchemaEvolution() { + this.schemaEvolution = true; + return castThis(); + } + /** * Configure compact encoding, which is more space efficient than the default encoding, but is not * yet stable. See {@link CompactBinaryRow} for details. @@ -68,6 +96,44 @@ public B compactEncoding() { return castThis(); } + /** + * Build the schema history for {@code targetClass} under the active codec format. The compact + * format sorts schema fields, so historical schemas must be sorted the same way for their strict + * hashes and layouts to match what the writer produces; the default format passes schemas through + * unchanged. + */ + protected SchemaHistory buildSchemaHistory(final Class targetClass) { + return SchemaHistory.build(targetClass, schemaTransform()); + } + + /** + * History of a top-level array element or map entry field, enumerated over the cross-product of + * every versioned bean reachable through the field's list/map/array wrappers (a directly-typed + * bean, a list element, or a map key or value). 
Used by the array/map codecs so the element + * schema's strict hash identifies all nested layouts jointly, letting a wrapper that reaches more + * than one distinct versioned bean class evolve every one of them. + */ + protected SchemaHistory buildElementSchemaHistory( + final String fieldName, final TypeRef elementType) { + return SchemaHistory.forElement(fieldName, elementType, schemaTransform()); + } + + /** + * Type-resolution context for discovering versioned beans reachable from array/map element types. + * Synthesizes interface-typed bean fields the same way the row-format type inference does; + * without it a class with interface members would not be recognized as a bean even though the row + * codec can encode it, and its older versions would never be enumerated. + */ + protected static TypeResolutionContext typeCtx() { + return new TypeResolutionContext(CustomTypeEncoderRegistry.customTypeHandler(), true); + } + + private UnaryOperator schemaTransform() { + return codecFormat == CompactCodecFormat.INSTANCE + ? 
CompactBinaryRowWriter::sortSchema + : UnaryOperator.identity(); + } + @SuppressWarnings("unchecked") protected B castThis() { return (B) this; diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryArrayEncoder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryArrayEncoder.java index d1b2b9184f..6c5ddb3ffc 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryArrayEncoder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryArrayEncoder.java @@ -19,9 +19,13 @@ package org.apache.fory.format.encoder; +import org.apache.fory.Fory; +import org.apache.fory.collection.LongMap; +import org.apache.fory.exception.ClassNotCompatibleException; import org.apache.fory.format.row.binary.BinaryArray; import org.apache.fory.format.row.binary.writer.BinaryArrayWriter; import org.apache.fory.format.type.Field; +import org.apache.fory.memory.LittleEndian; import org.apache.fory.memory.MemoryBuffer; import org.apache.fory.memory.MemoryUtils; @@ -30,13 +34,69 @@ class BinaryArrayEncoder implements ArrayEncoder { private final GeneratedArrayEncoder codec; private final boolean sizeEmbedded; + /** + * Strict hash of the element bean's current schema; written before the array payload when {@code + * schemaEvolution} is on. + */ + private final long currentHash; + + /** + * Hash → source able to compile a projection codec for an older element schema. {@code null} + * disables versioning (evolution off, no hash prefix). Non-null but empty under evolution with no + * historical versions: the hash prefix is still written for flag-mismatch detection. Shared and + * immutable; a combination's codec class is compiled only the first time its hash is decoded. + */ + private final LongMap projectionSources; + + /** + * Per-encoder cache of projection codecs compiled on first decode of their hash. 
Lock-free: an + * encoder is single-threaded (see {@link ArrayEncoder}), and the class compile is memoized + * globally by the shared code generator. + */ + private final LongMap projections; + + private final Fory fory; + + /** + * A projection variant of the array codec along with the writer used to materialize an array + * instance of the right physical type (standard vs. compact) for the historical element field. + */ + static final class ProjectionArrayCodec { + final BinaryArrayWriter writer; + final GeneratedArrayEncoder codec; + + ProjectionArrayCodec(BinaryArrayWriter writer, GeneratedArrayEncoder codec) { + this.writer = writer; + this.codec = codec; + } + } + + /** Compiles one historical element version's projection codec on first decode of its hash. */ + interface ProjectionSource { + ProjectionArrayCodec compile(Fory fory); + } + BinaryArrayEncoder( final BinaryArrayWriter writer, final GeneratedArrayEncoder codec, final boolean sizeEmbedded) { + this(writer, codec, sizeEmbedded, 0L, null, null); + } + + BinaryArrayEncoder( + final BinaryArrayWriter writer, + final GeneratedArrayEncoder codec, + final boolean sizeEmbedded, + final long currentHash, + final LongMap projectionSources, + final Fory fory) { this.writer = writer; this.codec = codec; this.sizeEmbedded = sizeEmbedded; + this.currentHash = currentHash; + this.projectionSources = projectionSources; + this.fory = fory; + this.projections = projectionSources == null ? null : new LongMap<>(projectionSources.size); } @Override @@ -62,22 +122,89 @@ public T decode(final MemoryBuffer buffer) { @Override public T decode(final byte[] bytes) { - // byte[] overloads ignore sizeEmbedded: encode writes no size prefix, decode uses bytes.length. + // byte[] overloads ignore sizeEmbedded: encode writes no length prefix (under schema evolution + // an 8-byte hash leads the body, but that is data, not framing), so decode takes the size from + // bytes.length. 
return decode(MemoryUtils.wrap(bytes), bytes.length); } + @SuppressWarnings("unchecked") T decode(final MemoryBuffer buffer, final int size) { - final BinaryArray array = writer.newArray(); + if (projectionSources == null) { + // Evolution off: the whole payload is body, with no hash prefix. Reading evolution-on bytes + // here cannot be caught: the array wire form has no hash slot when evolution is off, and an + // evolution-on payload's leading 8-byte FNV hash is indistinguishable from a valid array + // body, so it silently mis-decodes. The row-format guide documents this direction as + // unsupported; producer and consumer must agree on the flag. + final BinaryArray array = writer.newArray(); + final int readerIndex = buffer.readerIndex(); + array.pointTo(buffer, readerIndex, size); + buffer.readerIndex(readerIndex + size); + return fromArray(array); + } + if (size < 8) { + throw new ClassNotCompatibleException( + "Array payload too small for an 8-byte schema hash under schema evolution: size=" + size); + } + final long peerHash = buffer.readInt64(); + final int bodySize = size - 8; + if (peerHash == currentHash) { + final BinaryArray array = writer.newArray(); + final int readerIndex = buffer.readerIndex(); + array.pointTo(buffer, readerIndex, bodySize); + buffer.readerIndex(readerIndex + bodySize); + return fromArray(array); + } + ProjectionArrayCodec projection = resolveProjection(peerHash); + if (projection == null) { + throw new ClassNotCompatibleException( + String.format( + "Array element schema is not consistent. 
self/peer hash are %x/%x.", + currentHash, peerHash)); + } + BinaryArray array = projection.writer.newArray(); final int readerIndex = buffer.readerIndex(); - array.pointTo(buffer, readerIndex, size); - buffer.readerIndex(readerIndex + size); - return fromArray(array); + array.pointTo(buffer, readerIndex, bodySize); + buffer.readerIndex(readerIndex + bodySize); + return (T) projection.codec.fromArray(array); + } + + /** + * The projection codec for {@code peerHash}, or {@code null} if no historical version has that + * hash. Compiles the codec on first encounter and caches it. Single-threaded by the {@link + * ArrayEncoder} contract, so the cache needs no locking. + */ + private ProjectionArrayCodec resolveProjection(final long peerHash) { + ProjectionArrayCodec cached = projections.get(peerHash); + if (cached != null) { + return cached; + } + ProjectionSource source = projectionSources.get(peerHash); + if (source == null) { + return null; + } + ProjectionArrayCodec projection = source.compile(fory); + projections.put(peerHash, projection); + return projection; } @Override public byte[] encode(final T obj) { final BinaryArray array = toArray(obj); - return writer.getBuffer().getBytes(0, array.getSizeInBytes()); + if (projectionSources == null) { + return writer.getBuffer().getBytes(0, array.getSizeInBytes()); + } + // Build the result with a single allocation: the result byte[]. The hash header is poked + // in via LittleEndian (no buffer wrapper) and the body is bulk-copied out of the writer's + // backing buffer into the result. 
+ final int n = array.getSizeInBytes(); + if (n > Integer.MAX_VALUE - 8) { + throw new EncoderException("Array body too large to prepend schema hash header: " + n); + } + final byte[] result = new byte[8 + n]; + LittleEndian.putInt64(result, 0, currentHash); + writer.getBuffer().get(0, result, 8, n); + return result; } @Override @@ -87,6 +214,9 @@ public int encode(final MemoryBuffer buffer, final T obj) { if (sizeEmbedded) { buffer.writeInt32(-1); } + if (projectionSources != null) { + buffer.writeInt64(currentHash); + } try { writer.setBuffer(buffer); toArray(obj); diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryMapEncoder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryMapEncoder.java index 90ba96dc5e..722deafa0a 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryMapEncoder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryMapEncoder.java @@ -19,10 +19,14 @@ package org.apache.fory.format.encoder; +import org.apache.fory.Fory; +import org.apache.fory.collection.LongMap; +import org.apache.fory.exception.ClassNotCompatibleException; import org.apache.fory.format.row.binary.BinaryArray; import org.apache.fory.format.row.binary.BinaryMap; import org.apache.fory.format.row.binary.writer.BinaryArrayWriter; import org.apache.fory.format.type.Field; +import org.apache.fory.memory.LittleEndian; import org.apache.fory.memory.MemoryBuffer; import org.apache.fory.memory.MemoryUtils; @@ -33,6 +37,45 @@ class BinaryMapEncoder implements MapEncoder { private final BinaryArrayWriter keyWriter; private final GeneratedMapEncoder codec; private final boolean sizeEmbedded; + private final long currentHash; + + /** + * Combined (key,value) hash → source able to compile a projection codec for an older layout. + * {@code null} disables versioning (evolution off, no hash prefix); non-null but empty under + * evolution with no historical combinations. 
Shared and immutable; a combination's codec class is + * compiled only the first time its hash is decoded. + */ + private final LongMap projectionSources; + + /** + * Per-encoder cache of projection codecs compiled on first decode of their hash. Lock-free: an + * encoder is single-threaded (see {@link MapEncoder}), and the class compile is memoized globally + * by the shared code generator. + */ + private final LongMap projections; + + private final Fory fory; + + /** + * Per-version projection codec; the {@code Encoding} and historical {@code mapField} together + * materialize an empty map shaped for the historical layout (standard vs. compact). + */ + static final class ProjectionMapCodec { + final Encoding format; + final Field mapField; + final GeneratedMapEncoder codec; + + ProjectionMapCodec(Encoding format, Field mapField, GeneratedMapEncoder codec) { + this.format = format; + this.mapField = mapField; + this.codec = codec; + } + } + + /** Compiles one historical (key,value) combination's projection codec on first decode. 
*/ + interface ProjectionSource { + ProjectionMapCodec compile(Encoding format, Fory fory); + } BinaryMapEncoder( final Encoding format, @@ -41,12 +84,29 @@ class BinaryMapEncoder implements MapEncoder { final BinaryArrayWriter keyWriter, final GeneratedMapEncoder codec, final boolean sizeEmbedded) { + this(format, mapField, valWriter, keyWriter, codec, sizeEmbedded, 0L, null, null); + } + + BinaryMapEncoder( + final Encoding format, + final Field mapField, + final BinaryArrayWriter valWriter, + final BinaryArrayWriter keyWriter, + final GeneratedMapEncoder codec, + final boolean sizeEmbedded, + final long currentHash, + final LongMap projectionSources, + final Fory fory) { this.format = format; this.mapField = mapField; this.valWriter = valWriter; this.keyWriter = keyWriter; this.codec = codec; this.sizeEmbedded = sizeEmbedded; + this.currentHash = currentHash; + this.projectionSources = projectionSources; + this.fory = fory; + this.projections = projectionSources == null ? null : new LongMap<>(projectionSources.size); } @Override @@ -75,24 +135,91 @@ public M decode(final MemoryBuffer buffer) { return decode(buffer, sizeEmbedded ? buffer.readInt32() : buffer.remaining()); } + @SuppressWarnings("unchecked") M decode(final MemoryBuffer buffer, final int size) { - final BinaryMap map = format.newMap(mapField); - final int readerIndex = buffer.readerIndex(); - map.pointTo(buffer, readerIndex, size); - buffer.readerIndex(readerIndex + size); - return fromMap(map); + if (projectionSources == null) { + // Evolution off: the whole payload is body, with no hash prefix. Reading evolution-on bytes + // here cannot be caught: the map wire form has no hash slot when evolution is off, and an + // evolution-on payload's leading 8-byte FNV hash is indistinguishable from a valid map body, + // so it silently mis-decodes. The row-format guide documents this direction as unsupported; + // producer and consumer must agree on the flag. 
+ final BinaryMap map = format.newMap(mapField); + final int readerIndex = buffer.readerIndex(); + map.pointTo(buffer, readerIndex, size); + buffer.readerIndex(readerIndex + size); + return fromMap(map); + } + if (size < 8) { + throw new ClassNotCompatibleException( + "Map payload too small for an 8-byte schema hash under schema evolution: size=" + size); + } + long peerHash = buffer.readInt64(); + int bodySize = size - 8; + if (peerHash == currentHash) { + final BinaryMap map = format.newMap(mapField); + int readerIndex = buffer.readerIndex(); + map.pointTo(buffer, readerIndex, bodySize); + buffer.readerIndex(readerIndex + bodySize); + return fromMap(map); + } + ProjectionMapCodec projection = resolveProjection(peerHash); + if (projection == null) { + throw new ClassNotCompatibleException( + String.format( + "Map (key,value) schema is not consistent. self/peer hash are %x/%x.", + currentHash, peerHash)); + } + BinaryMap map = projection.format.newMap(projection.mapField); + int readerIndex = buffer.readerIndex(); + map.pointTo(buffer, readerIndex, bodySize); + buffer.readerIndex(readerIndex + bodySize); + return (M) projection.codec.fromMap(map); } @Override public M decode(final byte[] bytes) { - // byte[] overloads ignore sizeEmbedded: encode writes no size prefix, decode uses bytes.length. + // byte[] overloads ignore sizeEmbedded: encode writes no length prefix (under schema evolution + // an 8-byte hash leads the body, but that is data, not framing), so decode takes the size from + // bytes.length. return decode(MemoryUtils.wrap(bytes), bytes.length); } + /** + * The projection codec for {@code peerHash}, or {@code null} if no historical combination has + * that hash. Compiles the codec on first encounter and caches it. Single-threaded by the {@link + * MapEncoder} contract, so the cache needs no locking. 
+ */ + private ProjectionMapCodec resolveProjection(final long peerHash) { + ProjectionMapCodec cached = projections.get(peerHash); + if (cached != null) { + return cached; + } + ProjectionSource source = projectionSources.get(peerHash); + if (source == null) { + return null; + } + ProjectionMapCodec projection = source.compile(format, fory); + projections.put(peerHash, projection); + return projection; + } + @Override public byte[] encode(final M obj) { final BinaryMap map = toMap(obj); - return map.getBuf().getBytes(map.getBaseOffset(), map.getSizeInBytes()); + if (projectionSources == null) { + return map.getBuf().getBytes(map.getBaseOffset(), map.getSizeInBytes()); + } + // Build the result with a single allocation: the result byte[]. The hash header is poked + // in via LittleEndian (no buffer wrapper) and the body is bulk-copied out of the map's + // backing buffer into the result. + final int n = map.getSizeInBytes(); + if (n > Integer.MAX_VALUE - 8) { + throw new EncoderException("Map body too large to prepend schema hash header: " + n); + } + final byte[] result = new byte[8 + n]; + LittleEndian.putInt64(result, 0, currentHash); + map.getBuf().get(map.getBaseOffset(), result, 8, n); + return result; } @Override @@ -102,6 +229,9 @@ public int encode(final MemoryBuffer buffer, final M obj) { if (sizeEmbedded) { buffer.writeInt32(-1); } + if (projectionSources != null) { + buffer.writeInt64(currentHash); + } try { keyWriter.setBuffer(buffer); valWriter.setBuffer(buffer); diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryRowEncoder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryRowEncoder.java index 7cafa0ab2c..1cfb9e803e 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryRowEncoder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryRowEncoder.java @@ -19,6 +19,8 @@ package org.apache.fory.format.encoder; +import org.apache.fory.Fory; +import 
org.apache.fory.collection.LongMap; import org.apache.fory.exception.ClassNotCompatibleException; import org.apache.fory.format.row.binary.BinaryRow; import org.apache.fory.format.row.binary.writer.BaseBinaryRowWriter; @@ -33,18 +35,69 @@ class BinaryRowEncoder implements RowEncoder { private final BaseBinaryRowWriter writer; private final boolean sizeEmbedded; private final long schemaHash; + private final Fory fory; + + /** + * Hash → source able to compile and bind a projection codec for an older version. {@code null} + * when schema evolution is disabled; in that case a hash mismatch is a hard error. Shared, + * immutable, and built without compiling anything: a combination's codec class is compiled only + * the first time its hash is decoded. + */ + private final LongMap projectionSources; + + /** + * Per-encoder cache of projection codecs compiled on first decode of their hash. Lock-free: an + * encoder is single-threaded (see {@link RowEncoder}), and the underlying class compile is + * memoized globally by the shared code generator, so a concurrent first-miss on the same hash in + * another encoder compiles the class once. + */ + private final LongMap projections; + private final MemoryBuffer buffer = MemoryUtils.buffer(16); + /** + * A projection codec and a row factory with the historical schema's layout precomputed so + * projection decodes match the current-schema path's per-call cost. + */ + static final class ProjectionCodec { + final RowFactory rowFactory; + final GeneratedRowEncoder codec; + + ProjectionCodec(RowFactory rowFactory, GeneratedRowEncoder codec) { + this.rowFactory = rowFactory; + this.codec = codec; + } + } + + /** Compiles and binds one historical version's projection codec on first decode of its hash. 
*/ + interface ProjectionSource { + ProjectionCodec compile(BaseBinaryRowWriter writer, Fory fory); + } + BinaryRowEncoder( final Schema schema, final GeneratedRowEncoder codec, final BaseBinaryRowWriter writer, final boolean sizeEmbedded) { + this(schema, codec, writer, sizeEmbedded, DataTypes.computeSchemaHash(schema), null, null); + } + + BinaryRowEncoder( + final Schema schema, + final GeneratedRowEncoder codec, + final BaseBinaryRowWriter writer, + final boolean sizeEmbedded, + final long schemaHash, + final LongMap projectionSources, + final Fory fory) { this.schema = schema; this.codec = codec; this.writer = writer; this.sizeEmbedded = sizeEmbedded; - this.schemaHash = DataTypes.computeSchemaHash(schema); + this.schemaHash = schemaHash; + this.projectionSources = projectionSources; + this.fory = fory; + this.projections = projectionSources == null ? null : new LongMap<>(projectionSources.size); } @Override @@ -68,29 +121,67 @@ public T decode(final MemoryBuffer buffer) { return decode(buffer, sizeEmbedded ? buffer.readInt32() : buffer.remaining()); } + @SuppressWarnings("unchecked") T decode(final MemoryBuffer buffer, final int size) { - final long peerSchemaHash = buffer.readInt64(); - if (peerSchemaHash != schemaHash) { + if (size < 8) { throw new ClassNotCompatibleException( - String.format( - "Schema is not consistent, encoder schema is %s. " - + "self/peer schema hash are %s/%s. " - + "Please check writer schema.", - schema, schemaHash, peerSchemaHash)); + "Row payload too small for an 8-byte schema hash: size=" + size); } + final long peerSchemaHash = buffer.readInt64(); + // The 8-byte hash has just been consumed; the row body occupies the remaining bytes. 
final int rowSize = size - 8; - final BinaryRow row = writer.newRow(); - row.pointTo(buffer, buffer.readerIndex(), rowSize); - buffer.increaseReaderIndex(rowSize); - return fromRow(row); + if (peerSchemaHash == schemaHash) { + // Hot path: writer.newRow() reuses the writer's cached row layout for the current schema. + final BinaryRow row = writer.newRow(); + row.pointTo(buffer, buffer.readerIndex(), rowSize); + buffer.increaseReaderIndex(rowSize); + return fromRow(row); + } + if (projectionSources != null) { + ProjectionCodec projection = resolveProjection(peerSchemaHash); + if (projection != null) { + // The writer is bound to the current schema, so the historical row comes from the + // projection's own factory, which carries that schema's precomputed layout. + final BinaryRow row = projection.rowFactory.newRow(); + row.pointTo(buffer, buffer.readerIndex(), rowSize); + buffer.increaseReaderIndex(rowSize); + return (T) projection.codec.fromRow(row); + } + } + throw new ClassNotCompatibleException( + String.format( + "Schema is not consistent, encoder schema is %s. " + + "self/peer schema hash are %x/%x. " + + "Please check writer schema.", + schema, schemaHash, peerSchemaHash)); } @Override public T decode(final byte[] bytes) { - // byte[] overloads ignore sizeEmbedded: encode writes no size prefix, decode uses bytes.length. + // byte[] overloads ignore sizeEmbedded: encode writes no length prefix (the schema-hash prefix + // is part of the body, not framing), so decode takes the size from bytes.length. return decode(MemoryUtils.wrap(bytes), bytes.length); } + /** + * The projection codec for {@code peerSchemaHash}, or {@code null} if no historical version has + * that hash. Compiles the codec on first encounter and caches it for the rest of this encoder's + * life. Single-threaded by the {@link RowEncoder} contract, so the cache needs no locking. 
+ */ + private ProjectionCodec resolveProjection(final long peerSchemaHash) { + ProjectionCodec cached = projections.get(peerSchemaHash); + if (cached != null) { + return cached; + } + ProjectionSource source = projectionSources.get(peerSchemaHash); + if (source == null) { + return null; + } + ProjectionCodec projection = source.compile(writer, fory); + projections.put(peerSchemaHash, projection); + return projection; + } + @Override public byte[] encode(final T obj) { buffer.writerIndex(0); diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactArrayEncoderBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactArrayEncoderBuilder.java index 65f8508e35..c174a03d57 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactArrayEncoderBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactArrayEncoderBuilder.java @@ -19,6 +19,7 @@ package org.apache.fory.format.encoder; +import java.util.Map; import org.apache.fory.codegen.Expression; import org.apache.fory.codegen.Expression.Invoke; import org.apache.fory.codegen.Expression.Reference; @@ -33,6 +34,19 @@ public CompactArrayEncoderBuilder(final TypeRef clsType, final TypeRef bea super(clsType, beanType); } + CompactArrayEncoderBuilder( + final TypeRef clsType, final TypeRef beanType, final String rowCodecSuffix) { + super(clsType, beanType, rowCodecSuffix); + } + + CompactArrayEncoderBuilder( + final TypeRef clsType, + final TypeRef beanType, + final String classSuffix, + final Map, String> nestedSuffixes) { + super(clsType, beanType, classSuffix, nestedSuffixes); + } + @Override protected Invoke beanWriterReset( final Expression writer, final Reference rowWriter, final Expression ordinal) { diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactCodecFormat.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactCodecFormat.java index c92be822b4..2d1244a2cf 
100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactCodecFormat.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactCodecFormat.java @@ -21,10 +21,12 @@ import java.util.Collection; import java.util.Map; +import java.util.Set; import org.apache.fory.format.row.binary.BinaryArray; import org.apache.fory.format.row.binary.BinaryMap; import org.apache.fory.format.row.binary.CompactBinaryArray; import org.apache.fory.format.row.binary.CompactBinaryMap; +import org.apache.fory.format.row.binary.CompactRowLayout; import org.apache.fory.format.row.binary.writer.BaseBinaryRowWriter; import org.apache.fory.format.row.binary.writer.BinaryArrayWriter; import org.apache.fory.format.row.binary.writer.CompactBinaryArrayWriter; @@ -62,18 +64,58 @@ public RowEncoderBuilder newRowEncoder(final TypeRef beanType) { return new CompactRowEncoderBuilder(beanType); } + @Override + public RowEncoderBuilder newProjectionRowEncoder( + final TypeRef beanType, + final Schema historicalSchema, + final Set liveNames, + final String classSuffix, + final Map, String> nestedSuffixes) { + return new CompactRowEncoderBuilder( + beanType, historicalSchema, liveNames, classSuffix, nestedSuffixes); + } + @Override public ArrayEncoderBuilder newArrayEncoder( final TypeRef> collectionType, final TypeRef elementType) { return new CompactArrayEncoderBuilder(collectionType, elementType); } + @Override + public ArrayEncoderBuilder newProjectionArrayEncoder( + final TypeRef> collectionType, + final TypeRef elementType, + final String classSuffix, + final Map, String> nestedSuffixes) { + return new CompactArrayEncoderBuilder(collectionType, elementType, classSuffix, nestedSuffixes); + } + @Override public MapEncoderBuilder newMapEncoder( final TypeRef> mapType, final TypeRef beanToken) { return new CompactMapEncoderBuilder(mapType, beanToken); } + @Override + public MapEncoderBuilder newProjectionMapEncoder( + final TypeRef> mapType, + final TypeRef 
beanToken, + final String valCodecSuffix, + final String keyCodecSuffix, + final Map, String> valNestedSuffixes, + final Map, String> keyNestedSuffixes) { + return new CompactMapEncoderBuilder( + mapType, beanToken, valCodecSuffix, keyCodecSuffix, valNestedSuffixes, keyNestedSuffixes); + } + + @Override + public RowFactory newRowFactory(final Schema schema) { + // Compute the compact layout once; every newRow() call reuses it (same model as the writer + // and the nested-slot read path). + final CompactRowLayout layout = new CompactRowLayout(schema); + return layout::newRow; + } + @Override public BinaryArray newArray(final Field field) { return new CompactBinaryArray(field); diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactMapEncoderBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactMapEncoderBuilder.java index be3d206d59..55f8bef092 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactMapEncoderBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactMapEncoderBuilder.java @@ -19,6 +19,7 @@ package org.apache.fory.format.encoder; +import java.util.Map; import org.apache.fory.codegen.Expression; import org.apache.fory.codegen.Expression.Invoke; import org.apache.fory.codegen.Expression.Reference; @@ -36,6 +37,16 @@ public CompactMapEncoderBuilder(final TypeRef clsType, final TypeRef beanT super(clsType, beanType); } + CompactMapEncoderBuilder( + final TypeRef clsType, + final TypeRef beanType, + final String valCodecSuffix, + final String keyCodecSuffix, + final Map, String> valNestedSuffixes, + final Map, String> keyNestedSuffixes) { + super(clsType, beanType, valCodecSuffix, keyCodecSuffix, valNestedSuffixes, keyNestedSuffixes); + } + @Override protected Invoke beanWriterReset( final Expression writer, final Reference rowWriter, final Expression ordinal) { diff --git 
a/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactRowEncoderBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactRowEncoderBuilder.java index 79ccc53391..b9d0012a4f 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactRowEncoderBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactRowEncoderBuilder.java @@ -19,6 +19,8 @@ package org.apache.fory.format.encoder; +import java.util.Map; +import java.util.Set; import org.apache.fory.codegen.Expression; import org.apache.fory.codegen.Expression.Invoke; import org.apache.fory.codegen.Expression.ListExpression; @@ -41,6 +43,15 @@ public CompactRowEncoderBuilder(final TypeRef beanType) { super(beanType); } + CompactRowEncoderBuilder( + final TypeRef beanType, + final Schema historicalSchema, + final Set liveNames, + final String classSuffix, + final Map, String> nestedSuffixes) { + super(beanType, historicalSchema, liveNames, classSuffix, nestedSuffixes); + } + @Override protected Schema inferSchema(final TypeRef beanType) { return CompactBinaryRowWriter.sortSchema(super.inferSchema(beanType)); diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/DefaultCodecFormat.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/DefaultCodecFormat.java index 8ee0f3a8f1..8cd7e16d95 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/DefaultCodecFormat.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/DefaultCodecFormat.java @@ -21,8 +21,10 @@ import java.util.Collection; import java.util.Map; +import java.util.Set; import org.apache.fory.format.row.binary.BinaryArray; import org.apache.fory.format.row.binary.BinaryMap; +import org.apache.fory.format.row.binary.BinaryRow; import org.apache.fory.format.row.binary.writer.BaseBinaryRowWriter; import org.apache.fory.format.row.binary.writer.BinaryArrayWriter; import 
org.apache.fory.format.row.binary.writer.BinaryRowWriter; @@ -59,18 +61,55 @@ public RowEncoderBuilder newRowEncoder(final TypeRef beanClass) { return new RowEncoderBuilder(beanClass); } + @Override + public RowEncoderBuilder newProjectionRowEncoder( + final TypeRef beanType, + final Schema historicalSchema, + final Set liveNames, + final String classSuffix, + final Map, String> nestedSuffixes) { + return new RowEncoderBuilder( + beanType, historicalSchema, liveNames, classSuffix, nestedSuffixes); + } + @Override public ArrayEncoderBuilder newArrayEncoder( final TypeRef> collectionType, final TypeRef elementType) { return new ArrayEncoderBuilder(collectionType, elementType); } + @Override + public ArrayEncoderBuilder newProjectionArrayEncoder( + final TypeRef> collectionType, + final TypeRef elementType, + final String classSuffix, + final Map, String> nestedSuffixes) { + return new ArrayEncoderBuilder(collectionType, elementType, classSuffix, nestedSuffixes); + } + @Override public MapEncoderBuilder newMapEncoder( final TypeRef> mapType, final TypeRef beanToken) { return new MapEncoderBuilder(mapType, beanToken); } + @Override + public MapEncoderBuilder newProjectionMapEncoder( + final TypeRef> mapType, + final TypeRef beanToken, + final String valCodecSuffix, + final String keyCodecSuffix, + final Map, String> valNestedSuffixes, + final Map, String> keyNestedSuffixes) { + return new MapEncoderBuilder( + mapType, beanToken, valCodecSuffix, keyCodecSuffix, valNestedSuffixes, keyNestedSuffixes); + } + + @Override + public RowFactory newRowFactory(final Schema schema) { + return () -> new BinaryRow(schema); + } + @Override public BinaryArray newArray(final Field field) { return new BinaryArray(field); diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoders.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoders.java index e8ab49cc15..c0d217750d 100644 --- 
a/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoders.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoders.java @@ -22,6 +22,10 @@ import static org.apache.fory.type.TypeUtils.OBJECT_TYPE; import static org.apache.fory.type.TypeUtils.getRawType; +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodType; +import java.lang.reflect.Constructor; import java.util.Collection; import java.util.HashSet; import java.util.LinkedHashSet; @@ -35,6 +39,7 @@ import org.apache.fory.format.row.binary.writer.BinaryRowWriter; import org.apache.fory.format.type.CustomTypeEncoderRegistry; import org.apache.fory.format.type.CustomTypeRegistration; +import org.apache.fory.format.type.Schema; import org.apache.fory.format.type.TypeInference; import org.apache.fory.logging.Logger; import org.apache.fory.logging.LoggerFactory; @@ -98,7 +103,7 @@ public static RowEncoder bean(Class beanClass, BinaryRowWriter writer) *

  • java.math.BigDecimal, java.math.BigInteger *
  • time related: java.sql.Date, java.sql.Timestamp, java.time.LocalDate, java.time.Instant *
  • Optional and friends: OptionalInt, OptionalLong, OptionalDouble - *
  • collection types: only array and java.util.List currently, map support is in progress + *
  • collection types: array, java.util.List, and java.util.Map *
  • record types *
  • nested java bean * @@ -230,8 +235,7 @@ public static , K, V> MapEncoder mapEncoder(TypeRef to *

    supported types for java bean field: - primitive types: boolean, int, double, etc. - boxed * types: Boolean, Integer, Double, etc. - String - java.math.BigDecimal, java.math.BigInteger - * time related: java.sql.Date, java.sql.Timestamp, java.time.LocalDate, java.time.Instant - - * collection types: only array and java.util.List currently, map support is in progress - nested - * java bean. + * collection types: array, java.util.List, and java.util.Map - nested java bean. */ public static , K, V> MapEncoder mapEncoder( TypeRef mapToken, TypeRef keyToken, TypeRef valToken, Fory fory) { @@ -334,6 +338,31 @@ static Class loadOrGenRowCodecClass(Class beanClass, Encoding codecFactory return loadCls(compileUnits); } + /** + * Compile and load a projection codec class for one historical version of {@code beanClass}. The + * current-version codec class is loaded separately by {@link #loadOrGenRowCodecClass}; this is + * used by schema-evolution code paths to materialize a decoder for each older version. The {@code + * nestedSuffixes} map directs codegen to the projection codec class to embed for each nested + * versioned bean type. 
+ */ + static Class loadOrGenProjectionRowCodecClass( + Class beanClass, + Encoding codecFactory, + Schema historicalSchema, + Set liveNames, + String classSuffix, + Map, String> nestedSuffixes) { + final RowEncoderBuilder codecBuilder = + codecFactory.newProjectionRowEncoder( + TypeRef.of(beanClass), historicalSchema, liveNames, classSuffix, nestedSuffixes); + CompileUnit compileUnit = + new CompileUnit( + CodeGenerator.getPackage(beanClass), + codecBuilder.codecClassName(beanClass) + classSuffix, + codecBuilder::genCode); + return loadCls(compileUnit); + } + static Class loadOrGenArrayCodecClass( TypeRef> arrayCls, TypeRef elementType, Encoding codecFactory) { LOG.info("Create ArrayCodec for classes {}", elementType); @@ -351,6 +380,24 @@ static Class loadOrGenArrayCodecClass( return loadCls(compileUnit); } + static Class loadOrGenProjectionArrayCodecClass( + TypeRef> arrayCls, + TypeRef elementType, + Encoding codecFactory, + String classSuffix, + Map, String> nestedSuffixes) { + Class cls = getRawType(elementType); + String prefix = TypeInference.inferTypeName(arrayCls); + ArrayEncoderBuilder codecBuilder = + codecFactory.newProjectionArrayEncoder(arrayCls, elementType, classSuffix, nestedSuffixes); + CompileUnit compileUnit = + new CompileUnit( + CodeGenerator.getPackage(cls), + codecBuilder.codecClassName(cls, prefix) + classSuffix, + codecBuilder::genCode); + return loadCls(compileUnit); + } + static Class loadOrGenMapCodecClass( TypeRef> mapCls, TypeRef keyToken, @@ -384,6 +431,32 @@ static Class loadOrGenMapCodecClass( return loadCls(compileUnit); } + static Class loadOrGenProjectionMapCodecClass( + TypeRef> mapCls, + TypeRef beanToken, + Encoding codecFactory, + String valCodecSuffix, + String keyCodecSuffix, + Map, String> valNestedSuffixes, + Map, String> keyNestedSuffixes) { + Class cls = getRawType(beanToken); + String prefix = TypeInference.inferTypeName(mapCls); + MapEncoderBuilder codecBuilder = + codecFactory.newProjectionMapEncoder( + mapCls, + 
beanToken, + valCodecSuffix, + keyCodecSuffix, + valNestedSuffixes, + keyNestedSuffixes); + CompileUnit compileUnit = + new CompileUnit( + CodeGenerator.getPackage(cls), + codecBuilder.codecClassName(cls, prefix) + codecBuilder.mapClassSuffix(), + codecBuilder::genCode); + return loadCls(compileUnit); + } + private static Class loadCls(CompileUnit... compileUnit) { CodeGenerator codeGenerator = CodeGenerator.getSharedCodeGenerator(Thread.currentThread().getContextClassLoader()); @@ -395,4 +468,21 @@ private static Class loadCls(CompileUnit... compileUnit) { throw new IllegalStateException("Impossible because we just compiled class", e); } } + + /** + * Build a {@link MethodHandle} bound to {@code generatedClass}'s {@code (Object[])} constructor, + * adapted so it returns {@code generatedType}. All generated row/array/map codec classes share + * this constructor shape; this helper centralises the reflection and exception wrapping. + */ + static MethodHandle constructorHandleFor(Class generatedClass, Class generatedType) { + try { + Constructor constructor = + generatedClass.asSubclass(generatedType).getConstructor(Object[].class); + return MethodHandles.lookup() + .unreflectConstructor(constructor) + .asType(MethodType.methodType(generatedType, Object[].class)); + } catch (NoSuchMethodException | IllegalAccessException e) { + throw new EncoderException("Failed to resolve constructor for " + generatedClass, e); + } + } } diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoding.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoding.java index c28b4d3b19..863f4f70b6 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoding.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoding.java @@ -21,6 +21,7 @@ import java.util.Collection; import java.util.Map; +import java.util.Set; import org.apache.fory.format.row.binary.BinaryArray; import 
org.apache.fory.format.row.binary.BinaryMap; import org.apache.fory.format.row.binary.writer.BaseBinaryRowWriter; @@ -41,11 +42,63 @@ interface Encoding { RowEncoderBuilder newRowEncoder(TypeRef beanType); + /** + * Construct a projection codec builder for an older version of {@code beanType}, reading the + * supplied historical schema and producing instances of the current bean class. The {@code + * nestedSuffixes} map directs codegen to embed a specific projection codec class for each + * nested-bean type (used when a nested versioned bean was on the wire at an older version). An + * empty map means all nested beans use their current-version codecs. + */ + RowEncoderBuilder newProjectionRowEncoder( + TypeRef beanType, + Schema historicalSchema, + Set liveNames, + String classSuffix, + Map, String> nestedSuffixes); + ArrayEncoderBuilder newArrayEncoder( TypeRef> collectionType, TypeRef elementType); + /** + * Construct an array encoder builder for one historical combination of the element's nested + * versioned beans. {@code classSuffix} names the generated array codec class (encoding the whole + * combination); {@code nestedSuffixes} routes each nested bean class to its own projection row + * codec, so an element wrapping more than one distinct versioned bean (such as {@code Map}) embeds the right historical codec for each. + */ + ArrayEncoderBuilder newProjectionArrayEncoder( + TypeRef> collectionType, + TypeRef elementType, + String classSuffix, + Map, String> nestedSuffixes); + MapEncoderBuilder newMapEncoder(TypeRef> mapType, TypeRef beanToken); + /** + * Construct a map encoder builder whose generated code references the value and key bean row + * codec classes for one historical (key-version, value-version) combination. 
{@code + * valCodecSuffix} and {@code keyCodecSuffix} name the generated map codec class per position; + * {@code valNestedSuffixes} and {@code keyNestedSuffixes} route each nested bean class within a + * position to its own projection row codec, so a position wrapping more than one distinct + * versioned bean (such as a value typed {@code Map}) embeds the right historical + * codec for each. An empty suffix or absent class means that position, or that bean, is read at + * its current schema. + */ + MapEncoderBuilder newProjectionMapEncoder( + TypeRef> mapType, + TypeRef beanToken, + String valCodecSuffix, + String keyCodecSuffix, + Map, String> valNestedSuffixes, + Map, String> keyNestedSuffixes); + + /** + * Build a {@link RowFactory} for {@code schema}, precomputing any schema-derived layout once. + * Used by the schema-evolution decode path to allocate rows for a historical schema without + * re-deriving the layout on every decode. + */ + RowFactory newRowFactory(Schema schema); + BinaryArray newArray(Field field); BinaryMap newMap(Field field); diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/MapCodecBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/MapCodecBuilder.java index 44ad87e6de..5422a5328e 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/MapCodecBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/MapCodecBuilder.java @@ -20,14 +20,17 @@ package org.apache.fory.format.encoder; import java.lang.invoke.MethodHandle; -import java.lang.invoke.MethodHandles; -import java.lang.invoke.MethodType; +import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.function.BiFunction; import java.util.function.Supplier; +import org.apache.fory.Fory; +import org.apache.fory.collection.LongMap; import org.apache.fory.format.row.binary.writer.BinaryArrayWriter; import org.apache.fory.format.type.DataTypes; import 
org.apache.fory.format.type.Field; +import org.apache.fory.format.type.SchemaHistory; import org.apache.fory.format.type.TypeInference; import org.apache.fory.reflect.TypeRef; import org.apache.fory.type.TypeUtils; @@ -35,6 +38,14 @@ public class MapCodecBuilder> extends BaseCodecBuilder> { + // Strict hash for a map position that carries no versioned bean. Such a position has a single + // fixed wire layout, so a constant identity is correct: it is the same on writer and reader and + // simply leaves the combined hash determined by the position that does evolve. 0L is also a + // legitimate FNV result for a real schema, so this sentinel's safety does not rest on 0L being + // unreachable; it rests on the build-time collision guards in buildVersioned, which reject any + // combined hash that duplicates another combination or the current schema. + private static final long NON_BEAN_POSITION_HASH = 0L; + private final TypeRef mapType; private final Field field; private final Field keyField; @@ -55,23 +66,246 @@ public class MapCodecBuilder> extends BaseCodecBuilder> build() { loadMapInnerCodecs(); - final var mapEncoderFactory = generatedMapEncoder(); + final Class valClass = schemaEvolution ? evolutionBean() : null; + final Class keyClass = schemaEvolution ? 
keyEvolutionBean() : null; + if (valClass == null && keyClass == null) { + final var mapEncoderFactory = generatedMapEncoder(); + return new Supplier>() { + @Override + public MapEncoder get() { + final BinaryArrayWriter keyWriter = codecFormat.newArrayWriter(keyField); + final BinaryArrayWriter valWriter = + codecFormat.newArrayWriter(valField, keyWriter.getBuffer()); + final var codec = mapEncoderFactory.apply(keyWriter, valWriter); + return new BufferResettingMapEncoder<>( + initialBufferSize, + keyWriter, + valWriter, + new BinaryMapEncoder( + codecFormat, field, valWriter, keyWriter, codec, sizeEmbedded)); + } + }; + } + return buildVersioned(valClass, keyClass); + } + + /** + * Whether the value position takes the evolution path, and a representative bean for naming. A + * directly-typed bean (versioned or not) takes the path so the strict-hash prefix is always + * present and an evolution-on consumer can detect a flag-mismatched producer cleanly; a bean + * nested inside a list/map/array value is found by descending the wrapper. Null when the value + * carries no bean. The per-version enumeration over every reachable bean in the value position is + * done by {@link #buildElementSchemaHistory}. + */ + private Class evolutionBean() { + return SchemaHistory.evolutionBean(valType, typeCtx()); + } + + /** + * Bean this map's key evolves on, reachable through the key type, mirroring {@link + * #evolutionBean()} for the value. Null when the key carries no bean. A versioned key is read at + * the matching historical layout selected by the map header's combined hash, so an evolving key + * no longer corrupts silently. + */ + private Class keyEvolutionBean() { + return SchemaHistory.evolutionBean(keyType, typeCtx()); + } + + /** + * Versions for one map position: the history's entries, or a single null when it does not evolve. + */ + private static List positionVersions(SchemaHistory history) { + return history == null ? 
Collections.singletonList(null) : history.versions(); + } + + /** Strict hash contributed by one position: its schema's hash, or the non-bean constant. */ + private static long positionHash(SchemaHistory.VersionedSchema vs) { + return vs == null ? NON_BEAN_POSITION_HASH : vs.strictHash(); + } + + private Supplier> buildVersioned(final Class valClass, final Class keyClass) { + // The map's own key and value are independent positions on the wire (each its own array), and + // the map header's combined hash identifies the (key-layout, value-layout) pair. Each position + // is enumerated over every versioned bean reachable through its wrappers, so a position that + // itself wraps more than one bean (such as a value typed Map) evolves all of + // them. + // A position with no bean contributes a single current-only layout, so the cross-product + // degenerates to the evolving position's versions (or to just the current layout when neither + // evolves, i.e. a non-versioned bean that still needs the hash prefix for flag-mismatch + // detection). + SchemaHistory valHistory = + valClass == null ? null : buildElementSchemaHistory(field.name(), valType); + SchemaHistory keyHistory = + keyClass == null ? null : buildElementSchemaHistory(field.name(), keyType); + SchemaHistory.VersionedSchema valCurrent = valHistory == null ? null : valHistory.current(); + SchemaHistory.VersionedSchema keyCurrent = keyHistory == null ? null : keyHistory.current(); + + // Index one deferred projection source per (key-version, value-version) combination, keyed by + // the combined hash. The current/current combination is the hot path and is handled by the + // unsuffixed current codec, so it is skipped here. A null history means that position does not + // evolve and contributes only its current entry. Building the index compiles nothing: a + // combination's codec classes are generated the first time its hash is decoded. 
The collision + // guards below still run eagerly over the full cross-product, so a hash clash fails fast at + // build rather than surfacing on an unlucky decode. + // + // The full cross-product is required even when key and value are the same bean family: a writer + // can pin them to different versions via distinct type arguments (Map), + // so off-diagonal pairs are reachable. See evolveMapSameBeanKeyAndValueCrossCombos. + List valVersions = positionVersions(valHistory); + List keyVersions = positionVersions(keyHistory); + LongMap projectionSources = new LongMap<>(); + for (SchemaHistory.VersionedSchema valVs : valVersions) { + for (SchemaHistory.VersionedSchema keyVs : keyVersions) { + if (valVs == valCurrent && keyVs == keyCurrent) { + continue; + } + // The map header carries a single hash, so combine the key and value strict hashes into + // one 64-bit map-layout hash that identifies the (key-version, value-version) combination + // jointly. The collision check below proves these combined hashes are unique at build time. + long hash = SchemaHistory.combineHashes(positionHash(keyVs), positionHash(valVs)); + if (projectionSources.containsKey(hash)) { + throw new IllegalStateException( + "Combined (key, value) schema-hash collision for map " + + mapType + + ": two distinct version combinations produced the same map-layout hash. " + + "Please file an issue with the key and value bean definitions."); + } + projectionSources.put( + hash, new ProjectionSource(valClass, keyClass, valVs, keyVs, valCurrent, keyCurrent)); + } + } + final var currentFactory = generatedMapEncoder(); + long currentHash = + SchemaHistory.combineHashes(positionHash(keyCurrent), positionHash(valCurrent)); + // The decode hot path matches currentHash before consulting the projection map, so a projection + // colliding with it would be shadowed and never dispatched to. Prove that cannot happen. 
+ if (projectionSources.containsKey(currentHash)) { + throw new IllegalStateException( + "Combined (key, value) schema-hash collision for map " + + mapType + + ": a historical version combination produced the same map-layout hash as the " + + "current schema. Please file an issue with the key and value bean definitions."); + } return new Supplier>() { @Override public MapEncoder get() { - final BinaryArrayWriter keyWriter = codecFormat.newArrayWriter(keyField); - final BinaryArrayWriter valWriter = - codecFormat.newArrayWriter(valField, keyWriter.getBuffer()); - final var codec = mapEncoderFactory.apply(keyWriter, valWriter); + BinaryArrayWriter keyWriter = codecFormat.newArrayWriter(keyField); + BinaryArrayWriter valWriter = codecFormat.newArrayWriter(valField, keyWriter.getBuffer()); + var codec = currentFactory.apply(keyWriter, valWriter); return new BufferResettingMapEncoder<>( initialBufferSize, keyWriter, valWriter, - new BinaryMapEncoder(codecFormat, field, valWriter, keyWriter, codec, sizeEmbedded)); + new BinaryMapEncoder( + codecFormat, + field, + valWriter, + keyWriter, + codec, + sizeEmbedded, + currentHash, + projectionSources, + fory)); } }; } + /* + * compile() generates the projection row codec(s) and projection map codec for one (key-version, + * value-version) combination, and rebuilds the map field with each position projected onto its + * historical schema. A position at its current schema gets an empty suffix and keeps its current + * field, so a map where only one side evolves pays no codegen for the unchanged side. + */ + /** + * The map position field projected onto {@code positionVs}: the substituted type from the + * position's historical schema, but the current position field's name and nullability (a map key + * is non-nullable, a value nullable), which forElement's inferred field does not carry.
+ */ + private static Field projectedPositionField( + Field currentField, SchemaHistory.VersionedSchema positionVs) { + Field projected = DataTypes.fieldOfSchema(positionVs.schema(), 0); + return DataTypes.field(currentField.name(), projected.type(), currentField.nullable()); + } + + /** + * Deferred projection codec for one (key-version, value-version) combination. Holds only the + * chosen versions; the per-position row codecs and the map codec class are generated on the first + * {@link #compile} call (the first decode of this combination's combined hash), not at build + * time. The build-time collision guards already proved this combination's hash is unique. + */ + private final class ProjectionSource implements BinaryMapEncoder.ProjectionSource { + private final Class valClass; + private final Class keyClass; + private final SchemaHistory.VersionedSchema valVs; + private final SchemaHistory.VersionedSchema keyVs; + private final SchemaHistory.VersionedSchema valCurrent; + private final SchemaHistory.VersionedSchema keyCurrent; + + ProjectionSource( + Class valClass, + Class keyClass, + SchemaHistory.VersionedSchema valVs, + SchemaHistory.VersionedSchema keyVs, + SchemaHistory.VersionedSchema valCurrent, + SchemaHistory.VersionedSchema keyCurrent) { + this.valClass = valClass; + this.keyClass = keyClass; + this.valVs = valVs; + this.keyVs = keyVs; + this.valCurrent = valCurrent; + this.keyCurrent = keyCurrent; + } + + @Override + public BinaryMapEncoder.ProjectionMapCodec compile(Encoding format, Fory fory) { + // Each position's history is forElement over the position type, so its schema's single field + // is the position field with every reachable bean projected onto this combination, and + // nestedSuffixesFor routes each bean class in the position to its own historical row codec. + // The projected field carries only the substituted type; the position's own nullability is + // taken from the current map field (map keys are non-nullable, values nullable). 
+ Field currentVal = DataTypes.itemFieldForMap(field); + Field histVal = currentVal; + String valSuffix = ""; + Map, String> valNested = null; + if (valVs != null && valVs != valCurrent) { + valSuffix = ProjectionRouting.projectionSuffix(valVs); + valNested = ProjectionRouting.nestedSuffixesFor(valVs, codecFormat); + histVal = projectedPositionField(currentVal, valVs); + } + Field currentKey = DataTypes.keyFieldForMap(field); + Field histKey = currentKey; + String keySuffix = ""; + Map, String> keyNested = null; + if (keyVs != null && keyVs != keyCurrent) { + keySuffix = ProjectionRouting.projectionSuffix(keyVs); + keyNested = ProjectionRouting.nestedSuffixesFor(keyVs, codecFormat); + histKey = projectedPositionField(currentKey, keyVs); + } + Class mapClass = + Encoders.loadOrGenProjectionMapCodecClass( + mapType, + TypeRef.of(valClass != null ? valClass : keyClass), + codecFormat, + valSuffix, + keySuffix, + valNested, + keyNested); + MethodHandle ctor = Encoders.constructorHandleFor(mapClass, GeneratedMapEncoder.class); + Field histMapField = DataTypes.mapField(field.name(), histKey, histVal); + try { + Field histKeyField = DataTypes.keyArrayFieldForMap(histMapField); + Field histValField = DataTypes.itemArrayFieldForMap(histMapField); + BinaryArrayWriter projKey = format.newArrayWriter(histKeyField); + BinaryArrayWriter projVal = format.newArrayWriter(histValField, projKey.getBuffer()); + Object[] references = {histKeyField, histValField, projKey, projVal, fory, histMapField}; + GeneratedMapEncoder codec = (GeneratedMapEncoder) ctor.invokeExact(references); + return new BinaryMapEncoder.ProjectionMapCodec(format, histMapField, codec); + } catch (Throwable e) { + throw ExceptionUtils.throwException(e); + } + } + } + private void loadMapInnerCodecs() { Encoders.loadMapCodecs(keyType, codecFormat); Encoders.loadMapCodecs(valType, codecFormat); @@ -81,17 +315,8 @@ BiFunction generatedM final Class arrayCodecClass = Encoders.loadOrGenMapCodecClass(mapType, keyType, 
valType, codecFormat); - final MethodHandle constructorHandle; - try { - final var constructor = - arrayCodecClass.asSubclass(GeneratedMapEncoder.class).getConstructor(Object[].class); - constructorHandle = - MethodHandles.lookup() - .unreflectConstructor(constructor) - .asType(MethodType.methodType(GeneratedMapEncoder.class, Object[].class)); - } catch (final NoSuchMethodException | IllegalAccessException e) { - throw new EncoderException("Failed to construct array codec for " + mapType, e); - } + final MethodHandle constructorHandle = + Encoders.constructorHandleFor(arrayCodecClass, GeneratedMapEncoder.class); return new BiFunction() { @Override public GeneratedMapEncoder apply( @@ -99,7 +324,7 @@ public GeneratedMapEncoder apply( final Object[] references = {keyField, valField, keyWriter, valWriter, fory, field}; try { return (GeneratedMapEncoder) constructorHandle.invokeExact(references); - } catch (final Throwable t) { + } catch (Throwable t) { throw ExceptionUtils.throwException(t); } } diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/MapEncoderBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/MapEncoderBuilder.java index 18abad605e..05966354ea 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/MapEncoderBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/MapEncoderBuilder.java @@ -24,10 +24,13 @@ import static org.apache.fory.type.TypeUtils.getRawType; import java.util.Map; +import java.util.function.Supplier; import org.apache.fory.Fory; +import org.apache.fory.codegen.Code; import org.apache.fory.codegen.CodeGenerator; import org.apache.fory.codegen.CodegenContext; import org.apache.fory.codegen.Expression; +import org.apache.fory.codegen.Expression.AbstractExpression; import org.apache.fory.codegen.ExpressionUtils; import org.apache.fory.format.row.binary.BinaryArray; import org.apache.fory.format.row.binary.BinaryMap; @@ -53,15 +56,59 @@ public class 
MapEncoderBuilder extends BaseBinaryEncoderBuilder { private final TypeRef mapToken; + // True while the key-array subtree is being constructed or generated. The key and value positions + // evolve independently: the map header's combined hash selects a (key version, value version) + // combination, so a key bean must route to its own historical projection codec (keyCodecSuffix / + // keyNestedSuffixes) rather than borrow the value position's. The flag steers both + // nestedBeanSuffix resolution and beanCodecKey registration to the key position. It is set twice + // per key subtree: keyScoped sets it while the subtree's expression is constructed, and + // KeyPositionScope sets it again while that subtree's genCode runs; each restores the previous + // value afterwards. A single boolean suffices because genCode is depth-first: the key + // subtree fully generates within its KeyPositionScope before the value subtree begins, so the two + // positions never interleave. + private boolean inKeyPosition; + + // Projection suffix for the key bean's row codec, parallel to rowCodecSuffixForBeans (the value + // suffix). Null/empty means the key is read at its current schema. A non-empty value routes the + // key bean to a historical projection row codec, letting a map key evolve across versions; the + // map header's combined hash selects this key+value combination. + private final String keyCodecSuffix; + + // Per-class projection suffixes for the value and key positions. Used when a position itself + // wraps more than one distinct versioned bean class (such as a value typed Map): each + // class in that position routes to its own historical row codec. Null falls back to the single + // position suffix (rowCodecSuffixForBeans for the value, keyCodecSuffix for the key). 
+ private final Map, String> valNestedSuffixes; + private final Map, String> keyNestedSuffixes; + public MapEncoderBuilder(Class mapCls, Class keyClass) { this(TypeRef.of(mapCls), TypeRef.of(keyClass)); } public MapEncoderBuilder(TypeRef clsType, TypeRef beanType) { + this(clsType, beanType, null, null); + } + + MapEncoderBuilder( + TypeRef clsType, TypeRef beanType, String valCodecSuffix, String keyCodecSuffix) { + this(clsType, beanType, valCodecSuffix, keyCodecSuffix, null, null); + } + + MapEncoderBuilder( + TypeRef clsType, + TypeRef beanType, + String valCodecSuffix, + String keyCodecSuffix, + Map, String> valNestedSuffixes, + Map, String> keyNestedSuffixes) { // A top-level map has no enclosing bean, so scope key/value-codec resolution to Object to match // TypeInference's empty-path enclosing type; beanType still names the key/value bean for class // naming and schema generation. super(new CodegenContext(), beanType, Object.class); + this.rowCodecSuffixForBeans = valCodecSuffix; + this.keyCodecSuffix = keyCodecSuffix; + this.valNestedSuffixes = valNestedSuffixes; + this.keyNestedSuffixes = keyNestedSuffixes; mapToken = clsType; ctx.reserveName(ROOT_KEY_WRITER_NAME); ctx.reserveName(ROOT_VALUE_WRITER_NAME); @@ -75,7 +122,8 @@ public MapEncoderBuilder(TypeRef clsType, TypeRef beanType) { @Override public String genCode() { ctx.setPackage(CodeGenerator.getPackage(beanClass)); - String className = codecClassName(beanClass, TypeInference.inferTypeName(mapToken)); + String className = + codecClassName(beanClass, TypeInference.inferTypeName(mapToken)) + mapClassSuffix(); ctx.setClassName(className); // don't addImport(arrayClass), because user class may name collide. 
// janino don't support generics, so GeneratedCodec has no generics @@ -140,8 +188,8 @@ public String genCode() { long startTime = System.nanoTime(); String code = ctx.genCode(); - long durationMs = (System.nanoTime() - startTime) / 1000_000; - LOG.info("Generate map codec for class {} take {} us", beanClass, durationMs); + long durationUs = (System.nanoTime() - startTime) / 1000; + LOG.info("Generate map codec for class {} take {} us", beanClass, durationUs); return code; } @@ -181,7 +229,9 @@ public Expression buildEncodeExpression() { expressions.add( new Expression.Invoke(keyArrayWriter, "writeDirectly", Expression.Literal.ofInt(-1))); Expression keySerializationExpr = - serializeForArrayByWriter(keySet, keyArrayWriter, keySetType, null, keyFieldExpr); + keyScoped( + () -> + serializeForArrayByWriter(keySet, keyArrayWriter, keySetType, null, keyFieldExpr)); Expression.Invoke keyArray = new Expression.Invoke(keyArrayWriter, "toArray", TypeRef.of(BinaryArray.class)); expressions.add(map); @@ -242,9 +292,9 @@ private Expression directlyDeserializeMap( Expression keyJavaArray; Expression valueJavaArray; if (TypeUtils.ITERABLE_TYPE.isSupertypeOf(keysType)) { - keyJavaArray = deserializeForCollection(keyArrayRef, keysType); + keyJavaArray = keyScoped(() -> deserializeForCollection(keyArrayRef, keysType)); } else { - keyJavaArray = deserializeForArray(keyArrayRef, keysType); + keyJavaArray = keyScoped(() -> deserializeForArray(keyArrayRef, keysType)); } if (TypeUtils.ITERABLE_TYPE.isSupertypeOf(valuesType)) { valueJavaArray = deserializeForCollection(valArrayRef, valuesType); @@ -261,4 +311,124 @@ private Expression directlyDeserializeMap( ExpressionUtils.notNull(key), new Expression.Invoke(map, "put", key, value))); return new Expression.ListExpression(map, put); } + + /** + * Class-name suffix for the generated map codec. 
The codec class is cached by name, so distinct + * (value-combination, key-combination) pairs must map to distinct suffixes or the second pair + * would reuse the first pair's codec class. {@link ProjectionRouting#projectionSuffix} is already + * injective over a single position's combinations, so each side's string determines its own + * combination. The key side is prefixed with a {@code _K} marker before its own {@code _V...} + * suffix, which keeps the two halves from interleaving: a {@code _K} only ever introduces the key + * suffix, never a value token, so "value v2, key current" ({@code _V2}) and "value current, key + * v2" ({@code _K_V2}) stay distinct rather than both reducing to {@code _V2}. The {@code _K} is a + * namespace prefix, not a parse boundary; the name is never split, only compared for equality, so + * a value-side nested-bean token that happens to contain {@code _K_V} (a bean named {@code K_V}) + * does not cause a collision. Must match the name {@link + * Encoders#loadOrGenProjectionMapCodecClass} computes for the same builder. + */ + String mapClassSuffix() { + String val = rowCodecSuffixForBeans == null ? "" : rowCodecSuffixForBeans; + String key = keyCodecSuffix == null || keyCodecSuffix.isEmpty() ? "" : "_K" + keyCodecSuffix; + return val + key; + } + + /** + * Route a bean to its projection row codec by suffix, per position. When a position wraps several + * distinct versioned bean classes (such as a value typed {@code Map}), its + * per-class suffix map routes each class to its own historical codec; otherwise the single + * position suffix applies to every bean in that position. The key position uses {@link + * #keyCodecSuffix} / {@link #keyNestedSuffixes} and the value position the value suffix / {@link + * #valNestedSuffixes}, so a map whose key and value evolve independently embeds the right + * historical codec for each. An empty suffix means the bean is read at its current schema. 
+ */ + @Override + protected String nestedBeanSuffix(TypeRef typeRef) { + Map, String> nested = inKeyPosition ? keyNestedSuffixes : valNestedSuffixes; + if (nested != null) { + // A class absent from the position's map is read at its current schema, not this position's + // own combination suffix. + String suffix = nested.get(getRawType(typeRef)); + return suffix == null ? "" : suffix; + } + if (inKeyPosition) { + return keyCodecSuffix == null ? "" : keyCodecSuffix; + } + return super.nestedBeanSuffix(typeRef); + } + + /** + * Register the key bean's codec under a distinct key so it does not collide with a same-class + * value bean that projects to a historical schema. Both would otherwise share one {@code + * beanEncoderMap} entry and the first-registered (suffixed) codec would wrongly decode the key. + */ + @Override + protected Object beanCodecKey(TypeRef typeRef) { + return inKeyPosition ? new KeyCodecKey(typeRef) : typeRef; + } + + /** Distinguishes a key-position bean codec registration from the value-position one. */ + private static final class KeyCodecKey { + private final TypeRef typeRef; + + KeyCodecKey(TypeRef typeRef) { + this.typeRef = typeRef; + } + + @Override + public boolean equals(Object o) { + return o instanceof KeyCodecKey && typeRef.equals(((KeyCodecKey) o).typeRef); + } + + @Override + public int hashCode() { + return typeRef.hashCode() * 31 + 1; + } + } + + /** + * Build a key-array subtree with {@link #inKeyPosition} set. Nested bean codecs register both at + * expression construction (the encode {@code ForEach} builds its body eagerly) and during genCode + * (the decode lazy-array body), so the scope has to cover both: the flag is set around the build + * here, and {@link KeyPositionScope} re-sets it around the subtree's genCode. 
+ */ + private Expression keyScoped(Supplier build) { + boolean prev = inKeyPosition; + inKeyPosition = true; + try { + return new KeyPositionScope(build.get()); + } finally { + inKeyPosition = prev; + } + } + + /** Re-sets {@link #inKeyPosition} around the key subtree's genCode; see {@link #keyScoped}. */ + private final class KeyPositionScope extends AbstractExpression { + private final Expression key; + + KeyPositionScope(Expression key) { + super(key); + this.key = key; + } + + @Override + public TypeRef type() { + return key.type(); + } + + @Override + public boolean nullable() { + return key.nullable(); + } + + @Override + public Code.ExprCode doGenCode(CodegenContext ctx) { + boolean prev = inKeyPosition; + inKeyPosition = true; + try { + return key.genCode(ctx); + } finally { + inKeyPosition = prev; + } + } + } } diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/ProjectionRouting.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/ProjectionRouting.java new file mode 100644 index 0000000000..2344dacd87 --- /dev/null +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/ProjectionRouting.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.format.encoder; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.fory.format.type.SchemaHistory; + +/** + * Suffix routing shared by row/array/map projection codec generation. Each cross-product entry gets + * a class-name suffix that uniquely identifies its full nested combination, and the per-nested-bean + * suffix map directs codegen to embed the right inner projection class for each nested-bean type at + * this combination's versions. + */ +final class ProjectionRouting { + private ProjectionRouting() {} + + /** + * Build a class-name suffix that uniquely identifies {@code vs} across the whole cross-product, + * at any nesting depth. The suffix encodes the outer version and, for each nested bean, that + * inner's simple name, version, and its strict hash. + * + *

    <p>The cross-product for one outer schema enumerates a fixed set of nested classes, so every + * combination carries the same classes in the same sorted-by-name positions; two combinations + * differ only in some inner's chosen (version, hash). Sorting by full class name therefore binds + * each token position to one class, which keeps the suffix injective even when two nested beans + * share a simple name and an identical (structural) strict hash. The simple name is for human + * readability, not disambiguation. Do not switch this to an unordered per-class join: positional + * stability is what prevents same-simple-name beans from collapsing onto one class name. + */ + static String projectionSuffix(SchemaHistory.VersionedSchema vs) { + StringBuilder sb = new StringBuilder("_V").append(vs.version()); + if (!vs.nestedBeanSchemas().isEmpty()) { + List<Map.Entry<Class<?>, SchemaHistory.VersionedSchema>> entries = + new ArrayList<>(vs.nestedBeanSchemas().entrySet()); + entries.sort((a, b) -> a.getKey().getName().compareTo(b.getKey().getName())); + for (Map.Entry<Class<?>, SchemaHistory.VersionedSchema> e : entries) { + SchemaHistory.VersionedSchema inner = e.getValue(); + sb.append("_") + .append(e.getKey().getSimpleName()) + .append(inner.version()) + .append("h") + .append(Long.toHexString(inner.strictHash())); + } + } + return sb.toString(); + } + + /** + * Per-nested-bean-type suffix map for codegen, recursively materializing every inner projection + * class implied by {@code vs}. Empty string means the inner bean uses its current-version codec + * class. The chosen inner entry is taken directly from {@code vs}, so this resolves the correct + * combination to arbitrary depth without re-deriving it from a version number. + * + *

    <p>Called when an outer combination is first compiled (its hash first decoded), so the inner + * classes are generated lazily alongside it rather than at builder time. + */ + static Map<Class<?>, String> nestedSuffixesFor( + SchemaHistory.VersionedSchema vs, Encoding codecFormat) { + Map<Class<?>, String> out = new HashMap<>(); + for (Map.Entry<Class<?>, SchemaHistory.VersionedSchema> e : vs.nestedBeanSchemas().entrySet()) { + Class<?> innerClass = e.getKey(); + SchemaHistory.VersionedSchema innerVs = e.getValue(); + if (innerVs.isCurrent()) { + out.put(innerClass, ""); + } else { + String innerSuffix = projectionSuffix(innerVs); + out.put(innerClass, innerSuffix); + // Generate the inner's projection class so the outer's `new InnerCodec` resolves at + // class load. Recurses through the inner's own nested combination. + Encoders.loadOrGenProjectionRowCodecClass( + innerClass, + codecFormat, + innerVs.schema(), + innerVs.liveFieldNames(), + innerSuffix, + nestedSuffixesFor(innerVs, codecFormat)); + } + } + return out; + } +} diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowCodecBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowCodecBuilder.java index eeb624d706..ca00842790 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowCodecBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowCodecBuilder.java @@ -20,11 +20,14 @@ package org.apache.fory.format.encoder; import java.lang.invoke.MethodHandle; -import java.lang.invoke.MethodHandles; -import java.lang.invoke.MethodType; +import java.util.Map; import java.util.function.Function; import java.util.function.Supplier; +import org.apache.fory.Fory; +import org.apache.fory.collection.LongMap; import org.apache.fory.format.row.binary.writer.BaseBinaryRowWriter; +import org.apache.fory.format.type.Schema; +import org.apache.fory.format.type.SchemaHistory; import org.apache.fory.format.type.TypeInference; import 
org.apache.fory.util.ExceptionUtils; @@ -45,51 +48,163 @@ public class RowCodecBuilder extends BaseCodecBuilder> { * virtual thread. */ public Supplier> build() { - final Function> rowEncoderFactory = buildForWriter(); + final RowEncoderFactory factory = buildEncoderFactory(); return new Supplier>() { @Override public RowEncoder get() { - final BaseBinaryRowWriter writer = codecFormat.newWriter(schema); - return new BufferResettingRowEncoder( - initialBufferSize, writer, rowEncoderFactory.apply(writer)); + final BaseBinaryRowWriter writer = codecFormat.newWriter(factory.schema); + return new BufferResettingRowEncoder(initialBufferSize, writer, factory.apply(writer)); } }; } Function> buildForWriter() { + return buildEncoderFactory(); + } + + /** + * Resolve the schema and the per-writer encoder factory together. The evolution path rotates the + * schema to the history-derived current version; returning it alongside the factory keeps that + * resolution out of the mutable builder state, so a reused builder or a direct {@link + * #buildForWriter()} caller is unaffected. + */ + private RowEncoderFactory buildEncoderFactory() { + return schemaEvolution ? 
evolvingBuildForWriter() : defaultBuildForWriter(); + } + + private RowEncoderFactory defaultBuildForWriter() { + final Schema currentSchema = schema; final Function rowEncoderFactory = - rowEncoderFactory(); - return new Function>() { + rowEncoderFactory(currentSchema); + return new RowEncoderFactory(currentSchema) { @Override public RowEncoder apply(final BaseBinaryRowWriter writer) { return new BinaryRowEncoder( - schema, rowEncoderFactory.apply(writer), writer, sizeEmbedded); + currentSchema, rowEncoderFactory.apply(writer), writer, sizeEmbedded); } }; } - Function rowEncoderFactory() { - final Class rowCodecClass = Encoders.loadOrGenRowCodecClass(beanClass, codecFormat); - MethodHandle constructorHandle; - try { - final var constructor = - rowCodecClass.asSubclass(GeneratedRowEncoder.class).getConstructor(Object[].class); - constructorHandle = - MethodHandles.lookup() - .unreflectConstructor(constructor) - .asType(MethodType.methodType(GeneratedRowEncoder.class, Object[].class)); - } catch (final NoSuchMethodException | IllegalAccessException e) { - throw new EncoderException("Failed to construct codec for " + beanClass, e); + private RowEncoderFactory evolvingBuildForWriter() { + SchemaHistory history = buildSchemaHistory(beanClass); + SchemaHistory.VersionedSchema currentVersion = history.current(); + // The history-derived schema is what writers, generated codec, and decode dispatch must agree + // on. It travels back to build() through the returned factory rather than the mutable schema + // field, so building does not rotate builder state that a later build()/buildForWriter() reads. + final Schema currentSchema = currentVersion.schema(); + + final Function currentFactory = + rowEncoderFactory(currentSchema); + // Index of hash → deferred projection source for each non-current combination of + // (outer-version, inner-versions). 
Building it compiles nothing: a combination's codec class is + // generated only the first time a payload with that hash is decoded. The suffix encodes the + // combination so distinct cross-product entries get distinct generated classes; the nested-bean + // version map directs the projection codec to embed the right inner projection class. + // + // Keyed by the raw strict hash straight from SchemaHistory, which already proves these hashes + // are unique across versions() and distinct from the current schema (its hashToSignature guard + // throws on a real collision). No builder-side collision check is needed here, unlike the map + // codec, whose key is a combined (key, value) hash computed outside SchemaHistory. + final LongMap projectionSources = new LongMap<>(); + for (SchemaHistory.VersionedSchema vs : history.versions()) { + if (vs == currentVersion) { + continue; + } + projectionSources.put(vs.strictHash(), new ProjectionSource(beanClass, codecFormat, vs)); } + + final long currentHash = currentVersion.strictHash(); + return new RowEncoderFactory(currentSchema) { + @Override + public RowEncoder apply(final BaseBinaryRowWriter writer) { + return new BinaryRowEncoder( + currentSchema, + currentFactory.apply(writer), + writer, + sizeEmbedded, + currentHash, + projectionSources.size == 0 ? null : projectionSources, + fory); + } + }; + } + + /** + * A per-writer encoder factory that also carries the schema the writer must be created with. The + * schema travels with the factory instead of through the mutable builder, so {@link #build()} can + * create the writer without reading builder state that the evolution path would otherwise rotate. + */ + abstract static class RowEncoderFactory + implements Function> { + final Schema schema; + + RowEncoderFactory(final Schema schema) { + this.schema = schema; + } + } + + /** + * Deferred projection codec for one historical version. 
Holds only the inputs needed to generate + * the codec; the class is compiled on the first {@link #compile} call (the first decode of this + * version's hash), not at build time. Shared across encoder instances and immutable, so the + * compile relies on the shared code generator's own memoization rather than local locking. + */ + private static final class ProjectionSource implements BinaryRowEncoder.ProjectionSource { + private final Class beanClass; + private final Encoding codecFormat; + private final SchemaHistory.VersionedSchema version; + + ProjectionSource( + Class beanClass, Encoding codecFormat, SchemaHistory.VersionedSchema version) { + this.beanClass = beanClass; + this.codecFormat = codecFormat; + this.version = version; + } + + @Override + public BinaryRowEncoder.ProjectionCodec compile(BaseBinaryRowWriter writer, Fory fory) { + Schema historicalSchema = version.schema(); + String suffix = ProjectionRouting.projectionSuffix(version); + Map, String> nestedSuffixes = + ProjectionRouting.nestedSuffixesFor(version, codecFormat); + Class projectionClass = + Encoders.loadOrGenProjectionRowCodecClass( + beanClass, + codecFormat, + historicalSchema, + version.liveFieldNames(), + suffix, + nestedSuffixes); + MethodHandle ctor = Encoders.constructorHandleFor(projectionClass, GeneratedRowEncoder.class); + // The RowFactory depends only on the historical schema and codec format, so build it here + // alongside the codec the first time this version is decoded. 
+ RowFactory rowFactory = codecFormat.newRowFactory(historicalSchema); + try { + Object[] references = {historicalSchema, writer, fory}; + GeneratedRowEncoder codec = (GeneratedRowEncoder) ctor.invokeExact(references); + return new BinaryRowEncoder.ProjectionCodec(rowFactory, codec); + } catch (final ReflectiveOperationException e) { + throw new EncoderException( + "Failed to construct projection codec for schema " + historicalSchema, e); + } catch (Throwable e) { + throw ExceptionUtils.throwException(e); + } + } + } + + Function rowEncoderFactory(final Schema codecSchema) { + final Class rowCodecClass = Encoders.loadOrGenRowCodecClass(beanClass, codecFormat); + final MethodHandle constructorHandle = + Encoders.constructorHandleFor(rowCodecClass, GeneratedRowEncoder.class); return new Function() { @Override public GeneratedRowEncoder apply(final BaseBinaryRowWriter writer) { try { - final Object[] references = {schema, writer, fory}; + final Object[] references = {codecSchema, writer, fory}; return (GeneratedRowEncoder) constructorHandle.invokeExact(references); } catch (final ReflectiveOperationException e) { throw new EncoderException("Failed to construct codec for " + beanClass, e); - } catch (final Throwable e) { + } catch (Throwable e) { throw ExceptionUtils.throwException(e); } } diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowEncoderBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowEncoderBuilder.java index fe1fab9196..a3bfa83afe 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowEncoderBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowEncoderBuilder.java @@ -31,6 +31,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import java.util.SortedMap; import org.apache.fory.Fory; import org.apache.fory.builder.CodecBuilder; @@ -59,6 +60,7 @@ import org.apache.fory.type.TypeUtils; import 
org.apache.fory.util.Preconditions; import org.apache.fory.util.StringUtils; +import org.apache.fory.util.record.RecordComponent; import org.apache.fory.util.record.RecordUtils; /** Expression builder for building jit row encoder class. */ @@ -77,15 +79,50 @@ class RowEncoderBuilder extends BaseBinaryEncoderBuilder { private final CodegenContext generatedBeanImpl; private final String generatedBeanImplName; + /** + * When non-null, this builder produces a decode-only projection codec: schema fields whose name + * is in {@code projectionLiveNames} are assigned to the bean as usual; others are decoded for + * offset arithmetic only and discarded. {@code toRow} on a projection codec throws. + */ + private final Set projectionLiveNames; + + private final String projectionClassSuffix; + public RowEncoderBuilder(Class beanClass) { this(TypeRef.of(beanClass)); } public RowEncoderBuilder(TypeRef beanType) { + this(beanType, null, null, null, null); + } + + /** + * Construct a decode-only projection builder for an older version of {@code beanType}. The + * supplied {@code historicalSchema} is used as the layout to decode; only fields whose name is in + * {@code liveNames} are written into the resulting bean. {@code classSuffix} distinguishes this + * codec from the current-version codec and from other historical projections. {@code + * nestedSuffixes} routes each nested-bean type to a specific projection codec class (used when an + * inner versioned bean was on the wire at an older version). 
+ */ + RowEncoderBuilder( + TypeRef beanType, + Schema historicalSchema, + Set liveNames, + String classSuffix, + Map, String> nestedSuffixes) { super(new CodegenContext(), beanType); Preconditions.checkArgument(beanClass.isInterface() || TypeUtils.isBean(beanType, typeCtx)); - className = codecClassName(beanClass); - this.schema = inferSchema(beanType); + this.projectionLiveNames = liveNames; + this.projectionClassSuffix = classSuffix; + // Per-class nested-bean routing lives in the inherited nestedClassSuffixes; the base + // nestedBeanSuffix reads it and returns "" for a class absent from a non-null map, which is the + // routing this builder needs. A null arg means no projection (current schema for every bean). + this.nestedClassSuffixes = nestedSuffixes; + className = + projectionClassSuffix == null + ? codecClassName(beanClass) + : codecClassName(beanClass) + projectionClassSuffix; + this.schema = historicalSchema != null ? historicalSchema : inferSchema(beanType); this.descriptorsMap = Descriptor.getDescriptorsMap(beanClass); ctx.reserveName(ROOT_ROW_WRITER_NAME); ctx.reserveName(SCHEMA_NAME); @@ -105,7 +142,13 @@ public RowEncoderBuilder(TypeRef beanType) { ctx.addImports(Row.class, ArrayData.class, MapData.class); ctx.addImports(BinaryRow.class, BinaryArray.class, BinaryMap.class); if (beanClass.isInterface()) { - generatedBeanImplName = beanClass.getSimpleName() + "GeneratedImpl"; + // Append the projection suffix so each historical version of an interface bean gets its + // own impl class; the impl classes are inner classes of the codec and would collide on + // the simple name otherwise. + generatedBeanImplName = + beanClass.getSimpleName() + + "GeneratedImpl" + + (projectionClassSuffix == null ? 
"" : projectionClassSuffix); generatedBeanImpl = buildImplClass(); } else { generatedBeanImplName = null; @@ -158,6 +201,9 @@ public String genCode() { ctx.addField(rowWriterType, ROOT_ROW_WRITER_NAME); ctx.addField(ctx.type(Fory.class), FORY_NAME); + // Order matters for projection codecs: the encode pass registers nested-bean encoder fields + // on ctx as a side effect (see buildEncodeExpression), and the decode pass reads them. Building + // decode first would fail with "No bean codec registered". Keep encode genCode before decode. Expression encodeExpr = buildEncodeExpression(); String encodeCode = encodeExpr.genCode(ctx).code(); Expression decodeExpr = buildDecodeExpression(); @@ -177,8 +223,8 @@ public String genCode() { + generatedBeanImpl.genCode() + code.substring(insertPoint); } - long durationMs = (System.nanoTime() - startTime) / 1000; - LOG.info("Generate codec for class {} take {} us", beanClass, durationMs); + long durationUs = (System.nanoTime() - startTime) / 1000; + LOG.info("Generate codec for class {} take {} us", beanClass, durationUs); return code; } @@ -203,8 +249,13 @@ public Expression buildEncodeExpression() { // schema field's name must correspond to descriptor's name. for (int i = 0; i < numFields; i++) { Field field = schema.field(i); + if (projectionLiveNames != null && !projectionLiveNames.contains(field.name())) { + // Removed wire field — no Java accessor to encode from, and a projection codec is + // never dispatched on write anyway. 
+ continue; + } Descriptor d = getDescriptorByFieldName(field.name()); - Preconditions.checkNotNull(d); + Preconditions.checkNotNull(d, "missing descriptor for schema field " + field.name()); TypeRef fieldType = d.getTypeRef(); Expression fieldValue = getFieldValue(bean, d); Literal ordinal = Literal.ofInt(i); @@ -215,6 +266,12 @@ public Expression buildEncodeExpression() { serializeFor(ordinal, fieldValue, writer, fieldType, field, foryField, new HashSet<>()); expressions.add(fieldExpr); } + if (projectionLiveNames != null) { + // Decode-only: never run the writer logic. The expressions above were generated only for + // their side effects on the codegen context (registering nested-bean encoder fields). + return new Expression.Block( + "throw new UnsupportedOperationException(\"projection codec is decode-only\");\n"); + } expressions.add( new Expression.Return( new Expression.Invoke(writer, "getRow", TypeRef.of(BinaryRow.class)))); @@ -237,19 +294,24 @@ public Expression buildDecodeExpression() { bean = new Expression.Reference("new " + generatedBeanImplName + "(row)"); } else { int numFields = schema.numFields(); - List fieldNames = new ArrayList<>(numFields); - Expression[] values = new Expression[numFields]; - Descriptor[] descriptors = new Descriptor[numFields]; - // schema field's name must correspond to descriptor's name. + // Build, in schema order, the per-slot bean-side info for live fields only; removed slots + // stay in the row layout but have no Java target. Offsets are keyed on slot index, so + // skipping them is safe. 
+ List liveDescriptors = new ArrayList<>(); + List liveValues = new ArrayList<>(); for (int i = 0; i < numFields; i++) { Literal ordinal = Literal.ofInt(i); - Descriptor d = getDescriptorByFieldName(schema.field(i).name()); - fieldNames.add(d.getName()); - descriptors[i] = d; + String wireName = schema.field(i).name(); + if (projectionLiveNames != null && !projectionLiveNames.contains(wireName)) { + continue; + } + Descriptor d = getDescriptorByFieldName(wireName); + Preconditions.checkNotNull(d, "missing descriptor for wire field " + wireName); TypeRef fieldType = d.getTypeRef(); Expression.Variable value = new Expression.Variable("value_" + d.getName(), nullValue(fieldType)); - values[i] = value; + liveDescriptors.add(d); + liveValues.add(value); expressions.add(value); Expression.Invoke isNullAt = new Expression.Invoke( @@ -267,17 +329,12 @@ public Expression buildDecodeExpression() { expressions.add(decode); } if (RecordUtils.isRecord(beanClass)) { - int[] map = RecordUtils.buildRecordComponentMapping(beanClass, fieldNames); - Expression[] args = new Expression[numFields]; - for (int i = 0; i < numFields; i++) { - args[i] = values[map[i]]; - } - bean = new Expression.NewInstance(beanType, beanType.getRawType().getName(), args); + bean = buildRecordInstance(liveDescriptors, liveValues); } else { bean = newBean(); expressions.add(bean); - for (int i = 0; i < values.length; i++) { - expressions.add(setFieldValue(bean, descriptors[i], values[i])); + for (int i = 0; i < liveDescriptors.size(); i++) { + expressions.add(setFieldValue(bean, liveDescriptors.get(i), liveValues.get(i))); } } } @@ -290,6 +347,30 @@ public Expression buildDecodeExpression() { return expressions; } + /** + * Build a record instance, supplying defaults for components not contributed by the wire. The + * non-projection path always supplies every component; the projection path may supply a subset. 
+ */ + private Expression buildRecordInstance( + List liveDescriptors, List liveValues) { + Map byName = new HashMap<>(liveDescriptors.size() * 2); + for (int i = 0; i < liveDescriptors.size(); i++) { + byName.put(liveDescriptors.get(i).getName(), liveValues.get(i)); + } + RecordComponent[] components = RecordUtils.getRecordComponents(beanClass); + Expression[] args = new Expression[components.length]; + for (int i = 0; i < components.length; i++) { + String compName = components[i].getName(); + Expression value = byName.get(compName); + if (value == null) { + TypeRef compType = TypeRef.of(components[i].getGenericType()); + value = nullValue(compType); + } + args[i] = value; + } + return new Expression.NewInstance(beanType, beanType.getRawType().getName(), args); + } + private static Expression nullValue(TypeRef fieldType) { Class rawType = fieldType.getRawType(); if (TypeUtils.isOptionalType(rawType)) { @@ -303,7 +384,11 @@ private void addDecoderMethods() { int numFields = schema.numFields(); for (int i = 0; i < numFields; i++) { Literal ordinal = Literal.ofInt(i); - Descriptor d = getDescriptorByFieldName(schema.field(i).name()); + String wireName = schema.field(i).name(); + if (projectionLiveNames != null && !projectionLiveNames.contains(wireName)) { + continue; + } + Descriptor d = getDescriptorByFieldName(wireName); TypeRef fieldType = d.getTypeRef(); Class rawFieldType = fieldType.getRawType(); // Resolve a codec on the raw field type before any Optional unwrap; keep in lockstep with the @@ -358,7 +443,12 @@ private CodegenContext buildImplClass() { int numFields = schema.numFields(); for (int i = 0; i < numFields; i++) { Literal ordinal = Literal.ofInt(i); - Descriptor d = getDescriptorByFieldName(schema.field(i).name()); + String wireName = schema.field(i).name(); + if (projectionLiveNames != null && !projectionLiveNames.contains(wireName)) { + // Removed wire field — no Java member to back this slot. 
+ continue; + } + Descriptor d = getDescriptorByFieldName(wireName); TypeRef fieldType = d.getTypeRef(); Class rawFieldType = fieldType.getRawType(); @@ -410,6 +500,7 @@ private CodegenContext buildImplClass() { // Note: adding constructor captures init code, so must happen after all fields are collected implClass.addConstructor("this.row = row;", BinaryRow.class, "row"); + final boolean projecting = projectionLiveNames != null; methodsNeedingImpl.forEach( (methodName, signatures) -> signatures.forEach( @@ -422,16 +513,58 @@ private CodegenContext buildImplClass() { params[i * 2] = methodType.parameterType(i); params[i * 2 + 1] = "unused" + i; } - implClass.addMethod( - methodName, - "throw new UnsupportedOperationException();", - methodType.returnType(), - params); + String body; + if (projecting && isAccessorOfAbsentField(methodName, methodType)) { + body = + "return " + + defaultValueExpression(methodType.returnType(), implClass) + + ";"; + } else { + body = "throw new UnsupportedOperationException();"; + } + implClass.addMethod(methodName, body, methodType.returnType(), params); })); return implClass; } + /** + * True when {@code methodName(returnType)} on the current bean class names a property whose field + * is not in the historical schema this projection codec is generating. Such a method gets a + * default-value body instead of {@code throw} so the interface proxy can serve callers that don't + * know the field is missing in this version. + */ + private boolean isAccessorOfAbsentField(String methodName, MethodType methodType) { + // An accessor takes no arguments; the live-member pass above only removes the no-arg signature. + // A parameterized method sharing a name and return type with a descriptor is not that field's + // accessor, so it must still throw rather than be silenced into a default value. 
+ if (methodType.parameterCount() != 0) { + return false; + } + // Look up by raw method name, not the wire-name conversion: this path runs only for interface + // beans (see buildImplClass), whose descriptors are keyed by the method names themselves. + Descriptor d = descriptorsMap.get(methodName); + if (d == null) { + return false; + } + // Match the raw return type, the same identity the live-field pass uses to remove an accessor + // from methodsNeedingImpl above. A method whose return type differs is a different overload, + // not this field's accessor, and must still throw. + if (d.getTypeRef().getRawType() != methodType.returnType()) { + return false; + } + // Name and return type match a descriptor, but the live-member loop did not emit it: the field + // is absent in this version. + return true; + } + + private static String defaultValueExpression(Class returnType, CodegenContext ctx) { + if (TypeUtils.isOptionalType(returnType)) { + return ctx.type(returnType) + ".empty()"; + } + return TypeUtils.defaultValue(returnType); + } + private Descriptor getDescriptorByFieldName(String fieldName) { String name = StringUtils.lowerUnderscoreToLowerCamelCase(fieldName); return descriptorsMap.get(name); diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowFactory.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowFactory.java new file mode 100644 index 0000000000..4e8e94f0cc --- /dev/null +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowFactory.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.format.encoder; + +import org.apache.fory.format.row.binary.BinaryRow; + +/** + * Allocates fresh {@link BinaryRow} instances for a fixed schema. Obtained once per schema from + * {@link Encoding#newRowFactory}. The compact format captures its schema-derived layout (offsets, + * widths, nullability) in the factory so every {@link #newRow} call reuses it; the default format + * builds a {@link BinaryRow} directly per call, matching {@code BinaryRowWriter#newRow}. Either way + * the schema-evolution decode path holds one factory per historical schema, giving it the same + * per-decode cost as the current-schema path that reads through the writer's cached layout. + */ +@FunctionalInterface +interface RowFactory { + BinaryRow newRow(); +} diff --git a/java/fory-format/src/main/java/org/apache/fory/format/type/SchemaHistory.java b/java/fory-format/src/main/java/org/apache/fory/format/type/SchemaHistory.java new file mode 100644 index 0000000000..9b6419c416 --- /dev/null +++ b/java/fory-format/src/main/java/org/apache/fory/format/type/SchemaHistory.java @@ -0,0 +1,936 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.format.type; + +import java.lang.reflect.AnnotatedElement; +import java.lang.reflect.Type; +import java.lang.reflect.TypeVariable; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.TreeSet; +import java.util.function.UnaryOperator; +import org.apache.fory.annotation.Internal; +import org.apache.fory.collection.Tuple2; +import org.apache.fory.format.annotation.ForySchema; +import org.apache.fory.format.annotation.ForyVersion; +import org.apache.fory.format.encoder.CustomCodec; +import org.apache.fory.reflect.TypeRef; +import org.apache.fory.type.Descriptor; +import org.apache.fory.type.TypeResolutionContext; +import org.apache.fory.type.TypeUtils; +import org.apache.fory.util.StringUtils; + +/** + * Resolves the version history of a row-codec bean. Each entry exposes the schema as it appeared at + * a particular version, along with a strict hash that uniquely identifies the historical layout. + * Only used when {@code withSchemaEvolution()} is configured on the codec builder. + * + *

    The hash mixes field names and nullability in addition to types, so that two schemas that + * differ only in field order or naming are distinguishable. This is intentionally a different hash + * from {@link DataTypes#computeSchemaHash} and is used only by versioning code paths. + */ +@Internal +public final class SchemaHistory { + + /** Implicit version of a live field that carries no {@link ForyVersion}. */ + private static final int FIRST_VERSION = 1; + + /** FNV-1a 64-bit offset basis, the seed for every strict-hash mix in this class. */ + private static final long FNV_OFFSET_BASIS = 1469598103934665603L; + + /** One entry in a {@link SchemaHistory}. */ + public static final class VersionedSchema { + private final int version; + private final Schema schema; + private final long strictHash; + private final boolean current; + private final Set liveFieldNames; + private final Map, VersionedSchema> nestedBeanSchemas; + + VersionedSchema( + int version, + Schema schema, + long strictHash, + boolean current, + Set liveFieldNames, + Map, VersionedSchema> nestedBeanSchemas) { + this.version = version; + this.schema = schema; + this.strictHash = strictHash; + this.current = current; + this.liveFieldNames = liveFieldNames; + this.nestedBeanSchemas = nestedBeanSchemas; + } + + public int version() { + return version; + } + + public Schema schema() { + return schema; + } + + public long strictHash() { + return strictHash; + } + + /** + * True when this entry is its bean's current (writer-side) schema. Routing uses this to decide + * whether a nested-bean slot embeds the current-version codec class (no suffix) or a historical + * projection class. + */ + public boolean isCurrent() { + return current; + } + + /** + * Names of fields in this version that still have a Java member on the current bean class. + * Other fields are read-and-discarded during projection. 
+ */ + public Set liveFieldNames() { + return liveFieldNames; + } + + /** + * For each nested versioned bean class referenced by this schema, the exact inner entry chosen + * for this combination. Empty when the schema has no nested versioned beans. Each value carries + * its own {@code strictHash} and {@code nestedBeanSchemas}, so routing can identify and recurse + * into the inner subtree to arbitrary depth without re-deriving it from a version number. + * + *

    Keyed by class, not by field. A writer writes one definition of a given bean class, so + * every field of that class in a single payload is at the same version; the enumeration carries + * one entry per class, and a class may back more than one field. + */ + public Map, VersionedSchema> nestedBeanSchemas() { + return nestedBeanSchemas; + } + } + + private final List versions; + private final VersionedSchema current; + + private SchemaHistory(List versions, VersionedSchema current) { + this.versions = versions; + this.current = current; + } + + /** + * The writer-side schema (latest version, all nested beans current). This is the same {@link + * VersionedSchema} instance that also appears in {@link #versions()}, so callers building + * per-version projection codecs may skip it with a reference identity check ({@code vs == + * current()}) rather than comparing hashes. + */ + public VersionedSchema current() { + return current; + } + + /** + * All distinct historical schemas, in build order: outer version ascending, and within an outer + * version, one entry per nested cross-product combination. This is not a strict sort by {@link + * VersionedSchema#version()} — collapsing field-set duplicates can place a later combination in + * an earlier slot. Dispatch keys on the strict hash, so callers must not rely on positional + * order. + */ + public List versions() { + return versions; + } + + /** + * Build a history from the bean's annotations. The schema for each version is transformed by + * {@code schemaTransform} after filtering; pass an identity for standard format, or {@code + * CompactBinaryRowWriter::sortSchema} for compact format. 
+ */ + public static SchemaHistory build(Class beanClass, UnaryOperator schemaTransform) { + return build(beanClass, schemaTransform, new HashMap<>()); + } + + /** + * Build a history, reusing {@code built} for any bean class already enumerated in this build, so + * a bean reached through several field paths (a diamond in the type graph, or two fields of the + * same type) is enumerated once rather than once per path. The memo is per top-level build, not + * static: the transform (standard vs compact) and registry state must not leak across builds. + */ + private static SchemaHistory build( + Class beanClass, + UnaryOperator schemaTransform, + Map, SchemaHistory> built) { + SchemaHistory memoized = built.get(beanClass); + if (memoized != null) { + return memoized; + } + ForySchema schemaAnn = beanClass.getAnnotation(ForySchema.class); + Class removedFieldsClass = schemaAnn == null ? void.class : schemaAnn.removedFields(); + + List all = collectLiveFields(beanClass); + if (removedFieldsClass != void.class) { + all.addAll(collectRemovedFields(removedFieldsClass)); + } + + // Recursively expand any nested versioned bean field's own history. A versioned bean can be the + // field type directly, the element of a list, or the key or value of a map; we locate it at any + // of those sites so the outer's enumeration can cross-product over the inner's versions. A map + // contributes a key site and a value site independently. The inner schema substitutes back into + // the same site at materialization time. + // + // This recursion needs no cycle guard. TypeInference.inferField calls ctx.checkNoCycle on every + // bean it descends into, and RowCodecBuilder runs inferSchema in its constructor before build() + // reaches here, so a self-referential bean is already rejected. Recursion depth is bounded by + // the acyclic nesting of distinct versioned bean types. 
+ for (FieldEntry fe : all) { + collectNestedSites(beanClass, fe.typeRef, schemaTransform, built, fe.nestedSites); + } + + validateNoNameCollision(all); + SchemaHistory history = enumerate(beanClass.getName(), all, schemaTransform); + built.put(beanClass, history); + return history; + } + + /** + * History of a top-level array element or map entry field, which has no version annotations of + * its own but may reach versioned beans through its list/map/array wrappers (a directly-typed + * bean, a list element, or a map key or value, to any depth). The single element field is + * enumerated over the cross-product of every reachable bean's versions, exactly as a bean field + * is on the row path, so the element schema's strict hash identifies all nested layouts jointly + * and an older payload restores every nested bean — on either map side — at its historical + * layout. + */ + public static SchemaHistory forElement( + String fieldName, TypeRef elementType, UnaryOperator schemaTransform) { + FieldEntry element = + new FieldEntry(fieldName, fieldName, elementType, FIRST_VERSION, Integer.MAX_VALUE, true); + // A top-level element has no enclosing bean; codec lookups behave as inferField's do at the + // top level, where only globally-registered (enclosing-agnostic) codecs can match. + collectNestedSites( + void.class, elementType, schemaTransform, new HashMap<>(), element.nestedSites); + List all = new ArrayList<>(1); + all.add(element); + return enumerate("element " + elementType, all, schemaTransform); + } + + /** + * Enumerate every distinct historical schema for {@code all} over its version boundaries and the + * cross-product of nested bean versions, returning a {@link SchemaHistory}. {@code label} names + * the owner for collision diagnostics. 
+ */ + private static SchemaHistory enumerate( + String label, List all, UnaryOperator schemaTransform) { + // Materialize a schema at every version V where the field set changes — both "since" and + // "until" boundaries qualify, because either adds or removes a field from the active set. + // FIRST_VERSION is always materialized even when every field declares since >= 2: a payload + // written before those fields existed carries the v1 layout, so that schema must be decodable. + TreeSet schemaVersions = new TreeSet<>(); + schemaVersions.add(FIRST_VERSION); + for (FieldEntry fe : all) { + schemaVersions.add(fe.since); + if (fe.until != Integer.MAX_VALUE) { + schemaVersions.add(fe.until); + } + } + + // Sort by Java member name so the per-version schema matches the order + // TypeInference.inferSchema produces (which iterates Descriptor.getDescriptors, alphabetical + // by Java member name). Removed fields synthesize a Java name from their wire name. + all.sort((a, b) -> a.javaName.compareTo(b.javaName)); + // A field with finite [since, until) can leave two boundaries with identical field sets + // (e.g. v1 and v4 both lack a field that lived in [v2, v4)). Collapse boundaries that + // produce the same schema into one VersionedSchema, since they round-trip identically. + // A real strict-hash collision — two distinct schemas producing the same hash — is caught by + // comparing schemas on insertion. Schema.equals compares fields by name, DataType, and + // nullability, the same identity the strict hash mixes, so it is the canonical dedup key. + int latestVersion = schemaVersions.last(); + Map bySchema = new LinkedHashMap<>(); + Map hashToSchema = new HashMap<>(); + Schema currentSchema = null; + for (int v : schemaVersions) { + List activeEntries = new ArrayList<>(); + for (FieldEntry fe : all) { + if (fe.since <= v && v < fe.until) { + activeEntries.add(fe); + } + } + // Cross-product over each nested versioned bean *class*, not each field. 
A writer always + // writes one definition of a given bean class, so every field of that class in a single + // payload is at the same version; the off-diagonal combinations (the same class at two + // versions in one record) are unreachable on the wire. Enumerating one dimension per class + // keeps the count a product over distinct nested classes rather than over fields, and lets + // a class appear in more than one field. If no entries have nested histories, this yields a + // single combination. + // + // The class count generated downstream is the product of the per-class version counts. If + // that growth becomes a concern, drop entries from each bean's History interface once you + // no longer need to read payloads from that range — that removes the corresponding + // VersionedSchema from this enumeration. Retiring history entries is purely a read-side + // concern; the writer always uses the current schema. + LinkedHashMap, List> innerChoices = new LinkedHashMap<>(); + for (FieldEntry fe : activeEntries) { + for (NestedSite site : fe.nestedSites) { + innerChoices.putIfAbsent(site.beanClass, site.history.versions()); + } + } + for (Map, VersionedSchema> combination : cartesian(innerChoices)) { + List fields = new ArrayList<>(activeEntries.size()); + Set liveNames = new HashSet<>(); + for (FieldEntry fe : activeEntries) { + Field current = TypeInference.inferNamedField(fe.name, fe.typeRef); + // Substitute each nested versioned site (a direct field, list element, map value, or map + // key) with the version this combination chose for its bean class, keeping every + // collection/map wrapper intact. A map can have both a key site and a value site, which + // are substituted independently. 
+ Field field = current; + for (NestedSite site : fe.nestedSites) { + VersionedSchema innerVs = combination.get(site.beanClass); + field = + site.substitute( + field, fe.typeRef, new DataTypes.StructType(innerVs.schema().fields())); + } + fields.add(field); + if (fe.live) { + liveNames.add(fe.name); + } + } + Schema schema = schemaTransform.apply(new Schema(fields)); + long hash = computeStrictSchemaHash(schema); + Schema previousSchema = hashToSchema.putIfAbsent(hash, schema); + if (previousSchema != null && !previousSchema.equals(schema)) { + throw new IllegalStateException( + "Strict hash collision for " + + label + + " at version " + + v + + ": two distinct historical schemas hashed to the same value. Please file an " + + "issue with the bean definition."); + } + // This combination represents the writer-side configuration at outer version v only when + // every chosen inner is itself that inner's current schema. The bean's own current schema + // is the writer-side configuration at the latest version. + boolean innerAllCurrent = true; + for (VersionedSchema inner : combination.values()) { + if (!inner.isCurrent()) { + innerAllCurrent = false; + break; + } + } + boolean isCurrent = v == latestVersion && innerAllCurrent; + VersionedSchema vs = + new VersionedSchema( + v, + schema, + hash, + isCurrent, + Collections.unmodifiableSet(liveNames), + Collections.unmodifiableMap(new HashMap<>(combination))); + // Prefer the all-current combination on collapse so the stored VS's nestedBeanSchemas + // map reflects the writer-side state at this outer version. This guards a contract on + // current().nestedBeanSchemas() in case two combinations ever canonicalize to the same + // schema; today's inner-by-schema collapse means inner.versions() has no wire-equal + // duplicates, but the guard preserves the invariant for future callers. 
+ if (innerAllCurrent) { + bySchema.put(schema, vs); + } else { + bySchema.putIfAbsent(schema, vs); + } + if (isCurrent) { + currentSchema = schema; + } + } + } + // The all-current combination at the latest version is always one of the cartesian entries, + // so currentSchema is always set and present here. + VersionedSchema current = bySchema.get(currentSchema); + if (current == null) { + throw new IllegalStateException("No current schema resolved for " + label); + } + return new SchemaHistory( + Collections.unmodifiableList(new ArrayList<>(bySchema.values())), current); + } + + /** Cartesian product over (nested bean class, list-of-inner-VersionedSchema). */ + private static List, VersionedSchema>> cartesian( + LinkedHashMap, List> choices) { + List, VersionedSchema>> out = new ArrayList<>(); + out.add(new HashMap<>()); + for (Map.Entry, List> choice : choices.entrySet()) { + Class cls = choice.getKey(); + List options = choice.getValue(); + List, VersionedSchema>> next = new ArrayList<>(out.size() * options.size()); + for (Map, VersionedSchema> prefix : out) { + for (VersionedSchema opt : options) { + Map, VersionedSchema> extended = new HashMap<>(prefix); + extended.put(cls, opt); + next.add(extended); + } + } + out = next; + } + return out; + } + + /** + * Whether a top-level array/map codec must take the evolution path for {@code elementType}, and a + * representative reachable bean for naming the generated codec. Descends list/map/array wrappers + * (a map on both its key and value sides) and returns the first bean at a leaf. The bean need not + * be versioned: an unversioned bean must still take the evolution path so the strict-hash prefix + * is always present and the producer and consumer stay wire-compatible. Returns null when no bean + * is reachable and the codec needs no projection. 
The actual per-version enumeration over every + * reachable bean is done by {@link #forElement}; this only decides whether to evolve and which + * package/name to give the generated codec. The two walks descend the same wrappers but answer + * different questions ("is any bean reachable, versioned or not" here, "which beans have more + * than one version" in {@link #collectNestedSites}) at different phases, so they are deliberately + * kept separate rather than merged into one traversal. + */ + public static Class evolutionBean(TypeRef elementType, TypeResolutionContext typeCtx) { + Wrapper w = Wrapper.classify(void.class, elementType); + switch (w.kind) { + case ENCODED: + case OPTIONAL: + case SEQUENCE: + return evolutionBean(w.child, typeCtx); + case MAP: + // Return one representative bean (value side preferred) only to name the generated codec. + // collectNestedSites enumerates both the key and value sites, so the codec's actual shape + // covers both; this asymmetry is safe precisely because the result is naming-only. Do not + // derive codec structure from it. + Class value = evolutionBean(w.value, typeCtx); + return value != null ? value : evolutionBean(w.key, typeCtx); + case LEAF: + return w.raw != null && TypeUtils.isBean(TypeRef.of(w.raw), typeCtx) ? w.raw : null; + default: + throw new AssertionError("Unhandled wrapper kind: " + w.kind); + } + } + + /** A branch taken at a map node while descending toward a nested bean: its key or its value. */ + private enum MapBranch { + KEY, + VALUE + } + + /** + * The wrapper kinds the row format unwraps before reaching a leaf, in {@code inferField} order. + */ + private enum WrapperKind { + /** A custom codec whose {@code encodedType()} the field is re-inferred as. No path entry. */ + ENCODED, + /** A {@link Optional} unwrapped to its element. No path entry. */ + OPTIONAL, + /** An array or any {@code Iterable}, descended through its single element. No path entry. 
*/ + SEQUENCE, + /** A {@code Map}, descended independently through its key (KEY branch) and value (VALUE). */ + MAP, + /** A non-wrapper type: a bean or a scalar. Terminates the descent. */ + LEAF + } + + /** + * One step of the wrapper-descent grammar shared by {@link #evolutionBean}, {@link + * #collectNestedSites}, and {@link NestedSite#substitute}. It classifies a type into the next + * wrapper kind and resolves the child type(s) to recurse into, so the three walks agree on which + * types are transparent and how their children are derived. This grammar mirrors {@link + * TypeInference#inferField}; a new wrapper type must be added here and in {@code inferField} + * together, or nested versioned beans go undiscovered. + */ + private static final class Wrapper { + final WrapperKind kind; + final Class raw; + final TypeRef child; // ENCODED/OPTIONAL/SEQUENCE + final TypeRef key; // MAP + final TypeRef value; // MAP + + private Wrapper( + WrapperKind kind, Class raw, TypeRef child, TypeRef key, TypeRef value) { + this.kind = kind; + this.raw = raw; + this.child = child; + this.key = key; + this.value = value; + } + + static Wrapper classify(Class enclosing, TypeRef typeRef) { + // Resolve a bare type variable to its bound first, exactly as inferField does, so the + // wrapper checks below see the same parameterized type inferField sees. Without this a field + // typed as a type variable bounded to Optional/Iterable/Map would resolve its raw type to the + // bound but carry no type arguments, and the wrapper branches would read element types off an + // empty argument list. 
+ Type type = typeRef.getType(); + if (type instanceof TypeVariable) { + return classify(enclosing, TypeRef.of(((TypeVariable) type).getBounds()[0])); + } + Class raw = TypeUtils.getRawType(typeRef); + if (raw == null) { + return new Wrapper(WrapperKind.LEAF, null, null, null, null); + } + TypeRef encoded = encodedTypeOf(enclosing, raw, typeRef); + if (encoded != null) { + return new Wrapper(WrapperKind.ENCODED, raw, encoded, null, null); + } + if (raw == Optional.class) { + return new Wrapper( + WrapperKind.OPTIONAL, raw, TypeUtils.getTypeArguments(typeRef).get(0), null, null); + } + if (raw.isArray() || TypeUtils.ITERABLE_TYPE.isSupertypeOf(typeRef)) { + return new Wrapper(WrapperKind.SEQUENCE, raw, elementTypeRef(typeRef, raw), null, null); + } + if (TypeUtils.MAP_TYPE.isSupertypeOf(typeRef)) { + Tuple2, TypeRef> kv = TypeUtils.getMapKeyValueType(typeRef); + return new Wrapper(WrapperKind.MAP, raw, null, kv.f0, kv.f1); + } + return new Wrapper(WrapperKind.LEAF, raw, null, null, null); + } + } + + /** + * A versioned bean reachable from a field, together with the branch taken at each map on the way + * down so the historical struct substitutes back into exactly that leaf. The path lists one entry + * per map crossed, in order from the field root; list/array wrappers add no entry because they + * have a single element leaf. A map field contributes a key site and a value site independently, + * each with its own path; every other field shape has at most one site. 
+ */ + private static final class NestedSite { + final Class enclosing; + final Class beanClass; + final SchemaHistory history; + final List path; + + NestedSite( + Class enclosing, Class beanClass, SchemaHistory history, List path) { + this.enclosing = enclosing; + this.beanClass = beanClass; + this.history = history; + this.path = path; + } + + /** + * Replace this site's bean struct in {@code current} with {@code historical}, descending + * list/array wrappers and taking {@code path[depth]} at each map, while leaving every other + * leaf intact so independent sites (a map's key and value, or beans under different map sides + * at any depth) substitute without disturbing one another. + */ + Field substitute(Field current, TypeRef typeRef, DataTypes.StructType historical) { + return substitute(current, typeRef, historical, 0); + } + + private Field substitute( + Field current, TypeRef typeRef, DataTypes.StructType historical, int depth) { + // A custom-codec'd field is already inferred as its encodedType() shape in `current`, and an + // Optional unwraps straight through, so both descend without touching the field. 
+ Wrapper w = Wrapper.classify(enclosing, typeRef); + switch (w.kind) { + case ENCODED: + case OPTIONAL: + return substitute(current, w.child, historical, depth); + case SEQUENCE: + Field element = + substitute(DataTypes.arrayElementField(current), w.child, historical, depth); + return DataTypes.arrayField(current.name(), element); + case MAP: + Field keyField = DataTypes.keyFieldForMap(current); + Field itemField = DataTypes.itemFieldForMap(current); + if (path.get(depth) == MapBranch.KEY) { + keyField = substitute(keyField, w.key, historical, depth + 1); + } else { + itemField = substitute(itemField, w.value, historical, depth + 1); + } + return DataTypes.mapField(current.name(), keyField, itemField); + case LEAF: + return DataTypes.field(current.name(), historical, current.nullable()); + default: + throw new AssertionError("Unhandled wrapper kind: " + w.kind); + } + } + } + + /** + * Collect the versioned beans reachable from a field type into {@code out}: the field type + * itself, a list/array element, a map value, and a map key, to arbitrary nesting depth. Each site + * records the map-branch path needed to substitute its historical struct back. A map contributes + * a key site and a value site independently, so an evolving key and an evolving value each become + * their own cross-product dimension. + */ + private static void collectNestedSites( + Class enclosing, + TypeRef typeRef, + UnaryOperator schemaTransform, + Map, SchemaHistory> built, + List out) { + collectNestedSites(enclosing, typeRef, new ArrayList<>(), schemaTransform, built, out); + } + + private static void collectNestedSites( + Class enclosing, + TypeRef typeRef, + List path, + UnaryOperator schemaTransform, + Map, SchemaHistory> built, + List out) { + // A custom codec replaces the whole field with its encodedType(); an Optional unwraps; arrays + // and any Iterable descend their element; all transparently, with no path entry. 
A map descends + // its key and value independently, each adding a path entry so its historical struct + // substitutes back into exactly that branch. + Wrapper w = Wrapper.classify(enclosing, typeRef); + switch (w.kind) { + case ENCODED: + case OPTIONAL: + case SEQUENCE: + collectNestedSites(enclosing, w.child, path, schemaTransform, built, out); + return; + case MAP: + collectNestedSites( + enclosing, w.key, append(path, MapBranch.KEY), schemaTransform, built, out); + collectNestedSites( + enclosing, w.value, append(path, MapBranch.VALUE), schemaTransform, built, out); + return; + case LEAF: + break; + default: + throw new AssertionError("Unhandled wrapper kind: " + w.kind); + } + if (w.raw != null && isBean(w.raw)) { + // A bean is an evolution site when it has more than one version — whether the variation comes + // from its own @ForyVersion fields or only from a nested versioned bean it reaches through + // its fields (e.g. a wrapper struct used as a map key, whose own fields are stable but + // whose detail field evolves). build() already expands that nested cross-product, so its + // version count is the exact evolution test; a single-version history means nothing here + // evolves and the site needs no projection. + SchemaHistory history = build(w.raw, schemaTransform, built); + if (history.versions().size() > 1) { + out.add(new NestedSite(enclosing, w.raw, history, path)); + } + } + } + + /** + * Return a fresh list holding the entries of {@code path} followed by {@code branch}. The input + * list is never modified, so the key and value recursions of a map each extend their own copy. + */ + private static List append(List path, MapBranch branch) { + List next = new ArrayList<>(path.size() + 1); + next.addAll(path); + next.add(branch); + return next; + } + + /** + * Element type of a list/array field, derived the same way {@link TypeInference} does: arrays use + * the component type, iterables use the element type. + */ + private static TypeRef elementTypeRef(TypeRef typeRef, Class raw) { + return raw.isArray() ? 
typeRef.getComponentType() : TypeUtils.getElementType(typeRef); + } + + /** + * The type a custom codec encodes {@code raw} into, when that codec replaces the field's encoding + * with a recursively-inferred struct — the same recursion {@link TypeInference#inferField} takes + * (find the codec for the enclosing/field pair, then descend its {@code encodedType()}). Returns + * null when no codec applies, when the codec supplies its own terminal {@code foryField} (which + * is never a versioned bean), or when the encoded type is the declared type itself. The evolution + * walk follows this so a versioned bean reachable only through a codec is still enumerated. + */ + private static TypeRef encodedTypeOf(Class enclosing, Class raw, TypeRef typeRef) { + CustomCodec codec = + CustomTypeEncoderRegistry.customTypeHandler().findCodec(enclosing, raw); + if (codec == null || codec.getForyField("") != null) { + return null; + } + TypeRef encoded = codec.encodedType(); + return encoded == null || encoded.equals(typeRef) ? null : encoded; + } + + /** + * True if the row format treats {@code cls} as a bean, so it is safe to descend into for + * evolution-site discovery. Whether the bean actually evolves is decided by its version count in + * {@link #collectNestedSites}, not here. + * + *

    Only beans are introspected. TypeInference.inferField routes collection/map/array/enum field + * types away from Descriptor.getDescriptors, so a collection subclass that shadows a field name + * across its hierarchy round-trips fine even though getDescriptors would reject it. Gating on + * isBean keeps this probe consistent with inferField; getDescriptors then only throws for a class + * that genuinely cannot be a bean, which fails identically on the real encode/decode path. Use + * the same synthesize-interfaces context as inferField and the top-level array/map entry point + * (evolutionBean), so an interface bean nested as a field type, list element, or map key/value is + * discovered as a bean rather than rejected; otherwise its older versions are never enumerated + * and an older payload decodes at the interface's current layout. + */ + private static boolean isBean(Class cls) { + TypeResolutionContext typeCtx = + new TypeResolutionContext(CustomTypeEncoderRegistry.customTypeHandler(), true); + return TypeUtils.isBean(cls, typeCtx); + } + + /** + * Build one non-live {@link FieldEntry} per descriptor of {@code historyClass}, the class that + * declares removed fields. Each member must carry a {@code @ForyVersion} whose {@code until} is + * not {@code Integer.MAX_VALUE}, whose {@code since} is at least {@code FIRST_VERSION}, and + * whose {@code since} is strictly less than {@code until}. The entry's wire name is the + * descriptor name converted from lower camel case to lower underscore. + * + * @throws IllegalStateException if a member is unannotated or violates any of those rules + */ + private static List collectRemovedFields(Class historyClass) { + List descriptors = Descriptor.getDescriptors(historyClass); + List out = new ArrayList<>(descriptors.size()); + for (Descriptor d : descriptors) { + ForyVersion ann = lookupForyVersion(d); + if (ann == null) { + throw new IllegalStateException( + "Removed-field declaration " + + historyClass.getName() + + "." + + d.getName() + + " requires a @ForyVersion(until = ...) annotation"); + } + if (ann.until() == Integer.MAX_VALUE) { + throw new IllegalStateException( + "Removed-field declaration " + + historyClass.getName() + + "." + + d.getName() + + " must specify @ForyVersion.until (no upper bound makes no sense for a field " + + "that has been removed)"); + } + if (ann.since() < FIRST_VERSION) { + throw new IllegalStateException( + "Invalid @ForyVersion on " + + historyClass.getName() + + "." 
+ + d.getName() + + ": since (" + + ann.since() + + ") must be >= " + + FIRST_VERSION + + " (the first schema version). A since below that adds a version no writer can " + + "emit."); + } + if (ann.since() >= ann.until()) { + throw new IllegalStateException( + "Invalid @ForyVersion on " + + historyClass.getName() + + "." + + d.getName() + + ": since (" + + ann.since() + + ") must be strictly less than until (" + + ann.until() + + ")"); + } + // The history method's name must mirror the live field/method name. Wire names are + // derived the same way the live path derives them: descriptor name -> lower_underscore. + // For Lombok @Data or record-style beans the descriptor name is the field name + // ("tags"); for interface beans or JavaBean-style classes it is the method name + // ("getTags"). The user writes the history method to match. + String wireName = StringUtils.lowerCamelToLowerUnderscore(d.getName()); + out.add( + new FieldEntry( + wireName, d.getName(), d.getTypeRef(), ann.since(), ann.until(), /*live*/ false)); + } + return out; + } + + /** + * Build one live {@link FieldEntry} per descriptor of {@code beanClass}. A member without + * {@code @ForyVersion} gets {@code since = FIRST_VERSION} and {@code until = + * Integer.MAX_VALUE}; an annotated member contributes its own {@code since} and {@code until}. + * + * @throws IllegalStateException if {@code since} is below {@code FIRST_VERSION} or {@code until} + * is anything other than {@code Integer.MAX_VALUE} + */ + private static List collectLiveFields(Class beanClass) { + List descriptors = Descriptor.getDescriptors(beanClass); + List out = new ArrayList<>(descriptors.size()); + for (Descriptor d : descriptors) { + ForyVersion ann = lookupForyVersion(d); + int since = ann == null ? FIRST_VERSION : ann.since(); + int until = ann == null ? Integer.MAX_VALUE : ann.until(); + if (since < FIRST_VERSION) { + throw new IllegalStateException( + "Invalid @ForyVersion on " + + beanClass.getName() + + "." + + d.getName() + + ": since (" + + since + + ") must be >= " + + FIRST_VERSION + + " (the first schema version). A since below that adds a version no writer can " + + "emit."); + } + // A live field still exists as a Java member, so it has no end-of-life version. 
A finite + // until would silently drop it from the current schema (until extends the version set, so + // latestVersion >= until excludes the field), and the writer would stop serializing a field + // the bean still has. Removals are declared on the history class via + // @ForySchema.removedFields. + if (until != Integer.MAX_VALUE) { + throw new IllegalStateException( + "Invalid @ForyVersion on " + + beanClass.getName() + + "." + + d.getName() + + ": a live field must not set until (" + + until + + "). Declare removed fields on the @ForySchema.removedFields history class instead."); + } + // No since/until ordering check here: a live field always has until == MAX_VALUE (enforced + // above), so the ordering check lives only on the removed-field path in collectRemovedFields. + String wireName = StringUtils.lowerCamelToLowerUnderscore(d.getName()); + out.add(new FieldEntry(wireName, d.getName(), d.getTypeRef(), since, until, /*live*/ true)); + } + return out; + } + + /** + * Find the {@code @ForyVersion} for a descriptor: the annotation on its field wins, and the one + * on its read method is used only when the field has none. Returns null when neither carries it. + */ + private static ForyVersion lookupForyVersion(Descriptor d) { + ForyVersion ann = readAnnotation(d.getField()); + if (ann != null) { + return ann; + } + return readAnnotation(d.getReadMethod()); + } + + /** Null-safe read of {@code @ForyVersion}: a null {@code element} yields null. */ + private static ForyVersion readAnnotation(AnnotatedElement element) { + return element == null ? null : element.getAnnotation(ForyVersion.class); + } + + /** + * Reject entries that share a name while their version windows overlap. Entries are grouped by + * {@code name}, each group is sorted by {@code since}, and every window is compared with its + * predecessor in that order. + * + * @throws IllegalStateException if a window starts before the previous window of the same name + * ends + */ + private static void validateNoNameCollision(List entries) { + // For each pair with the same name, their [since, until) windows must not overlap. 
+ Map> byName = new HashMap<>(); + for (FieldEntry fe : entries) { + byName.computeIfAbsent(fe.name, k -> new ArrayList<>()).add(fe); + } + for (Map.Entry> e : byName.entrySet()) { + List group = e.getValue(); + if (group.size() < 2) { + continue; + } + group.sort((a, b) -> Integer.compare(a.since, b.since)); + for (int i = 1; i < group.size(); i++) { + FieldEntry prev = group.get(i - 1); + FieldEntry curr = group.get(i); + if (curr.since < prev.until) { + throw new IllegalStateException( + "Field name '" + + e.getKey() + + "' is declared with overlapping version windows [" + + prev.since + + "," + + prev.until + + ") and [" + + curr.since + + "," + + curr.until + + "); each version must have one definition per name. Adjust the @ForyVersion " + + "annotations on the live field or in the removed-fields class to make the " + + "windows disjoint."); + } + } + } + } + + /** + * Strict schema hash, used only by versioning code paths. Distinguishes schemas that differ in + * field name or nullability, unlike {@link DataTypes#computeSchemaHash}. + * + * @throws IllegalStateException if two top-level fields of {@code schema} share a name + */ + static long computeStrictSchemaHash(Schema schema) { + long hash = FNV_OFFSET_BASIS; + Set seen = new HashSet<>(); + for (Field field : schema.fields()) { + if (!seen.add(field.name())) { + throw new IllegalStateException("Duplicate field name in schema: " + field.name()); + } + hash = hashField(hash, field); + } + return hash; + } + + /** + * Fold one field into the running hash and return the updated value. Mixed in order: the field + * name, the type name, precision and scale for a decimal, the nullability flag, and then the + * children recursively (a list's element, a map's key and item, or a struct's child count + * followed by each child). + */ + private static long hashField(long hash, Field field) { + hash = mix(hash, field.name()); + DataType type = field.type(); + // The type's name() carries its identity including any inline width (e.g. + // fixedSizeBinary(N)), which is enough for every type except DecimalType, whose + // precision and scale are stored separately. Mix those in explicitly so two decimals of + // different shape don't collide. 
+ hash = mix(hash, type.name()); + if (type instanceof DataTypes.DecimalType) { + hash = mix(hash, ((DataTypes.DecimalType) type).precision()); + hash = mix(hash, ((DataTypes.DecimalType) type).scale()); + } + hash = mix(hash, field.nullable() ? 1 : 0); + if (type instanceof DataTypes.ListType) { + hash = hashField(hash, DataTypes.arrayElementField(field)); + } else if (type instanceof DataTypes.MapType) { + hash = hashField(hash, DataTypes.keyFieldForMap(field)); + hash = hashField(hash, DataTypes.itemFieldForMap(field)); + } else if (type instanceof DataTypes.StructType) { + // Mix the child count before recursing. Unlike list and map, whose arity is fixed by the + // type kind, a struct has a variable number of children with no boundary marker between a + // nested struct's last child and the parent's next field. Without the count, {a:struct,b} + // and {a:struct} mix an identical byte sequence and collide. The count delimits the + // struct's extent so the hash stays injective over nesting structure. + List children = type.fields(); + hash = mix(hash, children.size()); + for (Field child : children) { + hash = hashField(hash, child); + } + } + return hash; + } + + /** + * Combine two strict hashes into one 64-bit value with the same FNV-1a mix used for schema + * hashing, so a map header carrying a single hash can identify a (key, value) layout combination + * jointly. Order-sensitive: {@code combineHashes(a, b) != combineHashes(b, a)}. 
+ */ + public static long combineHashes(long first, long second) { + return mix(mix(FNV_OFFSET_BASIS, first), second); + } + + /** + * One mixing step: XOR the whole 64-bit {@code value} into {@code hash}, then multiply by the + * FNV prime. The value is folded in as a single unit, not byte by byte. + */ + private static long mix(long hash, long value) { + hash ^= value; + hash *= 1099511628211L; // FNV prime + return hash; + } + + /** + * Mix every {@code char} of {@code value} into {@code hash} in order, then mix a trailing 0 + * (presumably a terminator so that adjacent strings do not run together). + */ + private static long mix(long hash, String value) { + for (int i = 0; i < value.length(); i++) { + hash = mix(hash, value.charAt(i)); + } + return mix(hash, 0); + } + + /** + * One definition of a field together with its {@code [since, until)} version window. Live + * entries are collected from the bean class, non-live entries from the removed-fields history + * class. + */ + private static final class FieldEntry { + /** Wire name: the descriptor name converted from lower camel case to lower underscore. */ + final String name; + + /** + * Java member name used for canonical ordering. Matches {@link Descriptor#getName} so live + * fields and removed fields (declared on the history class) sort into the same order as {@link + * TypeInference#inferSchema} produces. + */ + final String javaName; + + final TypeRef typeRef; + final int since; + final int until; + final boolean live; + + /** + * The versioned beans reachable from this field (the field type, a list element, a map value, + * or a map key), empty when none. Each site keys the outer cross-product by its bean class, so + * every field backed by the same class shares one version dimension; a map field with an + * evolving key and value contributes two sites. 
+ */ + final List nestedSites = new ArrayList<>(2); + + FieldEntry( + String name, String javaName, TypeRef typeRef, int since, int until, boolean live) { + this.name = name; + this.javaName = javaName; + this.typeRef = typeRef; + this.since = since; + this.until = until; + this.live = live; + } + } +} diff --git a/java/fory-format/src/main/java/org/apache/fory/format/type/TypeInference.java b/java/fory-format/src/main/java/org/apache/fory/format/type/TypeInference.java index 1f3843f438..0d42e202d7 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/type/TypeInference.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/type/TypeInference.java @@ -133,6 +133,11 @@ private static Field inferField(TypeRef arrayTypeRef, TypeRef typeRef) { * When type is both iterable and bean, we take it as iterable in row-format. Note circular * references in bean class is not allowed. * + *

    This is the source of truth for the wrapper-descent grammar (custom codec, Optional, + * array/Iterable, map key/value). {@code SchemaHistory.Wrapper.classify} mirrors that subset for + * evolution-site discovery; a new wrapper type must be added in both, or nested versioned beans + * reachable only through it go undiscovered. + * * @return DataType of a typeToken */ private static Field inferField(String name, TypeRef typeRef, TypeResolutionContext ctx) { @@ -258,6 +263,16 @@ private static Field inferField(String name, TypeRef typeRef, TypeResolutionC } } + /** + * Infer a single named field from its Java type, used by schema-evolution code paths that need to + * reconstruct historical fields by name and type without going through a Java member. + */ + static Field inferNamedField(String name, TypeRef typeRef) { + TypeResolutionContext ctx = + new TypeResolutionContext(CustomTypeEncoderRegistry.customTypeHandler(), true); + return inferField(name, typeRef, ctx); + } + public static String inferTypeName(TypeRef token) { StringBuilder sb = new StringBuilder(); if (TypeUtils.ITERABLE_TYPE.isSupertypeOf(token)) { diff --git a/java/fory-format/src/main/java11/module-info.java b/java/fory-format/src/main/java11/module-info.java index 0f6064b8e9..82f6432366 100644 --- a/java/fory-format/src/main/java11/module-info.java +++ b/java/fory-format/src/main/java11/module-info.java @@ -24,6 +24,7 @@ requires static transitive org.apache.arrow.memory.core; requires static transitive org.apache.arrow.vector; + exports org.apache.fory.format.annotation; exports org.apache.fory.format.encoder; exports org.apache.fory.format.row; exports org.apache.fory.format.row.binary; diff --git a/java/fory-format/src/test/java/org/apache/fory/format/encoder/BinaryRowEncoderPointToTest.java b/java/fory-format/src/test/java/org/apache/fory/format/encoder/BinaryRowEncoderPointToTest.java index 645e7d28c8..08abc8b2ab 100644 --- 
a/java/fory-format/src/test/java/org/apache/fory/format/encoder/BinaryRowEncoderPointToTest.java +++ b/java/fory-format/src/test/java/org/apache/fory/format/encoder/BinaryRowEncoderPointToTest.java @@ -20,6 +20,7 @@ package org.apache.fory.format.encoder; import lombok.Data; +import org.apache.fory.exception.ClassNotCompatibleException; import org.apache.fory.format.row.binary.BinaryRow; import org.apache.fory.format.row.binary.writer.BinaryRowWriter; import org.apache.fory.format.type.Schema; @@ -68,4 +69,30 @@ public Object fromRow(BinaryRow row) { Assert.assertNotNull(captured[0], "decode must hand the row to fromRow"); Assert.assertEquals(captured[0].getSizeInBytes(), payload.length - 8); } + + /** + * A payload shorter than the 8-byte schema hash must fail loudly. Without the guard, decode reads + * the hash past the supplied bytes and hands {@code pointTo} a negative {@code size - 8} length. + */ + @Test + public void decodeRejectsPayloadShorterThanSchemaHash() { + Schema schema = TypeInference.inferSchema(Tiny.class); + BinaryRowEncoder encoder = + new BinaryRowEncoder<>( + schema, + new GeneratedRowEncoder() { + @Override + public BinaryRow toRow(Object obj) { + throw new AssertionError("toRow must not be reached for a truncated payload"); + } + + @Override + public Object fromRow(BinaryRow row) { + throw new AssertionError("fromRow must not be reached for a truncated payload"); + } + }, + new BinaryRowWriter(schema), + false); + Assert.assertThrows(ClassNotCompatibleException.class, () -> encoder.decode(new byte[7])); + } } diff --git a/java/fory-format/src/test/java/org/apache/fory/format/encoder/ImplementInterfaceTest.java b/java/fory-format/src/test/java/org/apache/fory/format/encoder/ImplementInterfaceTest.java index 448ea689d3..18c41fd890 100644 --- a/java/fory-format/src/test/java/org/apache/fory/format/encoder/ImplementInterfaceTest.java +++ b/java/fory-format/src/test/java/org/apache/fory/format/encoder/ImplementInterfaceTest.java @@ -20,7 +20,9 @@ 
package org.apache.fory.format.encoder; import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.OptionalDouble; import java.util.OptionalInt; @@ -329,6 +331,41 @@ public void testListTooLazy() { Assert.assertEquals(deserializedBean.f1().get(0).f1(), 42); } + public interface MapOuter { + Map f1(); + } + + static class MapOuterImpl implements MapOuter { + private final Map f1; + + MapOuterImpl(final Map f1) { + this.f1 = f1; + } + + @Override + public Map f1() { + return f1; + } + } + + /** + * Interface bean as a map value. Type inference reaches the map value type through {@code + * isSupported}, which must recognize the interface as a synthesizable bean the same way it does + * for a direct field or list element. + */ + @Test + public void testMapValueInterface() { + final Map map = new HashMap<>(); + map.put("k", new ListInnerImpl(42)); + final MapOuter bean1 = new MapOuterImpl(map); + final RowEncoder encoder = Encoders.bean(MapOuter.class); + final BinaryRow row = encoder.toRow(bean1); + final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes()); + row.pointTo(buffer, 0, buffer.size()); + final MapOuter deserializedBean = encoder.fromRow(row); + Assert.assertEquals(deserializedBean.f1().get("k").f1(), 42); + } + public interface Value extends Comparable { int v(); @@ -433,12 +470,19 @@ public void testListElementsLazy() { new ListLazyElemInner(4))); final RowEncoder encoder = Encoders.bean(ListLazyElemOuter.class); final BinaryRow row = encoder.toRow(bean1); + // Only the accessed element (index 2, value 42) should be constructed; the constructor's check + // guard asserts that. Reset it in finally so the flag never leaks into another test decoding a + // ListLazyElemInner through its globally-registered codec. 
ListLazyElemInner.check = true; - final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes()); - row.pointTo(buffer, 0, buffer.size()); - final ListLazyElemOuter deserializedBean = encoder.fromRow(row); - Assert.assertEquals(deserializedBean.f1().get(2).f1(), 42); - Assert.assertEquals(deserializedBean.f1().get(3), null); + try { + final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes()); + row.pointTo(buffer, 0, buffer.size()); + final ListLazyElemOuter deserializedBean = encoder.fromRow(row); + Assert.assertEquals(deserializedBean.f1().get(2).f1(), 42); + Assert.assertEquals(deserializedBean.f1().get(3), null); + } finally { + ListLazyElemInner.check = false; + } } public interface IgnoredMethods { diff --git a/java/fory-format/src/test/java/org/apache/fory/format/encoder/SchemaEvolutionStressTest.java b/java/fory-format/src/test/java/org/apache/fory/format/encoder/SchemaEvolutionStressTest.java new file mode 100644 index 0000000000..5cc1d0a11c --- /dev/null +++ b/java/fory-format/src/test/java/org/apache/fory/format/encoder/SchemaEvolutionStressTest.java @@ -0,0 +1,1938 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.fory.format.encoder; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Consumer; +import java.util.function.UnaryOperator; +import lombok.Data; +import org.apache.fory.exception.ClassNotCompatibleException; +import org.apache.fory.format.annotation.ForySchema; +import org.apache.fory.format.annotation.ForyVersion; +import org.apache.fory.format.type.SchemaHistory; +import org.apache.fory.reflect.TypeRef; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +/** + * Stress tests for row-codec schema evolution. Each test probes a specific edge case; the names say + * what is being stressed. Tests that surfaced real bugs are kept with a note pointing at the fix; + * tests kept for coverage are short. + */ +public class SchemaEvolutionStressTest { + + // --------------------------------------------------------------------------- + // Long version chain: a field added at each version 1..5, plus a removal at v5. + // Verifies projection codecs are built and dispatched for every historical version. + // --------------------------------------------------------------------------- + + @Data + public static class ChainV1 { + private int a; // since 1 + } + + @Data + public static class ChainV2 { + private int a; + + @ForyVersion(since = 2) + private String b; + } + + @Data + public static class ChainV3 { + private int a; + + @ForyVersion(since = 2) + private String b; + + @ForyVersion(since = 3) + private long c; + } + + @Data + public static class ChainV4 { + private int a; + + @ForyVersion(since = 2) + private String b; + + @ForyVersion(since = 3) + private long c; + + @ForyVersion(since = 4) + private double d; + } + + /** + * v5 also removes the v1 'a' field starting at v5. 
The reader must therefore know about four + * different historical schemas: v1, v2, v3, and v4 (since 'a' is removed and a new field 'e' shows + * up in v5; 'a' removal makes v5 differ from v4). + */ + @Data + @ForySchema(removedFields = ChainV5.History.class) + public static class ChainV5 { + @ForyVersion(since = 2) + private String b; + + @ForyVersion(since = 3) + private long c; + + @ForyVersion(since = 4) + private double d; + + @ForyVersion(since = 5) + private boolean e; + + interface History { + @ForyVersion(until = 5) + int a(); + } + } + + @Test + public void longChainAllVersionsReadable() { + RowEncoder w1 = evolvingCodec(ChainV1.class); + RowEncoder w2 = evolvingCodec(ChainV2.class); + RowEncoder w3 = evolvingCodec(ChainV3.class); + RowEncoder w4 = evolvingCodec(ChainV4.class); + RowEncoder reader = evolvingCodec(ChainV5.class); + + ChainV1 v1 = new ChainV1(); + v1.setA(11); + ChainV2 v2 = new ChainV2(); + v2.setA(21); + v2.setB("two"); + ChainV3 v3 = new ChainV3(); + v3.setA(31); + v3.setB("three"); + v3.setC(333L); + ChainV4 v4 = new ChainV4(); + v4.setA(41); + v4.setB("four"); + v4.setC(444L); + v4.setD(4.4); + + ChainV5 out1 = reader.decode(w1.encode(v1)); + Assert.assertNull(out1.getB()); + Assert.assertEquals(out1.getC(), 0L); + Assert.assertEquals(out1.getD(), 0.0); + Assert.assertFalse(out1.isE()); + + ChainV5 out2 = reader.decode(w2.encode(v2)); + Assert.assertEquals(out2.getB(), "two"); + Assert.assertEquals(out2.getC(), 0L); + + ChainV5 out3 = reader.decode(w3.encode(v3)); + Assert.assertEquals(out3.getC(), 333L); + Assert.assertEquals(out3.getD(), 0.0); + + ChainV5 out4 = reader.decode(w4.encode(v4)); + Assert.assertEquals(out4.getB(), "four"); + Assert.assertEquals(out4.getC(), 444L); + Assert.assertEquals(out4.getD(), 4.4); + Assert.assertFalse(out4.isE()); + } + + // --------------------------------------------------------------------------- + // Compact format with alignment shuffle: v1 has only longs; v2 adds a byte. 
+ // Compact sorts fields by alignment width so the v1 and v2 schemas have + // different physical orders, even though their logical field sets differ by + // only the added byte. + // --------------------------------------------------------------------------- + + @Data + public static class AlignV1 { + private long x; + private long y; + } + + @Data + public static class AlignV2 { + private long x; + private long y; + + @ForyVersion(since = 2) + private byte flag; + } + + @Test + public void compactAlignmentReshuffleAcrossVersions() { + RowEncoder writer = + Encoders.buildBeanCodec(AlignV1.class) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + RowEncoder reader = + Encoders.buildBeanCodec(AlignV2.class) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + AlignV1 in = new AlignV1(); + in.setX(11); + in.setY(22); + AlignV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getX(), 11); + Assert.assertEquals(out.getY(), 22); + Assert.assertEquals(out.getFlag(), (byte) 0); // primitive default + } + + // --------------------------------------------------------------------------- + // Boxed vs primitive default for an absent field. 
+ // --------------------------------------------------------------------------- + + @Data + public static class DefaultsV1 { + private String name; + } + + @Data + public static class DefaultsV2 { + private String name; + + @ForyVersion(since = 2) + private int primitiveCount; // default 0 + + @ForyVersion(since = 2) + private Integer boxedCount; // default null + } + + @Test + public void primitiveAndBoxedDefaults() { + RowEncoder writer = evolvingCodec(DefaultsV1.class); + RowEncoder reader = evolvingCodec(DefaultsV2.class); + DefaultsV1 in = new DefaultsV1(); + in.setName("n"); + DefaultsV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getName(), "n"); + Assert.assertEquals(out.getPrimitiveCount(), 0); + Assert.assertNull(out.getBoxedCount()); + } + + // --------------------------------------------------------------------------- + // Disjoint-window false collision (regression). A field whose [since, until) + // window starts above the base version and ends below infinity leaves the + // pre-since and post-until boundaries with identical field sets. SchemaHistory + // must collapse those into one entry rather than flagging a false collision. + // --------------------------------------------------------------------------- + + @Data + @ForySchema(removedFields = GappedWindow.History.class) + public static class GappedWindow { + private String name; + + interface History { + @ForyVersion(since = 2, until = 4) + int oldField(); + } + } + + @Test + public void disjointWindowDoesNotFalseCollide() { + // Build alone is the assertion: the bug was an IllegalStateException at build time. + RowEncoder codec = evolvingCodec(GappedWindow.class); + GappedWindow in = new GappedWindow(); + in.setName("hi"); + Assert.assertEquals(codec.decode(codec.encode(in)).getName(), "hi"); + } + + // --------------------------------------------------------------------------- + // Removed field whose original type was a nested struct. 
The projection + // codec must skip the slot without trying to read or decode it. + // --------------------------------------------------------------------------- + + @Data + public static class StructRefV1 { + private String id; + private DefaultsV1 detail; // removed at v2 + private long tail; // live in both versions, positioned after the removed slot + } + + @Data + @ForySchema(removedFields = StructRefV2.History.class) + public static class StructRefV2 { + private String id; + private long tail; + + interface History { + @ForyVersion(until = 2) + DefaultsV1 detail(); + } + } + + // A struct that itself contains a live, evolving nested bean. Used as a map KEY so the key + // position must keep inKeyPosition set while recursing into the nested bean, not just for the + // top-level key bean. + @Data + public static class KeyHolderV1 { + private String id; + private DefaultsV1 detail; + } + + @Data + public static class KeyHolderV2 { + private String id; + private DefaultsV2 detail; + } + + @Test + public void removedNestedStructField() { + RowEncoder writer = evolvingCodec(StructRefV1.class); + RowEncoder reader = evolvingCodec(StructRefV2.class); + StructRefV1 in = new StructRefV1(); + in.setId("x"); + DefaultsV1 d = new DefaultsV1(); + d.setName("inner"); + in.setDetail(d); + in.setTail(42L); + StructRefV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getId(), "x"); + // The field after the removed slot must read correctly, proving projection skipped exactly the + // removed struct's width. + Assert.assertEquals(out.getTail(), 42L); + } + + // --------------------------------------------------------------------------- + // Removed collection-typed field. The history interface preserves the full + // parameterized type, so List and Map round-trip + // through the projection without losing element-type information. 
+ // --------------------------------------------------------------------------- + + @Data + public static class CollectionsV1 { + private String id; + private List tags; // removed at v2 + private Map counters; // removed at v2 + private long tail; // live in both versions, positioned after the removed slots + } + + @Data + @ForySchema(removedFields = CollectionsV2.History.class) + public static class CollectionsV2 { + private String id; + private long tail; + + interface History { + @ForyVersion(until = 2) + List tags(); + + @ForyVersion(until = 2) + Map counters(); + } + } + + @Test + public void removedParameterizedCollectionFields() { + RowEncoder writer = evolvingCodec(CollectionsV1.class); + RowEncoder reader = evolvingCodec(CollectionsV2.class); + CollectionsV1 in = new CollectionsV1(); + in.setId("c"); + in.setTags(Arrays.asList("alpha", "beta")); + Map counters = new HashMap<>(); + counters.put("k1", 1L); + counters.put("k2", 2L); + in.setCounters(counters); + in.setTail(42L); + CollectionsV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getId(), "c"); + // The field after the removed slots must read correctly, proving projection skipped exactly the + // two removed collection slots' width. + Assert.assertEquals(out.getTail(), 42L); + } + + // --------------------------------------------------------------------------- + // Same wire-name retyped across versions: 'tag' was int [1,3), then String [3,inf). 
+  // ---------------------------------------------------------------------------
+
+  // Writer-side fixture: 'tag' is an int.
+  @Data
+  public static class RetypeV1 {
+    private int tag; // present in v1, v2
+  }
+
+  // Reader-side fixture: 'tag' is a String from version 3 on; the int form is declared in History.
+  @Data
+  @ForySchema(removedFields = RetypeV3.History.class)
+  public static class RetypeV3 {
+    @ForyVersion(since = 3)
+    private String tag;
+
+    interface History {
+      @ForyVersion(until = 3)
+      int tag();
+    }
+  }
+
+  // NOTE(review): the RowEncoder declarations in this span look like they lost their type
+  // arguments during extraction -- confirm against the original source.
+  @Test
+  public void retypedSameNameAcrossVersions() {
+    RowEncoder writer = evolvingCodec(RetypeV1.class);
+    RowEncoder reader = evolvingCodec(RetypeV3.class);
+    RetypeV1 in = new RetypeV1();
+    in.setTag(7);
+    RetypeV3 out = reader.decode(writer.encode(in));
+    // The 'tag' on the wire was int and is dropped during projection; the v3 String 'tag' has
+    // no source in this payload so defaults to null.
+    Assert.assertNull(out.getTag());
+  }
+
+  // ---------------------------------------------------------------------------
+  // Wide schema (more than 64 fields) crossing the null-bitmap word boundary.
+  // ---------------------------------------------------------------------------
+
+  // 68 int fields, f00 through f67.
+  @Data
+  public static class WideV1 {
+    private int f00, f01, f02, f03, f04, f05, f06, f07, f08, f09;
+    private int f10, f11, f12, f13, f14, f15, f16, f17, f18, f19;
+    private int f20, f21, f22, f23, f24, f25, f26, f27, f28, f29;
+    private int f30, f31, f32, f33, f34, f35, f36, f37, f38, f39;
+    private int f40, f41, f42, f43, f44, f45, f46, f47, f48, f49;
+    private int f50, f51, f52, f53, f54, f55, f56, f57, f58, f59;
+    private int f60, f61, f62, f63, f64, f65, f66, f67;
+  }
+
+  // Same 68 int fields as WideV1 plus one String field added at version 2.
+  @Data
+  public static class WideV2 {
+    private int f00, f01, f02, f03, f04, f05, f06, f07, f08, f09;
+    private int f10, f11, f12, f13, f14, f15, f16, f17, f18, f19;
+    private int f20, f21, f22, f23, f24, f25, f26, f27, f28, f29;
+    private int f30, f31, f32, f33, f34, f35, f36, f37, f38, f39;
+    private int f40, f41, f42, f43, f44, f45, f46, f47, f48, f49;
+    private int f50, f51, f52, f53, f54, f55, f56, f57, f58, f59;
+    private int f60, f61, f62, f63, f64, f65, f66, f67;
+
+    @ForyVersion(since = 2)
+    private String extra;
+  }
+
+  @Test
+  public void wideSchemaAcrossBitmapWord() {
+    RowEncoder writer = evolvingCodec(WideV1.class);
+    RowEncoder reader = evolvingCodec(WideV2.class);
+    WideV1 in = new WideV1();
+    in.setF00(100);
+    in.setF63(163);
+    in.setF67(167); // past the first 64-bit bitmap word
+    WideV2 out = reader.decode(writer.encode(in));
+    Assert.assertEquals(out.getF00(), 100);
+    Assert.assertEquals(out.getF63(), 163);
+    Assert.assertEquals(out.getF67(), 167);
+    Assert.assertNull(out.getExtra());
+  }
+
+  // ---------------------------------------------------------------------------
+  // Many elements through a single projection codec: 100 elements written by the
+  // same older version must all decode correctly via the same projection codec,
+  // with each element's data preserved and no carry-over of state across slots.
+  // ---------------------------------------------------------------------------
+
+  // NOTE(review): the ArrayEncoder/TypeRef/List type arguments below are missing (only a stray
+  // '>' remains). The writer elements are ChainV2 instances; the reader's element class is not
+  // visible here -- restore both from the original source.
+  @Test
+  public void arrayManyElementsThroughOneProjection() {
+    ArrayEncoder> writer =
+        Encoders.buildArrayCodec(new TypeRef>() {})
+            .withSchemaEvolution()
+            .build()
+            .get();
+    ArrayEncoder> reader =
+        Encoders.buildArrayCodec(new TypeRef>() {})
+            .withSchemaEvolution()
+            .build()
+            .get();
+    List in = new ArrayList<>();
+    for (int i = 0; i < 100; i++) {
+      ChainV2 e = new ChainV2();
+      e.setA(i);
+      e.setB("elem-" + i);
+      in.add(e);
+    }
+    List out = reader.decode(writer.encode(in));
+    Assert.assertEquals(out.size(), 100);
+    for (int i = 0; i < 100; i++) {
+      Assert.assertEquals(out.get(i).getB(), "elem-" + i);
+      Assert.assertEquals(out.get(i).getC(), 0L);
+      Assert.assertFalse(out.get(i).isE());
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Sanity: two readers for the same (class, history) co-exist without
+  // interfering. The two readers share the cached generated codec class (by
+  // design of the codec cache), so the test exercises whether
+  // BinaryRowEncoder's per-instance projection map and current-codec instance
+  // are correctly per-reader rather than accidentally shared.
+  // ---------------------------------------------------------------------------
+
+  @Test
+  public void twoIndependentReadersForSameClass() {
+    RowEncoder writer = evolvingCodec(DefaultsV1.class);
+    RowEncoder r1 = evolvingCodec(DefaultsV2.class);
+    RowEncoder r2 = evolvingCodec(DefaultsV2.class);
+    DefaultsV1 in1 = new DefaultsV1();
+    in1.setName("first");
+    DefaultsV1 in2 = new DefaultsV1();
+    in2.setName("second");
+    byte[] b1 = writer.encode(in1);
+    byte[] b2 = writer.encode(in2);
+    // Each reader decodes its own payload first, then the other's.
+    Assert.assertEquals(r1.decode(b1).getName(), "first");
+    Assert.assertEquals(r2.decode(b2).getName(), "second");
+    Assert.assertEquals(r1.decode(b2).getName(), "second");
+    Assert.assertEquals(r2.decode(b1).getName(), "first");
+  }
+
+  // ---------------------------------------------------------------------------
+  // Schema-history misconfiguration: overlapping windows for the same name
+  // must fail builder construction, not at first bad payload.
+  // ---------------------------------------------------------------------------
+
+  @Data
+  @ForySchema(removedFields = OverlapMisconfig.History.class)
+  public static class OverlapMisconfig {
+    // Live field 'x' since 1 (default) collides with the removed window [1, 5).
+    private int x;
+
+    interface History {
+      @ForyVersion(since = 1, until = 5)
+      int x();
+    }
+  }
+
+  @Test(expectedExceptions = IllegalStateException.class)
+  public void overlappingWindowFailsAtBuild() {
+    evolvingCodec(OverlapMisconfig.class);
+  }
+
+  // ---------------------------------------------------------------------------
+  // A removed-field history declaration must carry a well-formed @ForyVersion.
+ // Each misconfiguration fails at build with a message that names the offending + // declaration, so the user can fix the annotation rather than chase a decode error. + // --------------------------------------------------------------------------- + + @Data + @ForySchema(removedFields = MissingAnnotation.History.class) + public static class MissingAnnotation { + private int x; + + interface History { + // No @ForyVersion: a removed field has no [since, until) window without it. + String legacy(); + } + } + + @Data + @ForySchema(removedFields = MissingUntil.History.class) + public static class MissingUntil { + private int x; + + interface History { + @ForyVersion(since = 2) + String legacy(); + } + } + + @Data + @ForySchema(removedFields = EmptyWindow.History.class) + public static class EmptyWindow { + private int x; + + interface History { + @ForyVersion(since = 5, until = 5) + String legacy(); + } + } + + @Test + public void removedFieldWithoutForyVersionFailsAtBuild() { + IllegalStateException e = + Assert.expectThrows( + IllegalStateException.class, + () -> + Encoders.buildBeanCodec(MissingAnnotation.class) + .withSchemaEvolution() + .build() + .get()); + Assert.assertTrue(e.getMessage().contains("requires a @ForyVersion"), e.getMessage()); + } + + @Test + public void removedFieldWithoutUntilFailsAtBuild() { + IllegalStateException e = + Assert.expectThrows(IllegalStateException.class, () -> evolvingCodec(MissingUntil.class)); + Assert.assertTrue(e.getMessage().contains("must specify @ForyVersion.until"), e.getMessage()); + } + + @Test + public void removedFieldEmptyWindowFailsAtBuild() { + IllegalStateException e = + Assert.expectThrows(IllegalStateException.class, () -> evolvingCodec(EmptyWindow.class)); + Assert.assertTrue(e.getMessage().contains("must be strictly less than until"), e.getMessage()); + } + + /** A still-present field carrying a finite until; removals belong on the history class. 
*/ + @Data + public static class LiveFieldWithUntil { + private int x; + + @ForyVersion(until = 3) + private String stillHere; + } + + @Test + public void liveFieldWithUntilFailsAtBuild() { + IllegalStateException e = + Assert.expectThrows( + IllegalStateException.class, () -> evolvingCodec(LiveFieldWithUntil.class)); + Assert.assertTrue(e.getMessage().contains("live field must not set until"), e.getMessage()); + } + + /** A since below the first version adds a schema version no writer can emit. */ + @Data + public static class LiveFieldSinceBelowFirst { + private int x; + + @ForyVersion(since = 0) + private String added; + } + + @Test + public void liveFieldSinceBelowFirstFailsAtBuild() { + IllegalStateException e = + Assert.expectThrows( + IllegalStateException.class, () -> evolvingCodec(LiveFieldSinceBelowFirst.class)); + Assert.assertTrue(e.getMessage().contains("must be >= 1"), e.getMessage()); + } + + @Data + @ForySchema(removedFields = RemovedFieldSinceBelowFirst.History.class) + public static class RemovedFieldSinceBelowFirst { + private int x; + + interface History { + @ForyVersion(since = 0, until = 3) + String legacy(); + } + } + + @Test + public void removedFieldSinceBelowFirstFailsAtBuild() { + IllegalStateException e = + Assert.expectThrows( + IllegalStateException.class, () -> evolvingCodec(RemovedFieldSinceBelowFirst.class)); + Assert.assertTrue(e.getMessage().contains("must be >= 1"), e.getMessage()); + } + + // --------------------------------------------------------------------------- + // A field whose type is a Collection subclass that shadows a field name across + // its own hierarchy. The row format encodes it through the iterable branch and + // never introspects it as a bean, so it round-trips fine. 
SchemaHistory must + // apply the same iterable/map/bean classification before introspecting a nested + // field type; otherwise it calls Descriptor.getDescriptors on the shadowed + // collection class and fails the whole history build on a bean that works. + // --------------------------------------------------------------------------- + + public static class TaggedListBase extends ArrayList { + protected String marker; + } + + // Shadows TaggedListBase.marker, which makes Descriptor.getDescriptors reject + // this class even though the codec treats it purely as a List. + public static class TaggedList extends TaggedListBase { + protected String marker; + } + + @Data + public static class ShadowedCollectionV2 { + private TaggedList labels; + + @ForyVersion(since = 2) + private String tag; + } + + @Test + public void versionedBeanWithShadowedCollectionFieldBuilds() { + RowEncoder codec = evolvingCodec(ShadowedCollectionV2.class); + ShadowedCollectionV2 in = new ShadowedCollectionV2(); + TaggedList labels = new TaggedList<>(); + labels.add("a"); + labels.add("b"); + in.setLabels(labels); + in.setTag("t"); + ShadowedCollectionV2 out = codec.decode(codec.encode(in)); + Assert.assertEquals(out.getLabels(), Arrays.asList("a", "b")); + Assert.assertEquals(out.getTag(), "t"); + } + + // --------------------------------------------------------------------------- + // Roundtrip a List field nested inside a versioned outer record. + // Verifies the projection codec generated for the outer correctly handles + // an inline list of plain beans whose layout is fixed. 
+  // ---------------------------------------------------------------------------
+
+  // NOTE(review): the element type of 'items' appears to have been lost in extraction (raw List
+  // in both fixtures); restore it from the original source.
+  @Data
+  public static class NestedListV1 {
+    private List items;
+  }
+
+  // Same 'items' field as NestedListV1 plus a String 'tag' added at version 2.
+  @Data
+  public static class NestedListV2 {
+    private List items;
+
+    @ForyVersion(since = 2)
+    private String tag;
+  }
+
+  // ---------------------------------------------------------------------------
+  // Evolution flag asymmetry: same class, one side opt-in, the other not.
+  // Documented as wire-incompatible across the row, array, and map codecs.
+  // Forward (evolution-on reading evolution-off bytes) is a clean
+  // ClassNotCompatibleException on every codec: the missing strict-hash prefix
+  // is read as a mismatched hash. Reverse (evolution-off reading evolution-on
+  // bytes) differs by codec. The row's hash slot exists in both modes, so the
+  // off-reader sees a mismatched hash and throws cleanly. Array/map have no hash
+  // slot when off, so the prefix bleeds into the header and trips an internal
+  // bounds check (assertion-gated, fires under Surefire's -ea); we pin only that
+  // the decode does not silently return.
+ // --------------------------------------------------------------------------- + + @DataProvider + public Object[][] flagAsymmetryCases() { + DefaultsV1 v = new DefaultsV1(); + v.setName("hi"); + + RowEncoder rowOn = evolvingCodec(DefaultsV1.class); + RowEncoder rowOff = Encoders.buildBeanCodec(DefaultsV1.class).build().get(); + + ArrayEncoder> arrayOn = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + ArrayEncoder> arrayOff = + Encoders.buildArrayCodec(new TypeRef>() {}).build().get(); + List list = Arrays.asList(v); + + MapEncoder> mapOn = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder> mapOff = + Encoders.buildMapCodec(new TypeRef>() {}).build().get(); + LinkedHashMap map = new LinkedHashMap<>(); + map.put("k", v); + + return new Object[][] { + { + "row", + new FlagAsymmetryCase( + rowOn.encode(v), rowOff.encode(v), rowOn::decode, rowOff::decode, true) + }, + { + "array", + new FlagAsymmetryCase( + arrayOn.encode(list), arrayOff.encode(list), arrayOn::decode, arrayOff::decode, false) + }, + { + "map", + new FlagAsymmetryCase( + mapOn.encode(map), mapOff.encode(map), mapOn::decode, mapOff::decode, false) + }, + }; + } + + @Test(dataProvider = "flagAsymmetryCases") + public void evolutionFlagAsymmetryFailsLoud(String label, FlagAsymmetryCase c) { + // Forward: evolution-on reader, evolution-off bytes -> clean ClassNotCompatibleException. + Assert.expectThrows(ClassNotCompatibleException.class, () -> c.onDecoder.accept(c.offBytes)); + // Reverse: evolution-off reader, evolution-on bytes. 
+ if (c.reverseIsCleanException) { + Assert.expectThrows(ClassNotCompatibleException.class, () -> c.offDecoder.accept(c.onBytes)); + } else { + assertDecodeThrows(() -> c.offDecoder.accept(c.onBytes)); + } + } + + private static final class FlagAsymmetryCase { + final byte[] onBytes; + final byte[] offBytes; + final Consumer onDecoder; + final Consumer offDecoder; + final boolean reverseIsCleanException; + + FlagAsymmetryCase( + byte[] onBytes, + byte[] offBytes, + Consumer onDecoder, + Consumer offDecoder, + boolean reverseIsCleanException) { + this.onBytes = onBytes; + this.offBytes = offBytes; + this.onDecoder = onDecoder; + this.offDecoder = offDecoder; + this.reverseIsCleanException = reverseIsCleanException; + } + } + + // --------------------------------------------------------------------------- + // mapClassSuffix injectivity: the generated map codec class name is val + "_K" + key, and the + // class is cached by name, so two distinct (value-combo, key-combo) pairs must never produce the + // same suffix or the second pair would reuse the first's codec class. The "_K" boundary is the + // only place value and key strings meet, so the adversarial case is a value-side nested bean + // whose + // own suffix token contains the literal "_K_V" boundary string. A bean named K_V yields exactly + // that token. + // --------------------------------------------------------------------------- + + @Data + public static class KVTokenV2 { + private int a; + + @ForyVersion(since = 2) + private String b; + } + + /** + * Outer value bean carrying a nested bean whose simple name is K_V, so its suffix token is _K_V*. + */ + @Data + public static class ValueWithKVNested { + private K_V inner; + + @ForyVersion(since = 2) + private String tag; + } + + /** Simple name is literally "K_V": its projection token is "_K_Vh". 
*/ + @Data + public static class K_V { + private int x; + + @ForyVersion(since = 2) + private String y; + } + + @Test + public void mapClassSuffixIsInjective() { + // Build histories for a value position that contains the adversarial _K_V token and a key + // position that also evolves, then join every (value, key) combination the way mapClassSuffix + // does and require the joined suffixes to be distinct. + SchemaHistory valHistory = + SchemaHistory.build(ValueWithKVNested.class, UnaryOperator.identity()); + SchemaHistory keyHistory = SchemaHistory.build(KVTokenV2.class, UnaryOperator.identity()); + + Set joined = new HashSet<>(); + Set pairs = new HashSet<>(); + for (SchemaHistory.VersionedSchema valVs : valHistory.versions()) { + String val = valVs.isCurrent() ? "" : ProjectionRouting.projectionSuffix(valVs); + for (SchemaHistory.VersionedSchema keyVs : keyHistory.versions()) { + String keyRaw = keyVs.isCurrent() ? "" : ProjectionRouting.projectionSuffix(keyVs); + String key = keyRaw.isEmpty() ? "" : "_K" + keyRaw; + // Mirror mapClassSuffix() exactly. + String suffix = val + key; + pairs.add(val + "\0" + key); + Assert.assertTrue( + joined.add(suffix), + "mapClassSuffix collision: distinct (value, key) combinations produced the same class " + + "suffix \"" + + suffix + + "\""); + } + } + // Sanity: we actually exercised more than one distinct (value, key) pair, including a value + // suffix that contains the _K_V boundary string. + Assert.assertTrue(pairs.size() > 1, "expected multiple combinations"); + Assert.assertTrue( + valHistory.versions().stream() + .anyMatch( + vs -> !vs.isCurrent() && ProjectionRouting.projectionSuffix(vs).contains("_K_V")), + "test did not exercise the _K_V boundary token it was designed to probe"); + } + + /** + * Asserts that an evolution-off decode of evolution-on bytes does not silently succeed. 
This + * direction has no hash slot to compare, so the failure surfaces as an internal bounds check + * rather than a {@link ClassNotCompatibleException}; that check is assertion-gated, so the test + * only has teeth with JVM assertions enabled. Verify {@code -ea} is on first, otherwise this + * helper would pass vacuously and the guarantee could rot unnoticed. + */ + private static void assertDecodeThrows(Runnable decode) { + boolean assertionsEnabled = false; + assert assertionsEnabled = true; + Assert.assertTrue( + assertionsEnabled, + "JVM assertions (-ea) must be enabled for this test: the evolution-off reverse-decode " + + "guard is assertion-gated and would pass vacuously otherwise"); + try { + decode.run(); + } catch (RuntimeException | AssertionError expected) { + return; + } + Assert.fail("evolution-off decoder silently accepted evolution-on bytes"); + } + + // --------------------------------------------------------------------------- + // Map with a versioned bean as the KEY: current-version round-trip. + // Cross-version key evolution is covered by evolveMapKey* below; this pins the + // same-schema baseline so a regression there is distinguishable from a build + // or encode/decode fault. + // --------------------------------------------------------------------------- + + @Test + public void mapWithVersionedKey() { + MapEncoder> codec = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + DefaultsV2 k = new DefaultsV2(); + k.setName("k"); + k.setPrimitiveCount(1); + k.setBoxedCount(2); + Map in = new HashMap<>(); + in.put(k, "v"); + Map out = codec.decode(codec.encode(in)); + Assert.assertEquals(out.size(), 1); + DefaultsV2 outKey = out.keySet().iterator().next(); + Assert.assertEquals(outKey.getName(), "k"); + Assert.assertEquals(outKey.getPrimitiveCount(), 1); + Assert.assertEquals(outKey.getBoxedCount(), Integer.valueOf(2)); + } + + // A top-level map whose value evolves while the key stays a struct bean. 
The value projects from + // an older version; the key (same shape on both sides) must round-trip unchanged. The map codec + // only applies the value's projection suffix to the value position (MapEncoderBuilder scopes + // nestedBeanSuffix to inValuePosition), so the key bean is always decoded at its current schema. + @Test + public void mapStructKeyValueEvolution() { + MapEncoder> writer = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder> reader = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + DefaultsV2 key = new DefaultsV2(); + key.setName("k"); + key.setPrimitiveCount(7); + key.setBoxedCount(8); + DefaultsV1 val = new DefaultsV1(); + val.setName("val"); + Map in = new HashMap<>(); + in.put(key, val); + Map out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.size(), 1); + Map.Entry entry = out.entrySet().iterator().next(); + Assert.assertEquals(entry.getKey().getName(), "k"); + Assert.assertEquals(entry.getKey().getPrimitiveCount(), 7); + Assert.assertEquals(entry.getKey().getBoxedCount(), Integer.valueOf(8)); + Assert.assertEquals(entry.getValue().getName(), "val"); + Assert.assertEquals(entry.getValue().getPrimitiveCount(), 0); + Assert.assertNull(entry.getValue().getBoxedCount()); + } + + // A row field typed as Map. findVersionedBean must not treat the map + // key + // as a version dimension: keys carry no per-payload hash and are read with the current schema, so + // enumerating key versions would only generate projection codecs decode never dispatches to. The + // outer bean still evolves on its own fields; the keyed map round-trips with the key at current. 
+ @Data + public static class KeyMapHolderV1 { + private Map byKey; + } + + @Data + public static class KeyMapHolderV2 { + private Map byKey; + + @ForyVersion(since = 2) + private String note; + } + + @Test + public void versionedBeanAsMapKeyInRowField() { + RowEncoder writer = evolvingCodec(KeyMapHolderV1.class); + RowEncoder reader = evolvingCodec(KeyMapHolderV2.class); + DefaultsV2 key = new DefaultsV2(); + key.setName("k"); + key.setPrimitiveCount(7); + key.setBoxedCount(8); + KeyMapHolderV1 in = new KeyMapHolderV1(); + in.setByKey(new HashMap<>()); + in.getByKey().put(key, "v"); + KeyMapHolderV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getByKey().size(), 1); + DefaultsV2 outKey = out.getByKey().keySet().iterator().next(); + Assert.assertEquals(outKey.getName(), "k"); + Assert.assertEquals(outKey.getPrimitiveCount(), 7); + Assert.assertEquals(out.getByKey().get(outKey), "v"); + Assert.assertNull(out.getNote()); // note added at v2; v1 payload defaults it + } + + // A row field whose map KEY is a versioned bean that actually skews across writer and reader: + // writer's key is DefaultsV1 (name only), reader's key is DefaultsV2 (adds two since=2 fields). + // Unlike versionedBeanAsMapKeyInRowField, the key version differs on the two sides, so this is + // the case that must not corrupt: the reader must materialize the since=2 fields at their + // defaults rather than reinterpreting the writer's bytes against the current key layout. 
+ @Data + public static class SkewKeyMapHolderV1 { + private Map byKey; + } + + @Data + public static class SkewKeyMapHolderV2 { + private Map byKey; + } + + @Test + public void evolvingMapKeyInRowField() { + RowEncoder writer = evolvingCodec(SkewKeyMapHolderV1.class); + RowEncoder reader = evolvingCodec(SkewKeyMapHolderV2.class); + DefaultsV1 key = new DefaultsV1(); + key.setName("k"); + SkewKeyMapHolderV1 in = new SkewKeyMapHolderV1(); + in.setByKey(new HashMap<>()); + in.getByKey().put(key, "v"); + SkewKeyMapHolderV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getByKey().size(), 1); + DefaultsV2 outKey = out.getByKey().keySet().iterator().next(); + Assert.assertEquals(outKey.getName(), "k"); + Assert.assertEquals(outKey.getPrimitiveCount(), 0); // since=2 field absent in writer; defaults + Assert.assertNull(outKey.getBoxedCount()); + Assert.assertEquals(out.getByKey().get(outKey), "v"); + } + + // A versioned bean as the KEY of a map nested inside another map's value: the substitution path + // must take value at the outer map and key at the inner map. A single key/value flag cannot + // express that two-step descent, so this exercises the multi-step branch path. 
+ @Data + public static class NestedKeyMapHolderV1 { + private Map> outer; + } + + @Data + public static class NestedKeyMapHolderV2 { + private Map> outer; + } + + @Test + public void evolvingKeyOfNestedMap() { + RowEncoder writer = evolvingCodec(NestedKeyMapHolderV1.class); + RowEncoder reader = evolvingCodec(NestedKeyMapHolderV2.class); + DefaultsV1 key = new DefaultsV1(); + key.setName("k"); + Map inner = new HashMap<>(); + inner.put(key, "v"); + NestedKeyMapHolderV1 in = new NestedKeyMapHolderV1(); + in.setOuter(new HashMap<>()); + in.getOuter().put("o", inner); + NestedKeyMapHolderV2 out = reader.decode(writer.encode(in)); + Map outInner = out.getOuter().get("o"); + Assert.assertEquals(outInner.size(), 1); + DefaultsV2 outKey = outInner.keySet().iterator().next(); + Assert.assertEquals(outKey.getName(), "k"); + Assert.assertEquals(outKey.getPrimitiveCount(), 0); // since=2 field absent in writer; defaults + Assert.assertNull(outKey.getBoxedCount()); + Assert.assertEquals(outInner.get(outKey), "v"); + } + + // --------------------------------------------------------------------------- + // Evolving map key: the map header's combined (key,value) hash selects the + // historical key layout, so an older key decodes correctly instead of being + // read against the current layout and corrupting silently. + // --------------------------------------------------------------------------- + + @Test + public void evolveMapKeyValuePrimitive() { + // Writer key is DefaultsV1 (name only); reader key is DefaultsV2 (adds two since=2 fields). + // Value is a plain String on both sides, so only the key dimension evolves. 
+ MapEncoder> writer = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder> reader = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + DefaultsV1 k = new DefaultsV1(); + k.setName("k"); + Map in = new HashMap<>(); + in.put(k, "v"); + Map out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.size(), 1); + Map.Entry e = out.entrySet().iterator().next(); + Assert.assertEquals(e.getKey().getName(), "k"); + Assert.assertEquals(e.getKey().getPrimitiveCount(), 0); // absent since=2 field defaults + Assert.assertNull(e.getKey().getBoxedCount()); + Assert.assertEquals(e.getValue(), "v"); + } + + @Test + public void evolveBothMapKeyAndValue() { + // Both key and value evolve independently: V1 writer, V2 reader on each side. + MapEncoder> writer = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder> reader = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + DefaultsV1 k = new DefaultsV1(); + k.setName("k"); + DefaultsV1 v = new DefaultsV1(); + v.setName("val"); + Map in = new HashMap<>(); + in.put(k, v); + Map out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.size(), 1); + Map.Entry e = out.entrySet().iterator().next(); + Assert.assertEquals(e.getKey().getName(), "k"); + Assert.assertEquals(e.getKey().getPrimitiveCount(), 0); + Assert.assertNull(e.getKey().getBoxedCount()); + Assert.assertEquals(e.getValue().getName(), "val"); + Assert.assertEquals(e.getValue().getPrimitiveCount(), 0); + Assert.assertNull(e.getValue().getBoxedCount()); + } + + @Test + public void evolveMapKeyCurrentValueOlder() { + // Writer key is already current (V2) but value is older (V1). Exercises the (key-current, + // value-older) projection, which must not collide with the (key-older, value-current) class + // name -- both bean suffixes are otherwise _V1. 
+ MapEncoder> writer = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder> reader = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + DefaultsV2 k = new DefaultsV2(); + k.setName("k"); + k.setPrimitiveCount(9); + k.setBoxedCount(3); + DefaultsV1 v = new DefaultsV1(); + v.setName("val"); + Map in = new HashMap<>(); + in.put(k, v); + Map out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.size(), 1); + Map.Entry e = out.entrySet().iterator().next(); + Assert.assertEquals(e.getKey().getName(), "k"); + Assert.assertEquals(e.getKey().getPrimitiveCount(), 9); // key fully present + Assert.assertEquals(e.getKey().getBoxedCount(), Integer.valueOf(3)); + Assert.assertEquals(e.getValue().getName(), "val"); + Assert.assertEquals(e.getValue().getPrimitiveCount(), 0); // value's since=2 fields default + Assert.assertNull(e.getValue().getBoxedCount()); + } + + @Test + public void evolveMapSameBeanKeyAndValueCrossCombos() { + // Key and value are the SAME versioned bean class, so the (key-older, value-current) and + // (key-current, value-older) combinations share both bean suffixes (_V1). Without namespacing + // the key suffix in the map class name they would collapse onto one generated class, and one of + // the two payloads below would decode with the wrong codec. Reader at (V2,V2) must decode both. 
+ MapEncoder> keyOlderWriter = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder> valOlderWriter = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder> reader = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + + DefaultsV1 oldKey = new DefaultsV1(); + oldKey.setName("ok"); + DefaultsV2 newVal = new DefaultsV2(); + newVal.setName("nv"); + newVal.setPrimitiveCount(5); + newVal.setBoxedCount(6); + Map keyOlder = new HashMap<>(); + keyOlder.put(oldKey, newVal); + Map outA = reader.decode(keyOlderWriter.encode(keyOlder)); + Map.Entry a = outA.entrySet().iterator().next(); + Assert.assertEquals(a.getKey().getName(), "ok"); + Assert.assertEquals(a.getKey().getPrimitiveCount(), 0); // key was V1, since=2 fields default + Assert.assertEquals(a.getValue().getName(), "nv"); + Assert.assertEquals(a.getValue().getPrimitiveCount(), 5); // value was V2, fully present + Assert.assertEquals(a.getValue().getBoxedCount(), Integer.valueOf(6)); + + DefaultsV2 newKey = new DefaultsV2(); + newKey.setName("nk"); + newKey.setPrimitiveCount(7); + newKey.setBoxedCount(8); + DefaultsV1 oldVal = new DefaultsV1(); + oldVal.setName("ov"); + Map valOlder = new HashMap<>(); + valOlder.put(newKey, oldVal); + Map outB = reader.decode(valOlderWriter.encode(valOlder)); + Map.Entry b = outB.entrySet().iterator().next(); + Assert.assertEquals(b.getKey().getName(), "nk"); + Assert.assertEquals(b.getKey().getPrimitiveCount(), 7); // key was V2, fully present + Assert.assertEquals(b.getKey().getBoxedCount(), Integer.valueOf(8)); + Assert.assertEquals(b.getValue().getName(), "ov"); + Assert.assertEquals( + b.getValue().getPrimitiveCount(), 0); // value was V1, since=2 fields default + } + + @Test + public void evolveMapKeyCompact() { + // The key dimension must also project under the compact format, which sorts schema fields by + // alignment; the historical key 
schema is sorted the same way as the writer's, so the layouts + // line up. + MapEncoder> writer = + Encoders.buildMapCodec(new TypeRef>() {}) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + MapEncoder> reader = + Encoders.buildMapCodec(new TypeRef>() {}) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + DefaultsV1 k = new DefaultsV1(); + k.setName("k"); + Map in = new HashMap<>(); + in.put(k, "v"); + Map out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.size(), 1); + Map.Entry e = out.entrySet().iterator().next(); + Assert.assertEquals(e.getKey().getName(), "k"); + Assert.assertEquals(e.getKey().getPrimitiveCount(), 0); + Assert.assertEquals(e.getValue(), "v"); + } + + @Test + public void evolveMapStructKeyWithNestedBean() { + // The key is a struct that itself wraps an evolving nested bean. inKeyPosition must stay set + // while the key subtree recurses into the nested bean, both at expression construction and in + // KeyPositionScope's genCode. If it does not, the nested key bean registers/decodes under the + // value-position codec key and decode fails loud ("No bean codec registered ... key/value + // position"). The nested detail is written at V1 and read by a V2 reader, so the nested key + // bean must project to its historical layout, not the value's. 
+ MapEncoder> writer = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder> reader = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + KeyHolderV1 k = new KeyHolderV1(); + k.setId("h"); + DefaultsV1 d = new DefaultsV1(); + d.setName("inner"); + k.setDetail(d); + Map in = new HashMap<>(); + in.put(k, "v"); + Map out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.size(), 1); + Map.Entry e = out.entrySet().iterator().next(); + Assert.assertEquals(e.getKey().getId(), "h"); + Assert.assertEquals(e.getKey().getDetail().getName(), "inner"); + Assert.assertEquals(e.getKey().getDetail().getPrimitiveCount(), 0); // since=2, absent in writer + Assert.assertNull(e.getKey().getDetail().getBoxedCount()); + Assert.assertEquals(e.getValue(), "v"); + } + + @Test + public void evolveMapCollectionKey() { + // The key is a List of versioned beans, so the key subtree is a collection rather than a bare + // bean. This drives the keyScoped(deserializeForCollection) branch: inKeyPosition must stay set + // through the collection element decode so the element bean projects to its historical layout + // under the key-position codec, not the value's. 
+ MapEncoder, String>> writer = + Encoders.buildMapCodec(new TypeRef, String>>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder, String>> reader = + Encoders.buildMapCodec(new TypeRef, String>>() {}) + .withSchemaEvolution() + .build() + .get(); + DefaultsV1 d = new DefaultsV1(); + d.setName("k"); + Map, String> in = new HashMap<>(); + in.put(Arrays.asList(d), "v"); + Map, String> out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.size(), 1); + Map.Entry, String> e = out.entrySet().iterator().next(); + Assert.assertEquals(e.getKey().size(), 1); + Assert.assertEquals(e.getKey().get(0).getName(), "k"); + Assert.assertEquals(e.getKey().get(0).getPrimitiveCount(), 0); // since=2, absent in writer + Assert.assertNull(e.getKey().get(0).getBoxedCount()); + Assert.assertEquals(e.getValue(), "v"); + } + + // A versioned bean as the KEY of an inner map reached through a top-level array or map wrapper. + // The top-level codec's own key/value hash only covers a top-level map's own key, and the + // row-field path (collectNestedSites) only applies inside a bean field, so this wrapper-reached + // inner key is the shape that must still evolve through the top-level array/map entry point. 
+ + @Test + public void arrayOfMapKeyOlderPayloadReadByNewerCodec() { + ArrayEncoder>> writer = + Encoders.buildArrayCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + ArrayEncoder>> reader = + Encoders.buildArrayCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + DefaultsV1 k = new DefaultsV1(); + k.setName("k"); + Map inner = new HashMap<>(); + inner.put(k, "v"); + List> out = reader.decode(writer.encode(Arrays.asList(inner))); + Assert.assertEquals(out.size(), 1); + Map.Entry e = out.get(0).entrySet().iterator().next(); + Assert.assertEquals(e.getKey().getName(), "k"); + Assert.assertEquals(e.getKey().getPrimitiveCount(), 0); // since=2 field absent in writer + Assert.assertNull(e.getKey().getBoxedCount()); + Assert.assertEquals(e.getValue(), "v"); + } + + // A top-level map whose VALUE is itself a Map with two DIFFERENT versioned bean + // classes. The value position must evolve both inner beans; before the fix the value position + // enumerated only one and generated a codec for only it. 
+ @Test + public void mapValueWrappingTwoBeansOlderPayloadReadByNewerCodec() { + MapEncoder>> writer = + Encoders.buildMapCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder>> reader = + Encoders.buildMapCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + AlignV1 k = new AlignV1(); + k.setX(11); + k.setY(22); + DefaultsV1 v = new DefaultsV1(); + v.setName("val"); + Map inner = new HashMap<>(); + inner.put(k, v); + Map> in = new HashMap<>(); + in.put("o", inner); + Map> out = reader.decode(writer.encode(in)); + Map.Entry e = out.get("o").entrySet().iterator().next(); + Assert.assertEquals(e.getKey().getX(), 11); + Assert.assertEquals(e.getKey().getY(), 22); + Assert.assertEquals(e.getKey().getFlag(), (byte) 0); + Assert.assertEquals(e.getValue().getName(), "val"); + Assert.assertEquals(e.getValue().getPrimitiveCount(), 0); + Assert.assertNull(e.getValue().getBoxedCount()); + } + + @Test + public void mapOfMapKeyOlderPayloadReadByNewerCodec() { + MapEncoder>> writer = + Encoders.buildMapCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder>> reader = + Encoders.buildMapCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + DefaultsV1 k = new DefaultsV1(); + k.setName("k"); + Map inner = new HashMap<>(); + inner.put(k, "v"); + Map> in = new HashMap<>(); + in.put("o", inner); + Map> out = reader.decode(writer.encode(in)); + Map.Entry e = out.get("o").entrySet().iterator().next(); + Assert.assertEquals(e.getKey().getName(), "k"); + Assert.assertEquals(e.getKey().getPrimitiveCount(), 0); // since=2 field absent in writer + Assert.assertNull(e.getKey().getBoxedCount()); + Assert.assertEquals(e.getValue(), "v"); + } + + // A nested map whose KEY and VALUE are DIFFERENT versioned beans, reached through a top-level + // array wrapper. Key is AlignV1/V2 (V2 adds a byte flag), value is DefaultsV1/V2 (V2 adds two + // since=2 fields). 
The element schema's strict hash must cover both sides at their chosen + // versions, just as the enclosing-bean hash does on the row-field path, so an older payload + // restores both beans at their historical layout. Old field values are non-default so reading + // either side at the wrong (current) layout would misread the bytes rather than coincide with a + // default. The two beans evolve independently, so dispatch must distinguish their combination. + @Test + public void arrayOfMapBothSidesOlderPayloadReadByNewerCodec() { + ArrayEncoder>> writer = + Encoders.buildArrayCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + ArrayEncoder>> reader = + Encoders.buildArrayCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + AlignV1 k = new AlignV1(); + k.setX(11); + k.setY(22); + DefaultsV1 v = new DefaultsV1(); + v.setName("val"); + Map inner = new HashMap<>(); + inner.put(k, v); + List> out = reader.decode(writer.encode(Arrays.asList(inner))); + Assert.assertEquals(out.size(), 1); + Map.Entry e = out.get(0).entrySet().iterator().next(); + Assert.assertEquals(e.getKey().getX(), 11); + Assert.assertEquals(e.getKey().getY(), 22); + Assert.assertEquals(e.getKey().getFlag(), (byte) 0); // key since=2 field absent in writer + Assert.assertEquals(e.getValue().getName(), "val"); + Assert.assertEquals(e.getValue().getPrimitiveCount(), 0); // value since=2 fields absent + Assert.assertNull(e.getValue().getBoxedCount()); + } + + // --------------------------------------------------------------------------- + // Removed nullable struct that was null on the wire: the v1 writer leaves + // the slot's null bit set; the v2 reader skips the slot during projection. 
+ // --------------------------------------------------------------------------- + + @Data + public static class NullableStructV1 { + private String id; + private DefaultsV1 detail; // nullable, removed at v2 + private long tail; // live in both versions, positioned after the removed slot + } + + @Data + @ForySchema(removedFields = NullableStructV2.History.class) + public static class NullableStructV2 { + private String id; + private long tail; + + interface History { + @ForyVersion(until = 2) + DefaultsV1 detail(); + } + } + + @Test + public void removedNullableStructWasNullOnWire() { + RowEncoder writer = evolvingCodec(NullableStructV1.class); + RowEncoder reader = evolvingCodec(NullableStructV2.class); + NullableStructV1 in = new NullableStructV1(); + in.setId("only-id"); + // detail intentionally left null + in.setTail(42L); + NullableStructV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getId(), "only-id"); + // The field after the removed (null-on-wire) slot must read correctly, proving projection + // skipped the slot rather than reading its null bit as part of a later field. + Assert.assertEquals(out.getTail(), 42L); + } + + // --------------------------------------------------------------------------- + // Builder method ordering: compactEncoding() before vs after withSchemaEvolution() + // must produce equivalent codecs. 
+ // --------------------------------------------------------------------------- + + @Test + public void builderMethodOrderingIsCommutative() { + RowEncoder w = + Encoders.buildBeanCodec(DefaultsV1.class) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + RowEncoder rOrderA = + Encoders.buildBeanCodec(DefaultsV2.class) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + RowEncoder rOrderB = + Encoders.buildBeanCodec(DefaultsV2.class) + .withSchemaEvolution() + .compactEncoding() + .build() + .get(); + DefaultsV1 in = new DefaultsV1(); + in.setName("commute"); + byte[] bytes = w.encode(in); + Assert.assertEquals(rOrderA.decode(bytes).getName(), "commute"); + Assert.assertEquals(rOrderB.decode(bytes).getName(), "commute"); + } + + @Test + public void nestedListSurvivesOuterProjection() { + RowEncoder writer = evolvingCodec(NestedListV1.class); + RowEncoder reader = evolvingCodec(NestedListV2.class); + DefaultsV1 a = new DefaultsV1(); + a.setName("a"); + DefaultsV1 b = new DefaultsV1(); + b.setName("b"); + NestedListV1 in = new NestedListV1(); + in.setItems(Arrays.asList(a, b)); + NestedListV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getItems().size(), 2); + Assert.assertEquals(out.getItems().get(0).getName(), "a"); + Assert.assertEquals(out.getItems().get(1).getName(), "b"); + Assert.assertNull(out.getTag()); + } + + // --------------------------------------------------------------------------- + // Nested versioned bean: a parent bean with a struct field whose own type is + // versioned independently. The wire layout for the inner struct is inline in + // the parent's bytes with no per-inner hash. The reader, dispatching on the + // parent's strict hash, needs to choose an inner schema consistent with what + // the writer used. + // --------------------------------------------------------------------------- + + /** Stand-in for "older code that wrote the inner struct without field x". 
*/ + @Data + public static class NestedInnerWriter { + private String name; + } + + /** Stand-in for "older code that wrote the outer containing NestedInnerWriter". */ + @Data + public static class NestedOuterWriter { + private long id; + private NestedInnerWriter inner; + } + + /** Newer inner with an added field at v2. */ + @Data + public static class NestedInnerV2 { + private String name; + + @ForyVersion(since = 2) + private String addedField; + } + + /** Newer outer that still has just (id, inner) but its inner type evolved. */ + @Data + public static class NestedOuterV2 { + private long id; + private NestedInnerV2 inner; + } + + @Test + public void nestedInnerEvolution_readerInnerNewerThanWriter() { + // Writer uses the "older shape" inner. Both writer and reader are evolution-on so they + // agree on strict-hash framing. + RowEncoder writer = evolvingCodec(NestedOuterWriter.class); + RowEncoder reader = evolvingCodec(NestedOuterV2.class); + + NestedOuterWriter in = new NestedOuterWriter(); + in.setId(42); + NestedInnerWriter inn = new NestedInnerWriter(); + inn.setName("hello"); + in.setInner(inn); + + byte[] bytes = writer.encode(in); + NestedOuterV2 out = reader.decode(bytes); + Assert.assertEquals(out.getId(), 42); + Assert.assertNotNull(out.getInner()); + Assert.assertEquals(out.getInner().getName(), "hello"); + Assert.assertNull(out.getInner().getAddedField()); + } + + // --------------------------------------------------------------------------- + // Outer + inner versioned independently. The cross-product enumeration must + // generate a projection codec for each (outer-version, inner-version) pair + // that isn't the current combination. + // --------------------------------------------------------------------------- + + /** Outer with its own added field at v2; inner stays at v1. 
*/ + @Data + public static class CrossOuterV2_InnerV1 { + private long id; + private NestedInnerWriter inner; + + @ForyVersion(since = 2) + private String label; + } + + /** Outer v2 reader with inner evolved to v2. Both dimensions evolve independently. */ + @Data + public static class CrossOuterV2_InnerV2 { + private long id; + private NestedInnerV2 inner; + + @ForyVersion(since = 2) + private String label; + } + + @Test + public void crossOuterAndInnerEvolution() { + // Writer writes outer V1 + inner V1 (no label, no addedField). + RowEncoder writer = evolvingCodec(NestedOuterWriter.class); + RowEncoder reader = evolvingCodec(CrossOuterV2_InnerV2.class); + + NestedOuterWriter in = new NestedOuterWriter(); + in.setId(100); + NestedInnerWriter inn = new NestedInnerWriter(); + inn.setName("legacy-inner"); + in.setInner(inn); + + byte[] bytes = writer.encode(in); + CrossOuterV2_InnerV2 out = reader.decode(bytes); + Assert.assertEquals(out.getId(), 100); + Assert.assertEquals(out.getInner().getName(), "legacy-inner"); + Assert.assertNull(out.getInner().getAddedField()); + Assert.assertNull(out.getLabel()); + } + + /** + * Contract: {@code SchemaHistory.current().nestedBeanSchemas()} must report each nested bean at + * its current entry. Two cross-product combinations canonicalizing to the same signature is rare + * today (the inner's own bySignature collapses wire-equal schemas before the outer sees them) but + * the contract is documented and future callers may rely on it. 
+ */ + @Test + public void schemaHistoryCurrentReflectsCurrentInnerVersions() { + SchemaHistory history = + SchemaHistory.build(CrossOuterV2_InnerV2.class, UnaryOperator.identity()); + SchemaHistory.VersionedSchema current = history.current(); + Assert.assertTrue(current.isCurrent(), "history.current() must be marked current"); + for (Map.Entry, SchemaHistory.VersionedSchema> e : + current.nestedBeanSchemas().entrySet()) { + SchemaHistory innerHistory = SchemaHistory.build(e.getKey(), UnaryOperator.identity()); + Assert.assertTrue( + e.getValue().isCurrent(), + "current().nestedBeanSchemas() must report inner " + e.getKey() + " at its current"); + Assert.assertEquals( + e.getValue().version(), + innerHistory.current().version(), + "inner current version mismatch for " + e.getKey()); + } + } + + // --------------------------------------------------------------------------- + // Cross-product enumeration must route inner-bean versions through array and + // map projection codecs, not just through the row codec. The reader's outer + // type has N outer versions x M inner versions; multiple cross-product entries + // share an outer version number, so the per-class suffix must encode the + // inner version to keep them from colliding on the codegen cache. 
+ // --------------------------------------------------------------------------- + + @Test + public void crossOuterAndInnerEvolution_array() { + ArrayEncoder> writer = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + ArrayEncoder> reader = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + + List in = new ArrayList<>(); + for (int i = 0; i < 3; i++) { + NestedOuterWriter e = new NestedOuterWriter(); + e.setId(i); + NestedInnerWriter inn = new NestedInnerWriter(); + inn.setName("legacy-" + i); + e.setInner(inn); + in.add(e); + } + + List out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.size(), 3); + for (int i = 0; i < 3; i++) { + Assert.assertEquals(out.get(i).getId(), i); + Assert.assertEquals(out.get(i).getInner().getName(), "legacy-" + i); + Assert.assertNull(out.get(i).getInner().getAddedField()); + Assert.assertNull(out.get(i).getLabel()); + } + } + + @Test + public void crossOuterAndInnerEvolution_map() { + MapEncoder> writer = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder> reader = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + + LinkedHashMap in = new LinkedHashMap<>(); + for (int i = 0; i < 3; i++) { + NestedOuterWriter e = new NestedOuterWriter(); + e.setId(i); + NestedInnerWriter inn = new NestedInnerWriter(); + inn.setName("legacy-" + i); + e.setInner(inn); + in.put("k" + i, e); + } + + Map out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.size(), 3); + for (int i = 0; i < 3; i++) { + CrossOuterV2_InnerV2 v = out.get("k" + i); + Assert.assertNotNull(v, "missing key k" + i); + Assert.assertEquals(v.getId(), i); + Assert.assertEquals(v.getInner().getName(), "legacy-" + i); + Assert.assertNull(v.getInner().getAddedField()); + Assert.assertNull(v.getLabel()); + } + } + + // 
--------------------------------------------------------------------------- + // Under evolution, array/map payloads carry an 8-byte schema-hash prefix. A + // payload too small to hold that prefix is malformed and must fail loudly + // rather than feed a negative size into pointTo. + // --------------------------------------------------------------------------- + + @Test + public void arrayPayloadBelowHashPrefixFailsLoudly() { + ArrayEncoder> codec = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + Assert.expectThrows(ClassNotCompatibleException.class, () -> codec.decode(new byte[3])); + } + + @Test + public void mapPayloadBelowHashPrefixFailsLoudly() { + MapEncoder> codec = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + Assert.expectThrows(ClassNotCompatibleException.class, () -> codec.decode(new byte[3])); + } + + // --------------------------------------------------------------------------- + // A full-length payload whose strict schema hash matches neither the reader's + // current schema nor any entry in its projection history must fail loudly. + // This is the projection-map miss, distinct from the too-short-for-the-prefix + // case above: the payload clears the 8-byte guard, so it can only be caught by + // the unknown-hash branch. Encoding one bean and decoding it with an evolving + // codec for a structurally different bean reaches that branch deterministically. 
+ // --------------------------------------------------------------------------- + + @Test + public void rowUnknownSchemaHashFailsLoudly() { + ChainV2 writer = new ChainV2(); + writer.setA(7); + writer.setB("foreign"); + byte[] foreign = evolvingCodec(ChainV2.class).encode(writer); + Assert.assertTrue(foreign.length > 8, "payload must clear the 8-byte hash prefix"); + RowEncoder reader = evolvingCodec(DefaultsV1.class); + Assert.expectThrows(ClassNotCompatibleException.class, () -> reader.decode(foreign)); + } + + @Test + public void mapUnknownSchemaHashFailsLoudly() { + MapEncoder> writer = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + ChainV2 v = new ChainV2(); + v.setA(7); + v.setB("foreign"); + LinkedHashMap in = new LinkedHashMap<>(); + in.put("k", v); + byte[] foreign = writer.encode(in); + Assert.assertTrue(foreign.length > 8, "payload must clear the 8-byte hash prefix"); + MapEncoder> reader = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + Assert.expectThrows(ClassNotCompatibleException.class, () -> reader.decode(foreign)); + } + + @Test + public void arrayUnknownSchemaHashFailsLoudly() { + ArrayEncoder> writer = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + ChainV2 v = new ChainV2(); + v.setA(7); + v.setB("foreign"); + List in = new ArrayList<>(); + in.add(v); + byte[] foreign = writer.encode(in); + Assert.assertTrue(foreign.length > 8, "payload must clear the 8-byte hash prefix"); + ArrayEncoder> reader = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + Assert.expectThrows(ClassNotCompatibleException.class, () -> reader.decode(foreign)); + } + + // --------------------------------------------------------------------------- + // Three-level nesting: L1 -> L2 -> L3, each independently versioned. 
Because + // L2's own history cross-products over L3's versions, L2's history holds two + // entries that share a version number but differ in their L3 layout. Routing + // must pick the L2 entry whose L3 matches the writer, not the first one with a + // matching version number. Identifies the inner combination by strict hash, so + // it resolves the correct subtree to arbitrary depth. + // --------------------------------------------------------------------------- + + @Data + public static class L3Writer { + private String name; + } + + @Data + public static class L2Writer { + private long tag; + private L3Writer leaf; + } + + @Data + public static class L1Writer { + private long id; + private L2Writer mid; + } + + @Data + public static class L3V2 { + private String name; + + @ForyVersion(since = 2) + private String note; + } + + @Data + public static class L2V2 { + private long tag; + private L3V2 leaf; + + @ForyVersion(since = 2) + private String midLabel; + } + + @Data + public static class L1V2 { + private long id; + private L2V2 mid; + + @ForyVersion(since = 2) + private String outerLabel; + } + + @Test + public void threeLevelNestedEvolution() { + RowEncoder writer = evolvingCodec(L1Writer.class); + RowEncoder reader = evolvingCodec(L1V2.class); + + L1Writer in = new L1Writer(); + in.setId(7); + L2Writer mid = new L2Writer(); + mid.setTag(11); + L3Writer leaf = new L3Writer(); + leaf.setName("deep"); + mid.setLeaf(leaf); + in.setMid(mid); + + L1V2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getId(), 7); + Assert.assertNull(out.getOuterLabel()); + Assert.assertEquals(out.getMid().getTag(), 11); + Assert.assertNull(out.getMid().getMidLabel()); + Assert.assertEquals(out.getMid().getLeaf().getName(), "deep"); + Assert.assertNull(out.getMid().getLeaf().getNote()); + } + + // --------------------------------------------------------------------------- + // The same versioned bean class in two fields. 
A writer writes one definition + // of that class, so both fields are always at the same version on the wire; + // the enumeration carries one version dimension per class, not per field, so a + // class may back more than one slot. + // --------------------------------------------------------------------------- + + @Data + public static class TwoLeafWriter { + private L3Writer first; + private L3Writer second; + } + + @Data + public static class TwoLeafV2 { + private L3V2 first; + private L3V2 second; + } + + @Test + public void sameClassInTwoFields() { + RowEncoder writer = evolvingCodec(TwoLeafWriter.class); + RowEncoder reader = evolvingCodec(TwoLeafV2.class); + + TwoLeafWriter in = new TwoLeafWriter(); + L3Writer a = new L3Writer(); + a.setName("alpha"); + L3Writer b = new L3Writer(); + b.setName("beta"); + in.setFirst(a); + in.setSecond(b); + + TwoLeafV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getFirst().getName(), "alpha"); + Assert.assertNull(out.getFirst().getNote()); + Assert.assertEquals(out.getSecond().getName(), "beta"); + Assert.assertNull(out.getSecond().getNote()); + } + + private static RowEncoder evolvingCodec(Class beanClass) { + return Encoders.buildBeanCodec(beanClass).withSchemaEvolution().build().get(); + } +} diff --git a/java/fory-format/src/test/java/org/apache/fory/format/encoder/SchemaEvolutionTest.java b/java/fory-format/src/test/java/org/apache/fory/format/encoder/SchemaEvolutionTest.java new file mode 100644 index 0000000000..15da6ff1df --- /dev/null +++ b/java/fory-format/src/test/java/org/apache/fory/format/encoder/SchemaEvolutionTest.java @@ -0,0 +1,1214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.format.encoder; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import lombok.Data; +import org.apache.fory.exception.ClassNotCompatibleException; +import org.apache.fory.format.annotation.ForySchema; +import org.apache.fory.format.annotation.ForyVersion; +import org.apache.fory.format.type.Field; +import org.apache.fory.memory.MemoryBuffer; +import org.apache.fory.reflect.TypeRef; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class SchemaEvolutionTest { + + /** Original v1 bean: just a name and an age. */ + @Data + public static class PersonV1 { + private String name; + private int age; + } + + /** + * v2: added an email. The codec built against this class must still be able to read v1 payloads + * (email will default to null). + */ + @Data + public static class PersonV2 { + private String name; + private int age; + + @ForyVersion(since = 2) + private String email; + } + + /** + * v3: same as v2 with the age field removed. The codec built against this class must read v1 + * payloads (with age) and v2 payloads (with age + email). 
+ */ + @Data + @ForySchema(removedFields = PersonV3.History.class) + public static class PersonV3 { + private String name; + + @ForyVersion(since = 2) + private String email; + + interface History { + @ForyVersion(until = 3) + int age(); + } + } + + /** Round-trip at the current version: writing PersonV2, reading PersonV2 with evolution on. */ + @Test + public void currentVersionRoundTrip() { + RowEncoder codec = evolvingCodec(PersonV2.class); + PersonV2 in = new PersonV2(); + in.setName("alice"); + in.setAge(30); + in.setEmail("alice@example.com"); + byte[] bytes = codec.encode(in); + PersonV2 out = codec.decode(bytes); + Assert.assertEquals(out.getName(), "alice"); + Assert.assertEquals(out.getAge(), 30); + Assert.assertEquals(out.getEmail(), "alice@example.com"); + } + + /** + * The crux: a payload produced by PersonV1 (literally a different Java class with the v1-shaped + * schema) decoded by PersonV2's evolution-enabled codec. We use PersonV1 as a stand-in for "what + * older code wrote." Both classes are encoded with schema evolution on so they share the + * strict-hash format; PersonV1's history is a single entry, and PersonV2's history contains both + * v1 (without email) and v2 (with email) entries that match PersonV1's single entry by hash. 
+ */ + @Test + public void olderPayloadReadByNewerCodec() { + RowEncoder oldWriter = evolvingCodec(PersonV1.class); + RowEncoder newReader = evolvingCodec(PersonV2.class); + + PersonV1 in = new PersonV1(); + in.setName("alice"); + in.setAge(30); + byte[] bytes = oldWriter.encode(in); + + PersonV2 out = newReader.decode(bytes); + Assert.assertEquals(out.getName(), "alice"); + Assert.assertEquals(out.getAge(), 30); + Assert.assertNull(out.getEmail()); + } + + // --- Compact row format --- + + @Test + public void compactRowOlderPayloadReadByNewerCodec() { + RowEncoder oldWriter = + Encoders.buildBeanCodec(PersonV1.class) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + RowEncoder newReader = + Encoders.buildBeanCodec(PersonV2.class) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + PersonV1 in = new PersonV1(); + in.setName("bob"); + in.setAge(42); + byte[] bytes = oldWriter.encode(in); + PersonV2 out = newReader.decode(bytes); + Assert.assertEquals(out.getName(), "bob"); + Assert.assertEquals(out.getAge(), 42); + Assert.assertNull(out.getEmail()); + } + + /** + * The byte[] overloads use bytes.length for the body size; the MemoryBuffer overloads write and + * read an embedded int32 size prefix ahead of the 8-byte hash. That framing is a distinct code + * path, so exercise a projection hit (older payload, newer reader) through it. Two records are + * written into one buffer and read back in order to confirm the reader advances past each + * record's embedded size. 
+ */ + @Test + public void streamingOlderPayloadReadByNewerCodec() { + RowEncoder oldWriter = evolvingCodec(PersonV1.class); + RowEncoder newReader = evolvingCodec(PersonV2.class); + + PersonV1 alice = new PersonV1(); + alice.setName("alice"); + alice.setAge(30); + PersonV1 bob = new PersonV1(); + bob.setName("bob"); + bob.setAge(42); + + MemoryBuffer buffer = MemoryBuffer.newHeapBuffer(32); + oldWriter.encode(buffer, alice); + oldWriter.encode(buffer, bob); + + PersonV2 outAlice = newReader.decode(buffer); + PersonV2 outBob = newReader.decode(buffer); + Assert.assertEquals(outAlice.getName(), "alice"); + Assert.assertEquals(outAlice.getAge(), 30); + Assert.assertNull(outAlice.getEmail()); + Assert.assertEquals(outBob.getName(), "bob"); + Assert.assertEquals(outBob.getAge(), 42); + Assert.assertNull(outBob.getEmail()); + } + + // --- Array of versioned beans --- + + @Test + public void arrayStandardOlderPayloadReadByNewerCodec() { + ArrayEncoder> oldWriter = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + ArrayEncoder> newReader = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + PersonV1 a = new PersonV1(); + a.setName("alice"); + a.setAge(30); + PersonV1 b = new PersonV1(); + b.setName("bob"); + b.setAge(42); + byte[] bytes = oldWriter.encode(Arrays.asList(a, b)); + List out = newReader.decode(bytes); + Assert.assertEquals(out.size(), 2); + Assert.assertEquals(out.get(0).getName(), "alice"); + Assert.assertEquals(out.get(0).getAge(), 30); + Assert.assertNull(out.get(0).getEmail()); + Assert.assertEquals(out.get(1).getName(), "bob"); + } + + @Test + public void arrayCompactOlderPayloadReadByNewerCodec() { + ArrayEncoder> oldWriter = + Encoders.buildArrayCodec(new TypeRef>() {}) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + ArrayEncoder> newReader = + Encoders.buildArrayCodec(new TypeRef>() {}) + .compactEncoding() + .withSchemaEvolution() + .build() + 
.get(); + PersonV1 p = new PersonV1(); + p.setName("carol"); + p.setAge(25); + byte[] bytes = oldWriter.encode(Arrays.asList(p)); + List out = newReader.decode(bytes); + Assert.assertEquals(out.size(), 1); + Assert.assertEquals(out.get(0).getName(), "carol"); + Assert.assertEquals(out.get(0).getAge(), 25); + Assert.assertNull(out.get(0).getEmail()); + } + + // --- Map with versioned bean values --- + + @Test + public void mapStandardOlderPayloadReadByNewerCodec() { + MapEncoder> oldWriter = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder> newReader = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + Map in = new HashMap<>(); + PersonV1 p = new PersonV1(); + p.setName("dave"); + p.setAge(40); + in.put("k1", p); + byte[] bytes = oldWriter.encode(in); + Map out = newReader.decode(bytes); + Assert.assertEquals(out.size(), 1); + Assert.assertEquals(out.get("k1").getName(), "dave"); + Assert.assertEquals(out.get("k1").getAge(), 40); + Assert.assertNull(out.get("k1").getEmail()); + } + + @Test + public void mapCompactOlderPayloadReadByNewerCodec() { + MapEncoder> oldWriter = + Encoders.buildMapCodec(new TypeRef>() {}) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + MapEncoder> newReader = + Encoders.buildMapCodec(new TypeRef>() {}) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + Map in = new HashMap<>(); + PersonV1 p = new PersonV1(); + p.setName("eve"); + p.setAge(28); + in.put("k1", p); + byte[] bytes = oldWriter.encode(in); + Map out = newReader.decode(bytes); + Assert.assertEquals(out.get("k1").getName(), "eve"); + Assert.assertEquals(out.get("k1").getAge(), 28); + Assert.assertNull(out.get("k1").getEmail()); + } + + // An evolution-on array/map prepends the 8-byte schema hash even when the collection is empty, so + // the framed payload is exactly the prefix with a zero-length body (size == 8, bodySize == 0). 
+ // The reader must dispatch on the hash and return an empty collection rather than tripping the + // size guard or reading past the prefix. + @Test + public void emptyArrayRoundTrip() { + ArrayEncoder> oldWriter = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + ArrayEncoder> newReader = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + byte[] bytes = oldWriter.encode(Arrays.asList()); + List out = newReader.decode(bytes); + Assert.assertTrue(out.isEmpty()); + } + + @Test + public void emptyMapRoundTrip() { + MapEncoder> oldWriter = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder> newReader = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + byte[] bytes = oldWriter.encode(new HashMap<>()); + Map out = newReader.decode(bytes); + Assert.assertTrue(out.isEmpty()); + } + + /** + * Streaming counterpart of {@link #arrayStandardOlderPayloadReadByNewerCodec}: the + * encode(MemoryBuffer) overload frames each record with an embedded int32 size. Two older-schema + * lists in one buffer confirm the reader advances past each record's size on a projection hit. 
+ */ + @Test + public void arrayStreamingOlderPayloadReadByNewerCodec() { + ArrayEncoder> oldWriter = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + ArrayEncoder> newReader = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + PersonV1 a = new PersonV1(); + a.setName("alice"); + a.setAge(30); + PersonV1 b = new PersonV1(); + b.setName("bob"); + b.setAge(42); + + MemoryBuffer buffer = MemoryBuffer.newHeapBuffer(32); + oldWriter.encode(buffer, Arrays.asList(a)); + oldWriter.encode(buffer, Arrays.asList(b)); + + List outA = newReader.decode(buffer); + List outB = newReader.decode(buffer); + Assert.assertEquals(outA.size(), 1); + Assert.assertEquals(outA.get(0).getName(), "alice"); + Assert.assertEquals(outA.get(0).getAge(), 30); + Assert.assertNull(outA.get(0).getEmail()); + Assert.assertEquals(outB.get(0).getName(), "bob"); + Assert.assertEquals(outB.get(0).getAge(), 42); + } + + /** + * Streaming counterpart of {@link #mapStandardOlderPayloadReadByNewerCodec}. The map + * encode(MemoryBuffer) path restores both the key and value writer buffers in a finally block; + * two records in one buffer exercise that restore across records. 
+ */ + @Test + public void mapStreamingOlderPayloadReadByNewerCodec() { + MapEncoder> oldWriter = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder> newReader = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + Map first = new HashMap<>(); + PersonV1 dave = new PersonV1(); + dave.setName("dave"); + dave.setAge(40); + first.put("k1", dave); + Map second = new HashMap<>(); + PersonV1 erin = new PersonV1(); + erin.setName("erin"); + erin.setAge(35); + second.put("k2", erin); + + MemoryBuffer buffer = MemoryBuffer.newHeapBuffer(32); + oldWriter.encode(buffer, first); + oldWriter.encode(buffer, second); + + Map outFirst = newReader.decode(buffer); + Map outSecond = newReader.decode(buffer); + Assert.assertEquals(outFirst.get("k1").getName(), "dave"); + Assert.assertEquals(outFirst.get("k1").getAge(), 40); + Assert.assertNull(outFirst.get("k1").getEmail()); + Assert.assertEquals(outSecond.get("k2").getName(), "erin"); + Assert.assertEquals(outSecond.get("k2").getAge(), 35); + } + + // --- Unversioned element bean still carries the strict-hash prefix --- + // + // A top-level array/map built with withSchemaEvolution() must emit the 8-byte prefix even when + // its element/value bean has no @ForyVersion fields, so two independently-built evolution-on + // codecs stay wire-compatible (see SchemaHistory#evolutionBean). Each test compares the evolving + // payload against a non-evolving one: the lengths must differ by exactly the prefix, which a + // regression skipping the evolution path for an unversioned element would not produce. 
+ + @Test + public void arrayOfUnversionedBeanCarriesHashPrefix() { + ArrayEncoder> evolving = + Encoders.buildArrayCodec(new TypeRef>() {}).withSchemaEvolution().build().get(); + ArrayEncoder> plain = + Encoders.buildArrayCodec(new TypeRef>() {}).build().get(); + Item item = new Item(); + item.setName("widget"); + item.setQuantity(7); + List in = Arrays.asList(item); + + byte[] evolvingBytes = evolving.encode(in); + byte[] plainBytes = plain.encode(in); + Assert.assertEquals(evolvingBytes.length, plainBytes.length + 8); + + // A separately-built evolution-on codec reads the prefixed payload back. + ArrayEncoder> reader = + Encoders.buildArrayCodec(new TypeRef>() {}).withSchemaEvolution().build().get(); + List out = reader.decode(evolvingBytes); + Assert.assertEquals(out.size(), 1); + Assert.assertEquals(out.get(0).getName(), "widget"); + Assert.assertEquals(out.get(0).getQuantity(), 7); + } + + @Test + public void mapOfUnversionedBeanCarriesHashPrefix() { + MapEncoder> evolving = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder> plain = + Encoders.buildMapCodec(new TypeRef>() {}).build().get(); + Item item = new Item(); + item.setName("gadget"); + item.setQuantity(3); + Map in = new HashMap<>(); + in.put("k1", item); + + byte[] evolvingBytes = evolving.encode(in); + byte[] plainBytes = plain.encode(in); + Assert.assertEquals(evolvingBytes.length, plainBytes.length + 8); + + MapEncoder> reader = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + Map out = reader.decode(evolvingBytes); + Assert.assertEquals(out.size(), 1); + Assert.assertEquals(out.get("k1").getName(), "gadget"); + Assert.assertEquals(out.get("k1").getQuantity(), 3); + } + + // --- Interface-typed beans --- + // + // The wire field name is derived from each interface's accessor method name (via + // lowerCamelToLowerUnderscore), so two interfaces that share the same accessor names produce + // the same 
wire layout. Use accessor-style getters consistently across versions. + + /** v1 interface: just name and age. */ + public interface PersonIfaceV1 { + String getName(); + + int getAge(); + } + + /** v2 interface: adds email. Same accessor naming so the wire field names match. */ + public interface PersonIfaceV2 { + String getName(); + + int getAge(); + + @ForyVersion(since = 2) + String getEmail(); + } + + @Test + public void interfaceOlderPayloadReadByNewerCodec() { + RowEncoder oldWriter = evolvingCodec(PersonIfaceV1.class); + RowEncoder newReader = evolvingCodec(PersonIfaceV2.class); + PersonIfaceV1 in = + new PersonIfaceV1() { + public String getName() { + return "alice"; + } + + public int getAge() { + return 30; + } + }; + byte[] bytes = oldWriter.encode(in); + PersonIfaceV2 out = newReader.decode(bytes); + Assert.assertEquals(out.getName(), "alice"); + Assert.assertEquals(out.getAge(), 30); + // email was added in v2; v1 payload has none. The interface proxy returns the default. + Assert.assertNull(out.getEmail()); + } + + /** + * v3 interface: name and email; age removed (only present in v1 and v2). The history interface + * declares the removed field's original signature; its method name follows the same JavaBeans + * accessor convention as the live interface, so {@code getAge()} maps to wire name {@code age}. 
+ */ + @ForySchema(removedFields = PersonIfaceV3.History.class) + public interface PersonIfaceV3 { + String getName(); + + @ForyVersion(since = 2) + String getEmail(); + + interface History { + @ForyVersion(until = 3) + int getAge(); + } + } + + @Test + public void interfaceRemovedFieldReadByNewerCodec() { + RowEncoder v2Writer = evolvingCodec(PersonIfaceV2.class); + RowEncoder v3Reader = evolvingCodec(PersonIfaceV3.class); + PersonIfaceV2 in = + new PersonIfaceV2() { + public String getName() { + return "alice"; + } + + public int getAge() { + return 30; + } + + public String getEmail() { + return "alice@example.com"; + } + }; + byte[] bytes = v2Writer.encode(in); + PersonIfaceV3 out = v3Reader.decode(bytes); + Assert.assertEquals(out.getName(), "alice"); + Assert.assertEquals(out.getEmail(), "alice@example.com"); + } + + /** + * v1 writer interface: just a name. Used to produce a payload that the reader below projects to + * its v1 schema, where {@code score} is absent. + */ + public interface ScoredV1 { + String getName(); + } + + /** + * Current reader interface. {@code getScore()} is a live {@code since=2} accessor, so when a v1 + * payload is projected it is absent and gets a default-value body. {@code getScore(int)} is a + * parameterized overload sharing that name and return type. It is not an accessor — accessors + * take no arguments — so the projection proxy must throw for it rather than silence it into a + * default. Without the {@code parameterCount() != 0} guard in {@code isAccessorOfAbsentField}, it + * would match the absent {@code score} descriptor by name and return type and return {@code 0}. 
+ */ + public interface ScoredV2 { + String getName(); + + @ForyVersion(since = 2) + int getScore(); + + int getScore(int seed); + } + + @Test + public void projectionNonAccessorOverloadStillThrows() { + RowEncoder v1Writer = evolvingCodec(ScoredV1.class); + RowEncoder reader = evolvingCodec(ScoredV2.class); + ScoredV1 in = () -> "alice"; + ScoredV2 out = reader.decode(v1Writer.encode(in)); + Assert.assertEquals(out.getName(), "alice"); + // score was added in v2; the v1 payload has none, so the no-arg accessor defaults to 0. + Assert.assertEquals(out.getScore(), 0); + try { + out.getScore(7); + Assert.fail( + "parameterized getScore is not an accessor and must not be silenced to a default"); + } catch (UnsupportedOperationException expected) { + // The projection proxy does not implement non-accessor methods. + } + } + + /** Removed-field test: v3 codec reads v2 payload, dropping the no-longer-present 'age'. */ + @Test + public void removedFieldReadByNewerCodec() { + RowEncoder v2Writer = evolvingCodec(PersonV2.class); + RowEncoder v3Reader = evolvingCodec(PersonV3.class); + + PersonV2 in = new PersonV2(); + in.setName("alice"); + in.setAge(30); + in.setEmail("alice@example.com"); + byte[] bytes = v2Writer.encode(in); + + PersonV3 out = v3Reader.decode(bytes); + Assert.assertEquals(out.getName(), "alice"); + Assert.assertEquals(out.getEmail(), "alice@example.com"); + } + + // --------------------------------------------------------------------------- + // Compositional test + // + // Outer mutable bean evolves v1 -> v2 (adds displayName, removes legacyName). + // The bean carries diverse nested data shapes that themselves do not evolve: + // a concrete struct, an interface-typed struct (lazy proxy), an inline list + // of structs, and an inline map. The test exercises one + // dispatch boundary (the outer codec, or the outer list codec) and verifies + // that the projected outer correctly carries every nested shape through. 
+ // --------------------------------------------------------------------------- + + @Data + public static class Profile { + private String bio; + private int rating; + } + + /** Address is interface-typed; the row codec generates a lazy proxy for reads. */ + public interface Address { + String getStreet(); + + String getCity(); + } + + @Data + public static class Item { + private String name; + private long quantity; + } + + @Data + public static class OuterV1 { + private long id; + private String legacyName; + private Profile profile; + private Address address; + private List items; + private Map properties; + } + + /** + * OuterV2 adds {@code displayName} at version 2 and removes {@code legacyName} at version 2. + * Everything else carries forward unchanged. The compositional test writes an OuterV1 and reads + * as OuterV2. + */ + @Data + @ForySchema(removedFields = OuterV2.History.class) + public static class OuterV2 { + private long id; + + @ForyVersion(since = 2) + private String displayName; + + private Profile profile; + private Address address; + private List items; + private Map properties; + + interface History { + @ForyVersion(until = 2) + String legacyName(); + } + } + + private static OuterV1 sampleV1() { + OuterV1 in = new OuterV1(); + in.setId(7); + in.setLegacyName("retired"); + Profile p = new Profile(); + p.setBio("hello"); + p.setRating(5); + in.setProfile(p); + in.setAddress( + new Address() { + public String getStreet() { + return "1 Main"; + } + + public String getCity() { + return "Springfield"; + } + }); + Item a = new Item(); + a.setName("a"); + a.setQuantity(1); + Item b = new Item(); + b.setName("b"); + b.setQuantity(2); + in.setItems(Arrays.asList(a, b)); + Map props = new HashMap<>(); + props.put("k1", a); + props.put("k2", b); + in.setProperties(props); + return in; + } + + private static void assertProjectedToV2(OuterV2 out) { + Assert.assertEquals(out.getId(), 7); + Assert.assertNull(out.getDisplayName()); // added in v2, absent in 
v1 wire + Assert.assertEquals(out.getProfile().getBio(), "hello"); + Assert.assertEquals(out.getProfile().getRating(), 5); + Assert.assertEquals(out.getAddress().getStreet(), "1 Main"); + Assert.assertEquals(out.getAddress().getCity(), "Springfield"); + Assert.assertEquals(out.getItems().size(), 2); + Assert.assertEquals(out.getItems().get(0).getName(), "a"); + Assert.assertEquals(out.getItems().get(1).getQuantity(), 2); + Assert.assertEquals(out.getProperties().get("k1").getName(), "a"); + Assert.assertEquals(out.getProperties().get("k2").getQuantity(), 2); + } + + @Test + public void compositionalRowEvolution() { + RowEncoder writer = evolvingCodec(OuterV1.class); + RowEncoder reader = evolvingCodec(OuterV2.class); + byte[] bytes = writer.encode(sampleV1()); + assertProjectedToV2(reader.decode(bytes)); + } + + @Test + public void compositionalArrayEvolution() { + ArrayEncoder> writer = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + ArrayEncoder> reader = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + byte[] bytes = writer.encode(Arrays.asList(sampleV1(), sampleV1())); + List out = reader.decode(bytes); + Assert.assertEquals(out.size(), 2); + assertProjectedToV2(out.get(0)); + assertProjectedToV2(out.get(1)); + } + + // --------------------------------------------------------------------------- + // A versioned bean nested inside a collection field of the outer bean. The + // outer's SchemaHistory must look through the list/map wrapper to discover the + // inner bean and enumerate its versions, so an older payload (inner at v1) is + // projected into the newer reader (inner at v2). Without that, the outer has no + // projection matching the older inner layout and decode throws. 
+ // --------------------------------------------------------------------------- + + @Data + public static class TagV1 { + private String key; + } + + @Data + public static class TagV2 { + private String key; + + @ForyVersion(since = 2) + private long weight; + } + + @Data + public static class CatalogV1 { + private String id; + private List<TagV1> tags; + private Map<String, TagV1> labels; + } + + @Data + public static class CatalogV2 { + private String id; + private List<TagV2> tags; + private Map<String, TagV2> labels; + } + + private static CatalogV1 sampleCatalog() { + CatalogV1 in = new CatalogV1(); + in.setId("c1"); + TagV1 a = new TagV1(); + a.setKey("alpha"); + TagV1 b = new TagV1(); + b.setKey("beta"); + in.setTags(Arrays.asList(a, b)); + Map<String, TagV1> labels = new HashMap<>(); + labels.put("k1", a); + in.setLabels(labels); + return in; + } + + @Test + public void evolvingBeanInCollectionField() { + RowEncoder<CatalogV1> writer = evolvingCodec(CatalogV1.class); + RowEncoder<CatalogV2> reader = evolvingCodec(CatalogV2.class); + CatalogV2 out = reader.decode(writer.encode(sampleCatalog())); + Assert.assertEquals(out.getId(), "c1"); + Assert.assertEquals(out.getTags().size(), 2); + Assert.assertEquals(out.getTags().get(0).getKey(), "alpha"); + Assert.assertEquals(out.getTags().get(1).getKey(), "beta"); + // weight was added at v2; the v1 payload has no source for it. + Assert.assertEquals(out.getTags().get(0).getWeight(), 0L); + Assert.assertEquals(out.getLabels().get("k1").getKey(), "alpha"); + } + + // --------------------------------------------------------------------------- + // A versioned *interface* bean nested inside an evolving outer bean. Interface + // beans are valid versioned row beans at the top level (see PersonIfaceV1/V2), + // so they must also be discovered when nested as a field type, a list element, + // or a map value. 
SchemaHistory.findVersionedBean has to recognize an interface + // the same way the top-level container path does (synthesizing the interface as + // a bean); otherwise the outer's cross-product never enumerates the inner's + // older versions, an older inner payload has no matching projection, and decode + // fails with a schema-hash mismatch (ClassNotCompatibleException). + // --------------------------------------------------------------------------- + + /** v1 interface bean: a single key accessor. */ + public interface SlugV1 { + String getKey(); + } + + /** v2 interface bean: adds a weight at version 2. Same accessor naming as v1. */ + public interface SlugV2 { + String getKey(); + + @ForyVersion(since = 2) + long getWeight(); + } + + @Data + public static class BoxV1 { + private String id; + private SlugV1 slug; + private List slugs; + private Map labels; + } + + @Data + public static class BoxV2 { + private String id; + private SlugV2 slug; + private List slugs; + private Map labels; + } + + private static SlugV1 slugV1(String key) { + return () -> key; + } + + @Test + public void evolvingInterfaceBeanNestedInOuterBean() { + RowEncoder writer = evolvingCodec(BoxV1.class); + RowEncoder reader = evolvingCodec(BoxV2.class); + + BoxV1 in = new BoxV1(); + in.setId("b1"); + in.setSlug(slugV1("direct")); + in.setSlugs(Arrays.asList(slugV1("alpha"), slugV1("beta"))); + Map labels = new HashMap<>(); + labels.put("k1", slugV1("gamma")); + in.setLabels(labels); + + BoxV2 out = reader.decode(writer.encode(in)); + + Assert.assertEquals(out.getId(), "b1"); + Assert.assertEquals(out.getSlug().getKey(), "direct"); + Assert.assertEquals(out.getSlugs().size(), 2); + Assert.assertEquals(out.getSlugs().get(0).getKey(), "alpha"); + Assert.assertEquals(out.getSlugs().get(1).getKey(), "beta"); + Assert.assertEquals(out.getLabels().get("k1").getKey(), "gamma"); + // weight was added at v2; the v1 payload has no source, so it defaults. 
+ Assert.assertEquals(out.getSlug().getWeight(), 0L); + Assert.assertEquals(out.getSlugs().get(0).getWeight(), 0L); + Assert.assertEquals(out.getLabels().get("k1").getWeight(), 0L); + } + + // --- Versioned bean nested inside a top-level container's element/value --- + // + // A top-level array or map whose element/value is itself a collection of a versioned bean + // (List, Map<.., Person>) must still evolve. The versioned bean is reachable through + // the container element/value the same way SchemaHistory.findVersionedBean descends, so an + // older payload must decode under the newer codec rather than being read at a stale layout. + + @Test + public void mapOfListValueOlderPayloadReadByNewerCodec() { + MapEncoder>> oldWriter = + Encoders.buildMapCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder>> newReader = + Encoders.buildMapCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + Map> in = new HashMap<>(); + PersonV1 p = new PersonV1(); + p.setName("dave"); + p.setAge(40); + in.put("k1", Arrays.asList(p)); + byte[] bytes = oldWriter.encode(in); + Map> out = newReader.decode(bytes); + Assert.assertEquals(out.size(), 1); + PersonV2 read = out.get("k1").get(0); + Assert.assertEquals(read.getName(), "dave"); + Assert.assertEquals(read.getAge(), 40); + Assert.assertNull(read.getEmail()); + } + + @Test + public void arrayOfListElementOlderPayloadReadByNewerCodec() { + ArrayEncoder>> oldWriter = + Encoders.buildArrayCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + ArrayEncoder>> newReader = + Encoders.buildArrayCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + PersonV1 p = new PersonV1(); + p.setName("dave"); + p.setAge(40); + byte[] bytes = oldWriter.encode(Arrays.asList(Arrays.asList(p))); + List> out = newReader.decode(bytes); + Assert.assertEquals(out.size(), 1); + PersonV2 read = out.get(0).get(0); + Assert.assertEquals(read.getName(), "dave"); + 
Assert.assertEquals(read.getAge(), 40); + Assert.assertNull(read.getEmail()); + } + + /** Map value is itself a map of the versioned bean, exercising the map-wrapper projection. */ + @Test + public void mapOfMapValueOlderPayloadReadByNewerCodec() { + MapEncoder>> oldWriter = + Encoders.buildMapCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder>> newReader = + Encoders.buildMapCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + PersonV1 p = new PersonV1(); + p.setName("dave"); + p.setAge(40); + Map inner = new HashMap<>(); + inner.put("inner", p); + Map> in = new HashMap<>(); + in.put("k1", inner); + Map> out = newReader.decode(oldWriter.encode(in)); + PersonV2 read = out.get("k1").get("inner"); + Assert.assertEquals(read.getName(), "dave"); + Assert.assertEquals(read.getAge(), 40); + Assert.assertNull(read.getEmail()); + } + + // --------------------------------------------------------------------------- + // Added reference-typed fields. Every other added-field test defaults a scalar + // (String/int/...); defaulting an added struct or collection slot is a distinct + // projection path. v2 adds a nested struct and a list of structs that are absent + // from the v1 wire, so both must read back as null. 
+ // --------------------------------------------------------------------------- + + @Data + public static class HolderV1 { + private long id; + } + + @Data + public static class HolderV2 { + private long id; + + @ForyVersion(since = 2) + private Profile profile; + + @ForyVersion(since = 2) + private List<Item> items; + } + + @Test + public void addedReferenceFieldsDefaultToNull() { + RowEncoder<HolderV1> writer = evolvingCodec(HolderV1.class); + RowEncoder<HolderV2> reader = evolvingCodec(HolderV2.class); + + HolderV1 in = new HolderV1(); + in.setId(7); + HolderV2 out = reader.decode(writer.encode(in)); + + Assert.assertEquals(out.getId(), 7); + Assert.assertNull(out.getProfile()); + Assert.assertNull(out.getItems()); + } + + // --------------------------------------------------------------------------- + // A versioned bean reached only through an Optional field. TypeInference.inferField + // unwraps Optional<T> to a nullable T and encodes the inner bean as a struct, so the + // evolution walk must look through Optional the same way. Without that, the outer's + // cross-product never enumerates the inner's older versions and an older payload + // (inner at v1) has no matching projection under the newer reader. 
+ // --------------------------------------------------------------------------- + + @Data + public static class OptionalHolderV1 { + private String id; + private Optional<TagV1> tag; + } + + @Data + public static class OptionalHolderV2 { + private String id; + private Optional<TagV2> tag; + } + + @Test + public void evolvingBeanInOptionalField() { + RowEncoder<OptionalHolderV1> writer = evolvingCodec(OptionalHolderV1.class); + RowEncoder<OptionalHolderV2> reader = evolvingCodec(OptionalHolderV2.class); + + OptionalHolderV1 in = new OptionalHolderV1(); + in.setId("o1"); + TagV1 tag = new TagV1(); + tag.setKey("alpha"); + in.setTag(Optional.of(tag)); + + OptionalHolderV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getId(), "o1"); + Assert.assertEquals(out.getTag().get().getKey(), "alpha"); + // weight was added at v2; the v1 payload has no source for it. + Assert.assertEquals(out.getTag().get().getWeight(), 0L); + } + + // --------------------------------------------------------------------------- + // A versioned bean reached through a bare Iterable field. TypeInference encodes + // any Iterable (ITERABLE_TYPE.isSupertypeOf), not just Collection, as an array, + // so the evolution walk must descend an Iterable element too. A field typed + // Iterable (not Collection) otherwise slips past the collection + // branch and its element bean is never enumerated. 
+ // --------------------------------------------------------------------------- + + @Data + public static class IterableHolderV1 { + private String id; + private Iterable<TagV1> tags; + } + + @Data + public static class IterableHolderV2 { + private String id; + private Iterable<TagV2> tags; + } + + @Test + public void evolvingBeanInIterableField() { + RowEncoder<IterableHolderV1> writer = evolvingCodec(IterableHolderV1.class); + RowEncoder<IterableHolderV2> reader = evolvingCodec(IterableHolderV2.class); + + IterableHolderV1 in = new IterableHolderV1(); + in.setId("i1"); + TagV1 a = new TagV1(); + a.setKey("alpha"); + TagV1 b = new TagV1(); + b.setKey("beta"); + in.setTags(Arrays.asList(a, b)); + + IterableHolderV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getId(), "i1"); + List<TagV2> tags = new ArrayList<>(); + out.getTags().forEach(tags::add); + Assert.assertEquals(tags.size(), 2); + Assert.assertEquals(tags.get(0).getKey(), "alpha"); + Assert.assertEquals(tags.get(1).getKey(), "beta"); + Assert.assertEquals(tags.get(0).getWeight(), 0L); + } + + // --------------------------------------------------------------------------- + // A versioned bean reached only through a custom codec's encodedType(). inferField + // resolves the codec and recurses into encodedType(), encoding the versioned struct, + // so the evolution walk must resolve the codec the same way. The declared field type + // (Money) is not itself a bean, so without codec resolution the inner versioned bean + // is never enumerated and an older payload decodes at the current layout, reading a + // field that does not exist in the older row. + // --------------------------------------------------------------------------- + + /** A domain type encoded through a custom codec into a versioned struct. 
*/ + public static final class Money { + final long cents; + + Money(long cents) { + this.cents = cents; + } + } + + @Data + public static class AmountV1 { + private long cents; + } + + @Data + public static class AmountV2 { + private long cents; + + @ForyVersion(since = 2) + private String currency; + } + + static final class MoneyCodecV1 implements CustomCodec { + @Override + public Field getForyField(String fieldName) { + return null; // default inference: recurse into encodedType() + } + + @Override + public AmountV1 encode(Money value) { + AmountV1 a = new AmountV1(); + a.setCents(value.cents); + return a; + } + + @Override + public Money decode(AmountV1 a) { + return new Money(a.getCents()); + } + + @Override + public TypeRef encodedType() { + return TypeRef.of(AmountV1.class); + } + } + + static final class MoneyCodecV2 implements CustomCodec { + @Override + public Field getForyField(String fieldName) { + return null; + } + + @Override + public AmountV2 encode(Money value) { + AmountV2 a = new AmountV2(); + a.setCents(value.cents); + return a; + } + + @Override + public Money decode(AmountV2 a) { + return new Money(a.getCents()); + } + + @Override + public TypeRef encodedType() { + return TypeRef.of(AmountV2.class); + } + } + + @Data + public static class WalletV1 { + private String id; + private Money balance; + } + + @Data + public static class WalletV2 { + private String id; + private Money balance; + } + + @Test + public void evolvingBeanThroughCustomCodec() { + Encoders.registerCustomCodec(WalletV1.class, Money.class, new MoneyCodecV1()); + Encoders.registerCustomCodec(WalletV2.class, Money.class, new MoneyCodecV2()); + + RowEncoder writer = evolvingCodec(WalletV1.class); + RowEncoder reader = evolvingCodec(WalletV2.class); + + WalletV1 in = new WalletV1(); + in.setId("w1"); + in.setBalance(new Money(500)); + + WalletV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getId(), "w1"); + // The v1 payload encoded balance as AmountV1 
(cents only); the v2 codec projects it. + Assert.assertEquals(out.getBalance().cents, 500L); + } + + /** + * Forward reads are intentionally unsupported: a reader built against an older class cannot + * decode a payload written at a newer schema it has never seen. PersonV2 writes a v2 payload + * (with the since=2 email field); a PersonV1-built reader knows only v1's strict hash, so the v2 + * hash matches neither its current schema nor any historical projection, and decode fails loud + * rather than silently dropping the newer field. This pins the asymmetry of the + * old-writer/new-reader contract. + */ + @Test(expectedExceptions = ClassNotCompatibleException.class) + public void newerPayloadRejectedByOlderCodec() { + RowEncoder newWriter = evolvingCodec(PersonV2.class); + RowEncoder oldReader = evolvingCodec(PersonV1.class); + + PersonV2 in = new PersonV2(); + in.setName("alice"); + in.setAge(30); + in.setEmail("alice@example.com"); + + oldReader.decode(newWriter.encode(in)); + } + + private static RowEncoder evolvingCodec(Class beanClass) { + return Encoders.buildBeanCodec(beanClass).withSchemaEvolution().build().get(); + } +} diff --git a/java/fory-format/src/test/java/org/apache/fory/format/type/SchemaHistoryTest.java b/java/fory-format/src/test/java/org/apache/fory/format/type/SchemaHistoryTest.java new file mode 100644 index 0000000000..d58904fbef --- /dev/null +++ b/java/fory-format/src/test/java/org/apache/fory/format/type/SchemaHistoryTest.java @@ -0,0 +1,208 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.format.type; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotEquals; + +import java.lang.reflect.TypeVariable; +import java.util.Optional; +import java.util.function.UnaryOperator; +import org.apache.fory.format.annotation.ForyVersion; +import org.apache.fory.reflect.TypeRef; +import org.apache.fory.type.TypeResolutionContext; +import org.testng.annotations.Test; + +public class SchemaHistoryTest { + + /** + * The strict hash must be injective over nesting structure. A struct has a variable number of + * children with no boundary marker between a nested struct's last child and the parent's next + * field, so without mixing the child count {@code {a: struct<x>, b}} and {@code {a: struct<x, + * b>}} mix an identical byte sequence and collide. These two schemas round-trip differently, so a + * shared hash would route an old payload to the wrong projection codec.
+ */ + @Test + public void structBoundaryDoesNotCollide() { + Schema fieldOutsideStruct = + DataTypes.schema(DataTypes.field("a", DataTypes.struct(field("x")), false), field("b")); + Schema fieldInsideStruct = + DataTypes.schema(DataTypes.field("a", DataTypes.struct(field("x"), field("b")), false)); + + assertNotEquals( + SchemaHistory.computeStrictSchemaHash(fieldOutsideStruct), + SchemaHistory.computeStrictSchemaHash(fieldInsideStruct)); + } + + /** + * The child-count delimiter must distinguish an empty nested struct followed by a sibling from a + * nested struct that contains that sibling, the minimal form of the boundary ambiguity. + */ + @Test + public void emptyStructBoundaryDoesNotCollide() { + Schema emptyThenSibling = + DataTypes.schema(DataTypes.field("a", DataTypes.struct(), false), field("b")); + Schema siblingInsideStruct = + DataTypes.schema(DataTypes.field("a", DataTypes.struct(field("b")), false)); + + assertNotEquals( + SchemaHistory.computeStrictSchemaHash(emptyThenSibling), + SchemaHistory.computeStrictSchemaHash(siblingInsideStruct)); + } + + /** + * Structurally identical schemas must still hash equal; the delimiter must not over-discriminate. + */ + @Test + public void identicalNestedStructsHashEqual() { + assertEquals( + SchemaHistory.computeStrictSchemaHash( + DataTypes.schema( + DataTypes.field("a", DataTypes.struct(field("x"), field("b")), false))), + SchemaHistory.computeStrictSchemaHash( + DataTypes.schema( + DataTypes.field("a", DataTypes.struct(field("x"), field("b")), false)))); + } + + /** + * {@link DataTypes.DecimalType#name} returns a bare "decimal" with no precision or scale, so the + * strict hash must mix those in explicitly. Two decimals that differ only in precision or scale + * must hash apart, or an old payload would route to a projection codec with the wrong numeric + * layout. This guards the dedicated decimal branch in {@code hashField}. 
+ */ + @Test + public void decimalPrecisionAndScaleDoNotCollide() { + long p10s2 = + SchemaHistory.computeStrictSchemaHash( + DataTypes.schema(DataTypes.field("a", DataTypes.decimal(10, 2), false))); + long p20s2 = + SchemaHistory.computeStrictSchemaHash( + DataTypes.schema(DataTypes.field("a", DataTypes.decimal(20, 2), false))); + long p10s4 = + SchemaHistory.computeStrictSchemaHash( + DataTypes.schema(DataTypes.field("a", DataTypes.decimal(10, 4), false))); + assertNotEquals(p10s2, p20s2); + assertNotEquals(p10s2, p10s4); + assertNotEquals(p20s2, p10s4); + } + + /** Identical decimal shapes must still hash equal; the precision/scale mix must not be noisy. */ + @Test + public void identicalDecimalsHashEqual() { + assertEquals( + SchemaHistory.computeStrictSchemaHash( + DataTypes.schema(DataTypes.field("a", DataTypes.decimal(10, 2), false))), + SchemaHistory.computeStrictSchemaHash( + DataTypes.schema(DataTypes.field("a", DataTypes.decimal(10, 2), false)))); + } + + /** + * A map header carries one combined hash for the (key, value) layout pair. When key and value are + * the same versioned bean at different versions, the two cross combinations have swapped per-side + * hashes, so {@link SchemaHistory#combineHashes} must be order-sensitive or the combinations + * collide and one payload decodes with the other's codec. This pins the invariant the map + * key/value cross-product dispatch relies on. + */ + @Test + public void combineHashesIsOrderSensitive() { + long a = 0x0123456789abcdefL; + long b = 0x7766554433221100L; + assertNotEquals(SchemaHistory.combineHashes(a, b), SchemaHistory.combineHashes(b, a)); + } + + /** + * A field typed as a bare type variable bounded to a wrapper (the Scala 3 LTS case that {@code + * TypeInference.inferField} resolves to its bound, see issue 2439) must descend through the same + * wrapper grammar during evolution-site discovery. 
Before the bound was resolved in {@code + * Wrapper.classify}, {@code getRawType} resolved the variable to {@code Optional} but the bare + * variable carried no type arguments, so the OPTIONAL branch threw {@link + * IndexOutOfBoundsException} reading the missing element. The bean inside the bound must be + * discovered instead. + */ + @Test + public void typeVariableBoundedToWrapperResolvesToInnerBean() { + TypeVariable boundedToOptionalBean = OptionalBeanHolder.class.getTypeParameters()[0]; + TypeResolutionContext ctx = + new TypeResolutionContext(CustomTypeEncoderRegistry.customTypeHandler(), true); + + assertEquals(SchemaHistory.evolutionBean(TypeRef.of(boundedToOptionalBean), ctx), Inner.class); + } + + /** {@code <T extends Optional<Inner>>}: a wrapper-bounded type variable for the test above. */ + private static final class OptionalBeanHolder> {} + + @lombok.Data + public static class Inner { + private String name; + } + + /** + * The writer-side hash is {@code current().strictHash()}, but the non-evolution row codec and the + * decode hot path infer the live layout straight from {@link TypeInference#inferSchema}. The two + * must produce byte-identical schemas, or a current-version payload misses the fast path. The + * history derives its per-version schema by sorting fields on the Java member name to match + * {@code inferSchema}'s {@code Descriptor.getDescriptors} order; this pins that equivalence so a + * drift in the sort key fails loudly. Fields are declared out of alphabetical order on purpose so + * the assertion has teeth.
+ */ + @Test + public void currentSchemaMatchesInferSchema() { + for (Class bean : + new Class[] { + OutOfOrderFields.class, NestedHolder.class, CollectionHolder.class, EvolvingBean.class + }) { + assertEquals( + SchemaHistory.build(bean, UnaryOperator.identity()).current().schema(), + TypeInference.inferSchema(bean), + "current() schema diverged from inferSchema for " + bean.getSimpleName()); + } + } + + @lombok.Data + public static class OutOfOrderFields { + private int zebra; + private String alpha; + private long mid; + } + + @lombok.Data + public static class NestedHolder { + private Inner inner; + private int count; + } + + @lombok.Data + public static class CollectionHolder { + private java.util.List items; + private java.util.Map byName; + } + + @lombok.Data + public static class EvolvingBean { + private int base; + + @ForyVersion(since = 2) + private String added; + } + + private static Field field(String name) { + return DataTypes.field(name, DataTypes.int32(), false); + } +} diff --git a/java/fory-latest-jdk-tests/src/test/java/org/apache/fory/integration_tests/RecordRowTest.java b/java/fory-latest-jdk-tests/src/test/java/org/apache/fory/integration_tests/RecordRowTest.java index 99c61c64ce..4e9089a585 100644 --- a/java/fory-latest-jdk-tests/src/test/java/org/apache/fory/integration_tests/RecordRowTest.java +++ b/java/fory-latest-jdk-tests/src/test/java/org/apache/fory/integration_tests/RecordRowTest.java @@ -21,6 +21,8 @@ import java.time.Instant; import java.time.LocalDate; +import org.apache.fory.format.annotation.ForySchema; +import org.apache.fory.format.annotation.ForyVersion; import org.apache.fory.format.encoder.Encoders; import org.apache.fory.format.encoder.RowEncoder; import org.apache.fory.format.row.binary.BinaryRow; @@ -86,4 +88,107 @@ public void testRecordNestedInterface() { final TestRecordNestedInterface deserializedBean = encoder.fromRow(row); Assert.assertEquals(deserializedBean.f1().f1(), bean.f1().f1()); } + + // 
--------------------------------------------------------------------------- + // Records with schema evolution. @ForyVersion on a record component propagates + // to the backing field and the accessor (its FIELD/METHOD targets), where the + // codec reads it, so a newer reader record can pick up older payloads and + // default components added later. The history interface still works because the + // bean is a record: live component names match the wire field names (record + // short-style naming). + // --------------------------------------------------------------------------- + + public record PersonV1(String name, int age) {} + + @ForySchema(removedFields = PersonV2.History.class) + public record PersonV2(String name, @ForyVersion(since = 2) String email) { + interface History { + @ForyVersion(until = 2) + int age(); + } + } + + @Test + public void recordSchemaEvolution_readsOlderPayloads() { + RowEncoder writer = + Encoders.buildBeanCodec(PersonV1.class).withSchemaEvolution().build().get(); + RowEncoder reader = + Encoders.buildBeanCodec(PersonV2.class).withSchemaEvolution().build().get(); + PersonV2 out = reader.decode(writer.encode(new PersonV1("Luna", 7))); + Assert.assertEquals(out.name(), "Luna"); + Assert.assertNull(out.email()); + } + + @Test + public void recordSchemaEvolution_currentRoundTrip() { + RowEncoder codec = + Encoders.buildBeanCodec(PersonV2.class).withSchemaEvolution().build().get(); + PersonV2 in = new PersonV2("Mars", "mars@example.com"); + Assert.assertEquals(codec.decode(codec.encode(in)), in); + } + + /** Record with a primitive added at v2: an older payload must produce the primitive default. 
*/ + public record CounterV1(String name) {} + + public record CounterV2(String name, @ForyVersion(since = 2) int count) {} + + @Test + public void recordSchemaEvolution_primitiveDefault() { + RowEncoder writer = + Encoders.buildBeanCodec(CounterV1.class).withSchemaEvolution().build().get(); + RowEncoder reader = + Encoders.buildBeanCodec(CounterV2.class).withSchemaEvolution().build().get(); + CounterV2 out = reader.decode(writer.encode(new CounterV1("Luna"))); + Assert.assertEquals(out.name(), "Luna"); + Assert.assertEquals(out.count(), 0); + } + + // A record component whose own type is a versioned record. The inner struct is + // inline in the outer's bytes with no per-inner hash, so the reader must pick an + // inner schema consistent with the outer's strict hash. This drives the nested + // cross-product enumeration with record-component field naming. + public record InnerV1(String name) {} + + public record InnerV2(String name, @ForyVersion(since = 2) String tag) {} + + public record OuterInnerV1(long id, InnerV1 inner) {} + + public record OuterInnerV2(long id, InnerV2 inner) {} + + @Test + public void recordSchemaEvolution_nestedRecordInnerNewerThanWriter() { + RowEncoder writer = + Encoders.buildBeanCodec(OuterInnerV1.class).withSchemaEvolution().build().get(); + RowEncoder reader = + Encoders.buildBeanCodec(OuterInnerV2.class).withSchemaEvolution().build().get(); + OuterInnerV2 out = reader.decode(writer.encode(new OuterInnerV1(42, new InnerV1("hello")))); + Assert.assertEquals(out.id(), 42); + Assert.assertEquals(out.inner().name(), "hello"); + Assert.assertNull(out.inner().tag()); + } + + // A reference component added at v2 is absent from a v1 payload, so decode supplies null + // for it and the record's canonical constructor runs with that null. A constructor that + // rejects null for the added component would throw during decode; the supported pattern is + // to tolerate the missing value, e.g. by normalizing null to a default in the constructor. 
+ public record DefaultedV1(String name) {} + + public record DefaultedV2(String name, @ForyVersion(since = 2) String email) { + public DefaultedV2 { + if (email == null) { + email = "unknown"; + } + } + } + + @Test + public void recordSchemaEvolution_constructorDefaultsAddedComponent() { + RowEncoder writer = + Encoders.buildBeanCodec(DefaultedV1.class).withSchemaEvolution().build().get(); + RowEncoder reader = + Encoders.buildBeanCodec(DefaultedV2.class).withSchemaEvolution().build().get(); + DefaultedV2 out = reader.decode(writer.encode(new DefaultedV1("Luna"))); + Assert.assertEquals(out.name(), "Luna"); + Assert.assertEquals(out.email(), "unknown"); + } }