diff --git a/cpp/fory/serialization/collection_serializer.h b/cpp/fory/serialization/collection_serializer.h index 3768275d14..4193526855 100644 --- a/cpp/fory/serialization/collection_serializer.h +++ b/cpp/fory/serialization/collection_serializer.h @@ -392,6 +392,13 @@ inline void collection_insert(Container &result, T &&elem) { /// Read collection data for polymorphic or shared-ref elements. template inline Container read_collection_data_slow(ReadContext &ctx, uint32_t length) { + // Guardrail: Enforce max_collection_size for collection reads + if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) { + ctx.set_error( + Error::invalid_data("Collection length exceeds max_collection_size")); + return Container{}; + } + Container result; if constexpr (has_reserve_v) { result.reserve(length); @@ -611,15 +618,22 @@ struct Serializer< if (FORY_PREDICT_FALSE(ctx.has_error())) { return std::vector(); } + // Guardrail: Enforce max_binary_size for binary byte-length reads + if (FORY_PREDICT_FALSE(total_bytes_u32 > ctx.config().max_binary_size)) { + ctx.set_error(Error::invalid_data("Binary size exceeds max_binary_size")); + return std::vector(); + } if (sizeof(T) == 0) { return std::vector(); } + + size_t elem_count = total_bytes_u32 / sizeof(T); + if (total_bytes_u32 % sizeof(T) != 0) { ctx.set_error(Error::invalid_data( "Vector byte size not aligned with element size")); return std::vector(); } - size_t elem_count = total_bytes_u32 / sizeof(T); std::vector result(elem_count); if (total_bytes_u32 > 0) { ctx.read_bytes(result.data(), static_cast(total_bytes_u32), @@ -677,6 +691,13 @@ struct Serializer< if (FORY_PREDICT_FALSE(ctx.has_error())) { return std::vector(); } + + if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) { + ctx.set_error( + Error::invalid_data("Collection length exceeds max_collection_size")); + return std::vector(); + } + // Per xlang spec: header and type_info are omitted when length is 0 if (length == 0) { return std::vector(); @@ -808,6 +829,13 @@ struct Serializer< if (FORY_PREDICT_FALSE(ctx.has_error())) { return std::vector(); } + + if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) { + ctx.set_error( + Error::invalid_data("Collection length exceeds max_collection_size")); + return std::vector(); + } + std::vector result; result.reserve(size); for (uint32_t i = 0; i < size; ++i) { @@ -897,6 +925,12 @@ template struct Serializer> { if (FORY_PREDICT_FALSE(ctx.has_error())) { return std::vector(); } + + if (FORY_PREDICT_FALSE(size > ctx.config().max_binary_size)) { + ctx.set_error(Error::invalid_data("Binary size exceeds max_binary_size")); + return std::vector(); + } + std::vector result(size); // Fast path: bulk read all bytes at once if we have enough buffer Buffer &buffer = ctx.buffer(); @@ -971,6 +1005,13 @@ template struct Serializer> { if (FORY_PREDICT_FALSE(ctx.has_error())) { return std::list(); } + + if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) { + ctx.set_error( + Error::invalid_data("Collection length exceeds max_collection_size")); + return std::list(); + } + // Per xlang spec: header and type_info are omitted when length is 0 if (length == 0) { return std::list(); @@ -1101,6 +1142,13 @@ template struct Serializer> { if (FORY_PREDICT_FALSE(ctx.has_error())) { return std::list(); } + + if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) { + ctx.set_error( + Error::invalid_data("Collection length exceeds max_collection_size")); + return std::list(); + } + std::list result; for (uint32_t i = 0; i < size; ++i) { if (FORY_PREDICT_FALSE(ctx.has_error())) { @@ -1161,6 +1209,13 @@ template struct Serializer> { if (FORY_PREDICT_FALSE(ctx.has_error())) { return std::deque(); } + + if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) { + ctx.set_error( + Error::invalid_data("Collection length exceeds max_collection_size")); + return std::deque(); + } + // Per xlang spec: header and type_info are omitted when length is 0 if (length == 0) { return std::deque(); @@ -1291,6 +1346,13 @@ template struct Serializer> { if (FORY_PREDICT_FALSE(ctx.has_error())) { return std::deque(); } + + if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) { + ctx.set_error( + Error::invalid_data("Collection length exceeds max_collection_size")); + return std::deque(); + } + std::deque result; for (uint32_t i = 0; i < size; ++i) { if (FORY_PREDICT_FALSE(ctx.has_error())) { @@ -1352,6 +1414,13 @@ struct Serializer> { if (FORY_PREDICT_FALSE(ctx.has_error())) { return std::forward_list(); } + + if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) { + ctx.set_error( + Error::invalid_data("Collection length exceeds max_collection_size")); + return std::forward_list(); + } + // Per xlang spec: header and type_info are omitted when length is 0 if (length == 0) { return std::forward_list(); @@ -1716,6 +1785,13 @@ struct Serializer> { if (FORY_PREDICT_FALSE(ctx.has_error())) { return std::forward_list(); } + + if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) { + ctx.set_error( + Error::invalid_data("Collection length exceeds max_collection_size")); + return std::forward_list(); + } + std::vector temp; temp.reserve(size); for (uint32_t i = 0; i < size; ++i) { @@ -1814,6 +1890,13 @@ struct Serializer> { if (FORY_PREDICT_FALSE(ctx.has_error())) { return std::set(); } + + if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) { + ctx.set_error( + Error::invalid_data("Collection length exceeds max_collection_size")); + return std::set(); + } + // Per xlang spec: header and type_info are omitted when length is 0 if (size == 0) { return std::set(); @@ -1894,6 +1977,13 @@ struct Serializer> { if (FORY_PREDICT_FALSE(ctx.has_error())) { return std::set(); } + + if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) { + ctx.set_error( + Error::invalid_data("Collection length exceeds max_collection_size")); + return std::set(); + } + std::set result; for (uint32_t i = 0; i < size; ++i) { if (FORY_PREDICT_FALSE(ctx.has_error())) { @@ -1988,6 +2078,12 @@ struct Serializer> { return std::unordered_set(); } + if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) { + ctx.set_error( + Error::invalid_data("Collection length exceeds max_collection_size")); + return std::unordered_set(); + } + // Per xlang spec: header and type_info are omitted when length is 0 if (size == 0) { return std::unordered_set(); @@ -2070,6 +2166,13 @@ struct Serializer> { if (FORY_PREDICT_FALSE(ctx.has_error())) { return std::unordered_set(); } + + if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) { + ctx.set_error( + Error::invalid_data("Collection length exceeds max_collection_size")); + return std::unordered_set(); + } + std::unordered_set result; result.reserve(size); for (uint32_t i = 0; i < size; ++i) { diff --git a/cpp/fory/serialization/collection_serializer_test.cc b/cpp/fory/serialization/collection_serializer_test.cc index 0394ff2566..e50afa2901 100644 --- a/cpp/fory/serialization/collection_serializer_test.cc +++ b/cpp/fory/serialization/collection_serializer_test.cc @@ -620,6 +620,42 @@ TEST(CollectionSerializerTest, ForwardListEmptyRoundTrip) { EXPECT_TRUE(deserialized.strings.empty()); } +// Test max_collection_size using objects (e.g., strings) +TEST(CollectionSerializerTest, MaxCollectionSizeNativeGuardrail) { + auto fory = Fory::builder().xlang(false).max_collection_size(2).build(); + fory.register_struct(200); + + VectorStringHolder original; + original.strings = {"A", "B", "C"}; + + auto bytes_result = fory.serialize(original); + ASSERT_TRUE(bytes_result.ok()); + + auto deserialize_result = fory.deserialize( + bytes_result->data(), bytes_result->size()); + + ASSERT_FALSE(deserialize_result.ok()); + EXPECT_TRUE(deserialize_result.error().message().find( + "exceeds max_collection_size") != std::string::npos); +} + +// Test max_binary_size using primitive numbers +TEST(CollectionSerializerTest, MaxBinarySizeNativeGuardrail) { + auto fory = Fory::builder().xlang(false).max_binary_size(10).build(); + + std::vector large_data = {1, 2, 3, 4, 5}; + + auto bytes_result = fory.serialize(large_data); + ASSERT_TRUE(bytes_result.ok()); + + auto deserialize_result = fory.deserialize>( + bytes_result->data(), bytes_result->size()); + + ASSERT_FALSE(deserialize_result.ok()); + EXPECT_TRUE(deserialize_result.error().message().find( + "exceeds max_binary_size") != std::string::npos); +} + } // namespace } // namespace serialization } // namespace fory diff --git a/cpp/fory/serialization/config.h b/cpp/fory/serialization/config.h index d471c39074..63062c7ee5 100644 --- a/cpp/fory/serialization/config.h +++ b/cpp/fory/serialization/config.h @@ -52,6 +52,12 @@ struct Config { /// When enabled, avoids duplicating shared objects and handles cycles. bool track_ref = true; + /// Maximum allowed size for binary data in bytes. + uint32_t max_binary_size = 64 * 1024 * 1024; // 64MB default + + /// Maximum allowed number of elements in a collection or entries in a map. + uint32_t max_collection_size = 1024 * 1024; // 1M elements default + /// Default constructor with sensible defaults Config() = default; }; diff --git a/cpp/fory/serialization/context.h b/cpp/fory/serialization/context.h index e080604f67..8fba3b6121 100644 --- a/cpp/fory/serialization/context.h +++ b/cpp/fory/serialization/context.h @@ -643,6 +643,9 @@ class ReadContext { /// reset context for reuse. void reset(); + /// get associated configuration. + inline const Config &config() const { return *config_; } + private: // Error state - accumulated during deserialization, checked at the end Error error_; diff --git a/cpp/fory/serialization/fory.h b/cpp/fory/serialization/fory.h index 1c5f19522c..25cd5ec255 100644 --- a/cpp/fory/serialization/fory.h +++ b/cpp/fory/serialization/fory.h @@ -123,6 +123,19 @@ class ForyBuilder { /// Build a thread-safe Fory instance (uses context pools). ThreadSafeFory build_thread_safe(); + /// Set the maximum allowed size for binary data in bytes. + inline ForyBuilder &max_binary_size(uint32_t size) { + config_.max_binary_size = size; + return *this; + } + + /// Set the maximum allowed number of elements in a collection or entries in a + /// map. + inline ForyBuilder &max_collection_size(uint32_t size) { + config_.max_collection_size = size; + return *this; + } + private: Config config_; std::shared_ptr type_resolver_; diff --git a/cpp/fory/serialization/map_serializer.h b/cpp/fory/serialization/map_serializer.h index dd2952da99..ace9e297b3 100644 --- a/cpp/fory/serialization/map_serializer.h +++ b/cpp/fory/serialization/map_serializer.h @@ -551,6 +551,13 @@ inline MapType read_map_data_fast(ReadContext &ctx, uint32_t length) { static_assert(!is_shared_ref_v && !is_shared_ref_v, "Fast path is for non-shared-ref types only"); + // Guardrail: Enforce max_collection_size for map reads (entry count) + if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) { + ctx.set_error( + Error::invalid_data("Map entry count exceeds max_collection_size")); + return MapType{}; + } + MapType result; MapReserver::reserve(result, length); @@ -682,6 +689,13 @@ inline MapType read_map_data_fast(ReadContext &ctx, uint32_t length) { /// Read map data for polymorphic or shared-ref maps template inline MapType read_map_data_slow(ReadContext &ctx, uint32_t length) { + // Guardrail: Enforce max_collection_size for map reads (entry count) + if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) { + ctx.set_error( + Error::invalid_data("Map entry count exceeds max_collection_size")); + return MapType{}; + } + MapType result; MapReserver::reserve(result, length); diff --git a/cpp/fory/serialization/map_serializer_test.cc b/cpp/fory/serialization/map_serializer_test.cc index bf91e939f1..6f27c17294 100644 --- a/cpp/fory/serialization/map_serializer_test.cc +++ b/cpp/fory/serialization/map_serializer_test.cc @@ -780,6 +780,22 @@ TEST(MapSerializerTest, LargeMapWithPolymorphicValues) { EXPECT_EQ(deserialized[299]->name, "value_y_299"); } +TEST(MapSerializerTest, MaxMapSizeGuardrail) { + auto fory = Fory::builder().xlang(true).max_collection_size(2).build(); + + std::map large_map = {{"a", 1}, {"b", 2}, {"c", 3}}; + + auto serialize_result = fory.serialize(large_map); + ASSERT_TRUE(serialize_result.ok()); + + auto deserialize_result = fory.deserialize>( + serialize_result->data(), serialize_result->size()); + + ASSERT_FALSE(deserialize_result.ok()); + EXPECT_TRUE(deserialize_result.error().message().find( + "exceeds max_collection_size") != std::string::npos); +} + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff --git a/cpp/fory/serialization/unsigned_serializer.h b/cpp/fory/serialization/unsigned_serializer.h index e53ff250f2..5705b5ceb2 100644 --- a/cpp/fory/serialization/unsigned_serializer.h +++ b/cpp/fory/serialization/unsigned_serializer.h @@ -703,6 +703,12 @@ template <> struct Serializer> { static inline std::vector read_data(ReadContext &ctx) { uint32_t length = ctx.read_var_uint32(ctx.error()); + + if (FORY_PREDICT_FALSE(length > ctx.config().max_binary_size)) { + ctx.set_error(Error::invalid_data("Binary size exceeds max_binary_size")); + return std::vector(); + } + if (FORY_PREDICT_FALSE(length > ctx.buffer().remaining_size())) { ctx.set_error( Error::invalid_data("Invalid length: " + std::to_string(length))); @@ -798,6 +804,12 @@ template <> struct Serializer> { if (FORY_PREDICT_FALSE(ctx.has_error())) { return std::vector(); } + + if (FORY_PREDICT_FALSE(total_bytes > ctx.config().max_binary_size)) { + ctx.set_error(Error::invalid_data("Binary size exceeds max_binary_size")); + return std::vector(); + } + if (total_bytes % sizeof(uint16_t) != 0) { ctx.set_error(Error::invalid_data("Invalid length: " + std::to_string(total_bytes))); @@ -900,6 +912,12 @@ template <> struct Serializer> { if (FORY_PREDICT_FALSE(ctx.has_error())) { return std::vector(); } + + if (FORY_PREDICT_FALSE(total_bytes > ctx.config().max_binary_size)) { + ctx.set_error(Error::invalid_data("Binary size exceeds max_binary_size")); + return std::vector(); + } + if (total_bytes % sizeof(uint32_t) != 0) { ctx.set_error(Error::invalid_data("Invalid length: " + std::to_string(total_bytes))); @@ -1002,6 +1020,12 @@ template <> struct Serializer> { if (FORY_PREDICT_FALSE(ctx.has_error())) { return std::vector(); } + + if (FORY_PREDICT_FALSE(total_bytes > ctx.config().max_binary_size)) { + ctx.set_error(Error::invalid_data("Binary size exceeds max_binary_size")); + return std::vector(); + } + if (total_bytes % sizeof(uint64_t) != 0) { ctx.set_error(Error::invalid_data("Invalid length: " + std::to_string(total_bytes))); diff --git a/cpp/fory/serialization/unsigned_serializer_test.cc b/cpp/fory/serialization/unsigned_serializer_test.cc index 30196c1deb..75173515ed 100644 --- a/cpp/fory/serialization/unsigned_serializer_test.cc +++ b/cpp/fory/serialization/unsigned_serializer_test.cc @@ -271,6 +271,24 @@ TEST(UnsignedSerializerTest, UnsignedArrayTypeIdsAreDistinct) { static_cast(TypeId::BINARY)); } +TEST(UnsignedSerializerTest, MaxBinarySizeNativeGuardrail) { + // Set limit to 10 bytes + auto fory = Fory::builder().xlang(false).max_binary_size(10).build(); + + // 10 elements of uint32_t = 40 bytes > 10 byte limit + std::vector large_data(10, 42); + + auto bytes_result = fory.serialize(large_data); + ASSERT_TRUE(bytes_result.ok()); + + auto deserialize_result = fory.deserialize>( + bytes_result->data(), bytes_result->size()); + + ASSERT_FALSE(deserialize_result.ok()); + EXPECT_TRUE(deserialize_result.error().message().find( + "exceeds max_binary_size") != std::string::npos); +} + } // namespace test } // namespace serialization } // namespace fory