-
Notifications
You must be signed in to change notification settings - Fork 400
feat(c++): add configurable deserialization size guardrails #3455
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
chaokunyang
merged 6 commits into
apache:main
from
shivendra-dev54:configurable_size_guardrails_for_untrusted_payloads
Mar 10, 2026
+234
−1
Merged
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
e029167
feat(c++): add configurable deserialization size guardrails
shivendra-dev54 88e9197
style: format C++ code with clang-format
shivendra-dev54 4b28141
Updated the arithmetic vector path to use max_binary_size instead of …
shivendra-dev54 6628008
style: format C++ code with clang-format
shivendra-dev54 db9d347
refactor(c++): harden deserialization guardrails and expand native tests
shivendra-dev54 5e65bef
refactor(c++): complete guardrail coverage for unsigned types and nat…
shivendra-dev54 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -392,6 +392,13 @@ inline void collection_insert(Container &result, T &&elem) { | |
| /// Read collection data for polymorphic or shared-ref elements. | ||
| template <typename T, typename Container> | ||
| inline Container read_collection_data_slow(ReadContext &ctx, uint32_t length) { | ||
| // Guardrail: Enforce max_collection_size for collection reads | ||
| if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) { | ||
| ctx.set_error( | ||
| Error::invalid_data("Collection length exceeds max_collection_size")); | ||
| return Container{}; | ||
| } | ||
|
|
||
| Container result; | ||
| if constexpr (has_reserve_v<Container>) { | ||
| result.reserve(length); | ||
|
|
@@ -611,15 +618,22 @@ struct Serializer< | |
| if (FORY_PREDICT_FALSE(ctx.has_error())) { | ||
| return std::vector<T, Alloc>(); | ||
| } | ||
| // Guardrail: Enforce max_binary_size for binary byte-length reads | ||
| if (FORY_PREDICT_FALSE(total_bytes_u32 > ctx.config().max_binary_size)) { | ||
| ctx.set_error(Error::invalid_data("Binary size exceeds max_binary_size")); | ||
| return std::vector<T, Alloc>(); | ||
| } | ||
| if (sizeof(T) == 0) { | ||
| return std::vector<T, Alloc>(); | ||
| } | ||
|
|
||
| size_t elem_count = total_bytes_u32 / sizeof(T); | ||
|
|
||
| if (total_bytes_u32 % sizeof(T) != 0) { | ||
| ctx.set_error(Error::invalid_data( | ||
| "Vector byte size not aligned with element size")); | ||
| return std::vector<T, Alloc>(); | ||
| } | ||
| size_t elem_count = total_bytes_u32 / sizeof(T); | ||
| std::vector<T, Alloc> result(elem_count); | ||
| if (total_bytes_u32 > 0) { | ||
| ctx.read_bytes(result.data(), static_cast<uint32_t>(total_bytes_u32), | ||
|
|
@@ -677,6 +691,13 @@ struct Serializer< | |
| if (FORY_PREDICT_FALSE(ctx.has_error())) { | ||
| return std::vector<T, Alloc>(); | ||
| } | ||
|
|
||
| if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) { | ||
| ctx.set_error( | ||
| Error::invalid_data("Collection length exceeds max_collection_size")); | ||
| return std::vector<T, Alloc>(); | ||
| } | ||
|
|
||
| // Per xlang spec: header and type_info are omitted when length is 0 | ||
| if (length == 0) { | ||
| return std::vector<T, Alloc>(); | ||
|
|
@@ -808,6 +829,13 @@ struct Serializer< | |
| if (FORY_PREDICT_FALSE(ctx.has_error())) { | ||
| return std::vector<T, Alloc>(); | ||
| } | ||
|
|
||
| if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) { | ||
| ctx.set_error( | ||
| Error::invalid_data("Collection length exceeds max_collection_size")); | ||
| return std::vector<T, Alloc>(); | ||
| } | ||
|
|
||
| std::vector<T, Alloc> result; | ||
| result.reserve(size); | ||
| for (uint32_t i = 0; i < size; ++i) { | ||
|
|
@@ -897,6 +925,12 @@ template <typename Alloc> struct Serializer<std::vector<bool, Alloc>> { | |
| if (FORY_PREDICT_FALSE(ctx.has_error())) { | ||
| return std::vector<bool, Alloc>(); | ||
| } | ||
|
|
||
| if (FORY_PREDICT_FALSE(size > ctx.config().max_binary_size)) { | ||
| ctx.set_error(Error::invalid_data("Binary size exceeds max_binary_size")); | ||
| return std::vector<bool, Alloc>(); | ||
| } | ||
|
|
||
| std::vector<bool, Alloc> result(size); | ||
| // Fast path: bulk read all bytes at once if we have enough buffer | ||
| Buffer &buffer = ctx.buffer(); | ||
|
|
@@ -971,6 +1005,13 @@ template <typename T, typename Alloc> struct Serializer<std::list<T, Alloc>> { | |
| if (FORY_PREDICT_FALSE(ctx.has_error())) { | ||
| return std::list<T, Alloc>(); | ||
| } | ||
|
|
||
| if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) { | ||
| ctx.set_error( | ||
| Error::invalid_data("Collection length exceeds max_collection_size")); | ||
| return std::list<T, Alloc>(); | ||
| } | ||
|
|
||
| // Per xlang spec: header and type_info are omitted when length is 0 | ||
| if (length == 0) { | ||
| return std::list<T, Alloc>(); | ||
|
|
@@ -1101,6 +1142,13 @@ template <typename T, typename Alloc> struct Serializer<std::list<T, Alloc>> { | |
| if (FORY_PREDICT_FALSE(ctx.has_error())) { | ||
| return std::list<T, Alloc>(); | ||
| } | ||
|
|
||
| if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) { | ||
| ctx.set_error( | ||
| Error::invalid_data("Collection length exceeds max_collection_size")); | ||
| return std::list<T, Alloc>(); | ||
| } | ||
|
|
||
| std::list<T, Alloc> result; | ||
| for (uint32_t i = 0; i < size; ++i) { | ||
| if (FORY_PREDICT_FALSE(ctx.has_error())) { | ||
|
|
@@ -1161,6 +1209,13 @@ template <typename T, typename Alloc> struct Serializer<std::deque<T, Alloc>> { | |
| if (FORY_PREDICT_FALSE(ctx.has_error())) { | ||
| return std::deque<T, Alloc>(); | ||
| } | ||
|
|
||
| if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) { | ||
| ctx.set_error( | ||
| Error::invalid_data("Collection length exceeds max_collection_size")); | ||
| return std::deque<T, Alloc>(); | ||
| } | ||
|
|
||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is still an unguarded preallocation in forward_list::read: temp.reserve(length) happens before any max_collection_size validation. A malicious length can force a large allocation before we fail. Please validate length immediately after reading it, before reserve. |
||
| // Per xlang spec: header and type_info are omitted when length is 0 | ||
| if (length == 0) { | ||
| return std::deque<T, Alloc>(); | ||
|
|
@@ -1291,6 +1346,13 @@ template <typename T, typename Alloc> struct Serializer<std::deque<T, Alloc>> { | |
| if (FORY_PREDICT_FALSE(ctx.has_error())) { | ||
| return std::deque<T, Alloc>(); | ||
| } | ||
|
|
||
| if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) { | ||
| ctx.set_error( | ||
| Error::invalid_data("Collection length exceeds max_collection_size")); | ||
| return std::deque<T, Alloc>(); | ||
| } | ||
|
|
||
| std::deque<T, Alloc> result; | ||
| for (uint32_t i = 0; i < size; ++i) { | ||
| if (FORY_PREDICT_FALSE(ctx.has_error())) { | ||
|
|
@@ -1352,6 +1414,13 @@ struct Serializer<std::forward_list<T, Alloc>> { | |
| if (FORY_PREDICT_FALSE(ctx.has_error())) { | ||
| return std::forward_list<T, Alloc>(); | ||
| } | ||
|
|
||
| if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) { | ||
| ctx.set_error( | ||
| Error::invalid_data("Collection length exceeds max_collection_size")); | ||
| return std::forward_list<T, Alloc>(); | ||
| } | ||
|
|
||
| // Per xlang spec: header and type_info are omitted when length is 0 | ||
| if (length == 0) { | ||
| return std::forward_list<T, Alloc>(); | ||
|
|
@@ -1716,6 +1785,13 @@ struct Serializer<std::forward_list<T, Alloc>> { | |
| if (FORY_PREDICT_FALSE(ctx.has_error())) { | ||
| return std::forward_list<T, Alloc>(); | ||
| } | ||
|
|
||
| if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) { | ||
| ctx.set_error( | ||
| Error::invalid_data("Collection length exceeds max_collection_size")); | ||
| return std::forward_list<T, Alloc>(); | ||
| } | ||
|
|
||
| std::vector<T> temp; | ||
| temp.reserve(size); | ||
| for (uint32_t i = 0; i < size; ++i) { | ||
|
|
@@ -1814,6 +1890,13 @@ struct Serializer<std::set<T, Args...>> { | |
| if (FORY_PREDICT_FALSE(ctx.has_error())) { | ||
| return std::set<T, Args...>(); | ||
| } | ||
|
|
||
| if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) { | ||
| ctx.set_error( | ||
| Error::invalid_data("Collection length exceeds max_collection_size")); | ||
| return std::set<T, Args...>(); | ||
| } | ||
|
|
||
| // Per xlang spec: header and type_info are omitted when length is 0 | ||
| if (size == 0) { | ||
| return std::set<T, Args...>(); | ||
|
|
@@ -1894,6 +1977,13 @@ struct Serializer<std::set<T, Args...>> { | |
| if (FORY_PREDICT_FALSE(ctx.has_error())) { | ||
| return std::set<T, Args...>(); | ||
| } | ||
|
|
||
| if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) { | ||
| ctx.set_error( | ||
| Error::invalid_data("Collection length exceeds max_collection_size")); | ||
| return std::set<T, Args...>(); | ||
| } | ||
|
|
||
| std::set<T, Args...> result; | ||
| for (uint32_t i = 0; i < size; ++i) { | ||
| if (FORY_PREDICT_FALSE(ctx.has_error())) { | ||
|
|
@@ -1988,6 +2078,12 @@ struct Serializer<std::unordered_set<T, Args...>> { | |
| return std::unordered_set<T, Args...>(); | ||
| } | ||
|
|
||
| if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) { | ||
| ctx.set_error( | ||
| Error::invalid_data("Collection length exceeds max_collection_size")); | ||
| return std::unordered_set<T, Args...>(); | ||
| } | ||
|
|
||
| // Per xlang spec: header and type_info are omitted when length is 0 | ||
| if (size == 0) { | ||
| return std::unordered_set<T, Args...>(); | ||
|
|
@@ -2070,6 +2166,13 @@ struct Serializer<std::unordered_set<T, Args...>> { | |
| if (FORY_PREDICT_FALSE(ctx.has_error())) { | ||
| return std::unordered_set<T, Args...>(); | ||
| } | ||
|
|
||
| if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) { | ||
| ctx.set_error( | ||
| Error::invalid_data("Collection length exceeds max_collection_size")); | ||
| return std::unordered_set<T, Args...>(); | ||
| } | ||
|
|
||
| std::unordered_set<T, Args...> result; | ||
| result.reserve(size); | ||
| for (uint32_t i = 0; i < size; ++i) { | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice addition on the read() path. One gap remains: several read_data() paths still trust payload size without max_collection_size checks (for example vector non-arithmetic, vector, list, deque, and forward_list). Those paths are used in native/nested deserialization, so guardrails can still be bypassed there.