From 0a5c787992901b0847af013554c99d12612a0463 Mon Sep 17 00:00:00 2001 From: Stuart Longland Date: Wed, 30 Jun 2021 16:20:10 +1000 Subject: [PATCH 01/15] cborparser: Document `cbor_parser_init_reader`. Describe the input parameters for the function and how they are used as best we understand from on-paper analysis of the C code. --- src/cborparser.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/cborparser.c b/src/cborparser.c index 31c8d8bf..0c7f6407 100644 --- a/src/cborparser.c +++ b/src/cborparser.c @@ -345,6 +345,23 @@ CborError cbor_parser_init(const uint8_t *buffer, size_t size, uint32_t flags, C return preparse_value(it); } +/** + * Initializes the CBOR parser for parsing a document that is read by an + * abstract reader interface defined by \a ops. The iterator to the first + * element is returned in \a it. + * + * The \a parser structure needs to remain valid throughout the decoding + * process. It is not thread-safe to share one CborParser among multiple + * threads iterating at the same time, but the object can be copied so multiple + * threads can iterate. + * + * The \a ops structure defines functions that implement the read process from + * the buffer given, see \ref CborParserOperations for further details. + * + * The \a token is passed as the first argument to all + * \ref CborParserOperations methods, and may be used to pass additional + * context information to the reader implementation. + */ CborError cbor_parser_init_reader(const struct CborParserOperations *ops, CborParser *parser, CborValue *it, void *token) { memset(parser, 0, sizeof(*parser)); From 6820d654d5b5cbcc80df0281c25ecc8b7ddbaeed Mon Sep 17 00:00:00 2001 From: Stuart Longland Date: Wed, 30 Jun 2021 17:41:45 +1000 Subject: [PATCH 02/15] cbor: Document the reader interface. 
--- src/cbor.h | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/src/cbor.h b/src/cbor.h index d5570265..aae547c1 100644 --- a/src/cbor.h +++ b/src/cbor.h @@ -319,11 +319,80 @@ enum CborParserIteratorFlags }; struct CborValue; + +/** + * Defines an interface for abstract document readers. This structure is used + * in conjunction with \ref cbor_parser_init_reader to define how the various + * required operations are to be implemented. + */ struct CborParserOperations { + /** + * Determines whether \a len bytes may be read from the reader. This is + * called before \ref read_bytes and \ref transfer_bytes to ensure it is safe + * to read the requested number of bytes from the reader. + * + * \param token An opaque object passed to \ref cbor_parser_init_reader + * that may be used to pass context information between the + * \ref CborParserOperations methods. + * + * \param len The number of bytes sought. + * + * \retval true \a len bytes may be read from the reader. + * \retval false Insufficient data is available to be read at this time. + */ bool (*can_read_bytes)(void *token, size_t len); + + /** + * Reads \a len bytes from the reader starting at \a offset bytes from + * the current read position and copies them to \a dst. The read pointer + * is *NOT* modified by this operation. + * + * \param token An opaque object passed to \ref cbor_parser_init_reader + * that may be used to pass context information between the + * \ref CborParserOperations methods. + * + * \param dst The buffer the read bytes will be copied to. + * + * \param offset The starting position for the read relative to the + * current read position. + * + * \param len The number of bytes sought. + */ void *(*read_bytes)(void *token, void *dst, size_t offset, size_t len); + + /** + * Skips past \a len bytes from the reader without reading them. The read + * pointer is advanced in the process. 
+ * + * \param token An opaque object passed to \ref cbor_parser_init_reader + * that may be used to pass context information between the + * \ref CborParserOperations methods. + * + * \param len The number of bytes skipped. + */ void (*advance_bytes)(void *token, size_t len); + + /** + * Overwrite the user-supplied pointer \a userptr with the address where the + * data indicated by \a offset is located, then advance the read pointer + * \a len bytes beyond that point. + * + * This routine is used for accessing strings embedded in CBOR documents + * (both text and binary strings). + * + * \param token An opaque object passed to \ref cbor_parser_init_reader + * that may be used to pass context information between the + * \ref CborParserOperations methods. + * + * \param userptr The pointer that will be updated to reference the location + * of the data in the buffer. + * + * \param offset The starting position for the read relative to the + * current read position. + * + * \param len The number of bytes sought. + */ CborError (*transfer_string)(void *token, const void **userptr, size_t offset, size_t len); }; From cacf527341fdebf40afe811d468b5bbd8a8854e4 Mon Sep 17 00:00:00 2001 From: Stuart Longland Date: Sat, 3 Jul 2021 11:56:13 +1000 Subject: [PATCH 03/15] cborencoder: Document the write callback function. What is not known, is what the significance is of `CborEncoderAppendType`. It basically tells the writer the nature of the data being written, but the default implementation ignores this and just blindly appends it no matter what. That raises the question of why it's important enough that the writer function needs to know about it. 
--- src/cbor.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/cbor.h b/src/cbor.h index aae547c1..491932ab 100644 --- a/src/cbor.h +++ b/src/cbor.h @@ -214,7 +214,20 @@ typedef enum CborEncoderAppendType CborEncoderAppendRawData = 2 } CborEncoderAppendType; -typedef CborError (*CborEncoderWriteFunction)(void *, const void *, size_t, CborEncoderAppendType); +/** + * Writer interface call-back function. When there is data to be written to + * the CBOR document, this routine will be called. The \a token parameter is + * taken from the \a token argument provided to \ref cbor_encoder_init_writer + * and may be used in any way the writer function sees fit. + * + * The \a data parameter contains a pointer to the raw bytes to be copied to + * the output buffer, with \a len specifying how long the payload is, which + * can be as small as a single byte or an entire (byte or text) string. + * + * The \a append parameter informs the writer function whether it is writing + * a string or general CBOR data. + */ +typedef CborError (*CborEncoderWriteFunction)(void *token, const void *data, size_t len, CborEncoderAppendType append); enum CborEncoderFlags { From 134864402ce792330bf8d67acf526d0e36726db9 Mon Sep 17 00:00:00 2001 From: Stuart Longland Date: Wed, 8 Sep 2021 08:22:53 +1000 Subject: [PATCH 04/15] cbor.h, cborparser.c: Migrate parser documentation. Not 100% sure of the syntax for documenting struct-members outside of the struct as I'm too used to doing it inline, hopefully this works as expected. :-) --- src/cbor.h | 69 +------------------------------------------ src/cborparser.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 68 deletions(-) diff --git a/src/cbor.h b/src/cbor.h index 491932ab..1ccf974f 100644 --- a/src/cbor.h +++ b/src/cbor.h @@ -333,79 +333,12 @@ enum CborParserIteratorFlags struct CborValue; -/** - * Defines an interface for abstract document readers. 
This structure is used - * in conjunction with \ref cbor_parser_init_reader to define how the various - * required operations are to be implemented. - */ + struct CborParserOperations { - /** - * Determines whether \a len bytes may be read from the reader. This is - * called before \ref read_bytes and \ref transfer_bytes to ensure it is safe - * to read the requested number of bytes from the reader. - * - * \param token An opaque object passed to \ref cbor_parser_init_reader - * that may be used to pass context information between the - * \ref CborParserOperations methods. - * - * \param len The number of bytes sought. - * - * \retval true \a len bytes may be read from the reader. - * \retval false Insufficient data is available to be read at this time. - */ bool (*can_read_bytes)(void *token, size_t len); - - /** - * Reads \a len bytes from the reader starting at \a offset bytes from - * the current read position and copies them to \a dst. The read pointer - * is *NOT* modified by this operation. - * - * \param token An opaque object passed to \ref cbor_parser_init_reader - * that may be used to pass context information between the - * \ref CborParserOperations methods. - * - * \param dst The buffer the read bytes will be copied to. - * - * \param offset The starting position for the read relative to the - * current read position. - * - * \param len The number of bytes sought. - */ void *(*read_bytes)(void *token, void *dst, size_t offset, size_t len); - - /** - * Skips past \a len bytes from the reader without reading them. The read - * pointer is advanced in the process. - * - * \param token An opaque object passed to \ref cbor_parser_init_reader - * that may be used to pass context information between the - * \ref CborParserOperations methods. - * - * \param len The number of bytes skipped. 
- */ void (*advance_bytes)(void *token, size_t len); - - /** - * Overwrite the user-supplied pointer \a userptr with the address where the - * data indicated by \a offset is located, then advance the read pointer - * \a len bytes beyond that point. - * - * This routine is used for accessing strings embedded in CBOR documents - * (both text and binary strings). - * - * \param token An opaque object passed to \ref cbor_parser_init_reader - * that may be used to pass context information between the - * \ref CborParserOperations methods. - * - * \param userptr The pointer that will be updated to reference the location - * of the data in the buffer. - * - * \param offset The starting position for the read relative to the - * current read position. - * - * \param len The number of bytes sought. - */ CborError (*transfer_string)(void *token, const void **userptr, size_t offset, size_t len); }; diff --git a/src/cborparser.c b/src/cborparser.c index 0c7f6407..a796bd0b 100644 --- a/src/cborparser.c +++ b/src/cborparser.c @@ -132,6 +132,83 @@ * \endif */ +/** + * \struct CborParserOperations + * + * Defines an interface for abstract document readers. This structure is used + * in conjunction with \ref cbor_parser_init_reader to define how the various + * required operations are to be implemented. + * + * + * \var CborParserOperations::can_read_bytes + * + * Determines whether \a len bytes may be read from the reader. This is + * called before \ref read_bytes and \ref transfer_bytes to ensure it is safe + * to read the requested number of bytes from the reader. + * + * \param token An opaque object passed to \ref cbor_parser_init_reader + * that may be used to pass context information between the + * \ref CborParserOperations methods. + * + * \param len The number of bytes sought. + * + * \retval true \a len bytes may be read from the reader. + * \retval false Insufficient data is available to be read at this time. 
+ * + * + * \var CborParserOperations::read_bytes + * + * Reads \a len bytes from the reader starting at \a offset bytes from + * the current read position and copies them to \a dst. The read pointer + * is *NOT* modified by this operation. + * + * \param token An opaque object passed to \ref cbor_parser_init_reader + * that may be used to pass context information between the + * \ref CborParserOperations methods. + * + * \param dst The buffer the read bytes will be copied to. + * + * \param offset The starting position for the read relative to the + * current read position. + * + * \param len The number of bytes sought. + * + * + * \var CborParserOperations::advance_bytes + * + * Skips past \a len bytes from the reader without reading them. The read + * pointer is advanced in the process. + * + * \param token An opaque object passed to \ref cbor_parser_init_reader + * that may be used to pass context information between the + * \ref CborParserOperations methods. + * + * \param len The number of bytes skipped. + * + * + * \var CborParserOperations::transfer_string + * + * Overwrite the user-supplied pointer \a userptr with the address where the + * data indicated by \a offset is located, then advance the read pointer + * \a len bytes beyond that point. + * + * This routine is used for accessing strings embedded in CBOR documents + * (both text and binary strings). + * + * \param token An opaque object passed to \ref cbor_parser_init_reader + * that may be used to pass context information between the + * \ref CborParserOperations methods. + * + * \param userptr The pointer that will be updated to reference the location + * of the data in the buffer. + * + * \param offset The starting position for the read relative to the + * current read position. + * + * \param len The number of bytes sought. 
+ */ + + static uint64_t extract_number_and_advance(CborValue *it) { /* This function is only called after we've verified that the number From a02319b0e01b0b7fce725c5cb644b3297ab260fc Mon Sep 17 00:00:00 2001 From: Stuart Longland Date: Wed, 8 Sep 2021 08:11:20 +1000 Subject: [PATCH 05/15] cbor.h, cborencoder.c: Migrate documentation for encoder functions --- src/cbor.h | 13 ------------- src/cborencoder.c | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/cbor.h b/src/cbor.h index 1ccf974f..6d34847a 100644 --- a/src/cbor.h +++ b/src/cbor.h @@ -214,19 +214,6 @@ typedef enum CborEncoderAppendType CborEncoderAppendRawData = 2 } CborEncoderAppendType; -/** - * Writer interface call-back function. When there is data to be written to - * the CBOR document, this routine will be called. The \a token parameter is - * taken from the \a token argument provided to \ref cbor_encoder_init_writer - * and may be used in any way the writer function sees fit. - * - * The \a data parameter contains a pointer to the raw bytes to be copied to - * the output buffer, with \a len specifying how long the payload is, which - * can be as small as a single byte or an entire (byte or text) string. - * - * The \a append parameter informs the writer function whether it is writing - * a string or general CBOR data. - */ typedef CborError (*CborEncoderWriteFunction)(void *token, const void *data, size_t len, CborEncoderAppendType append); enum CborEncoderFlags diff --git a/src/cborencoder.c b/src/cborencoder.c index b21c1da8..0f4bf03e 100644 --- a/src/cborencoder.c +++ b/src/cborencoder.c @@ -187,6 +187,23 @@ * Structure used to encode to CBOR. */ +/** + * \file cbor.h + * \typedef CborEncoderWriteFunction + * + * Writer interface call-back function. When there is data to be written to + * the CBOR document, this routine will be called. 
The \a token parameter is + * taken from the \a token argument provided to \ref cbor_encoder_init_writer + * and may be used in any way the writer function sees fit. + * + * The \a data parameter contains a pointer to the raw bytes to be copied to + * the output buffer, with \a len specifying how long the payload is, which + * can be as small as a single byte or an entire (byte or text) string. + * + * The \a append parameter informs the writer function whether it is writing + * a string or general CBOR data. + */ + /** * Initializes a CborEncoder structure \a encoder by pointing it to buffer \a * buffer of size \a size. The \a flags field is currently unused and must be From 03cdaebe9f564cbf36892f63fd297c3eff708aa6 Mon Sep 17 00:00:00 2001 From: Stuart Longland Date: Fri, 2 Jul 2021 14:02:00 +1000 Subject: [PATCH 06/15] cborparser: Pass CborValue to operation routines. The `token` parameter is not sufficient since it is effectively shared by all `CborValue` instances. Since `tinycbor` often uses a temporary `CborValue` context to perform some operation, we need to store our context inside that `CborValue` so that we don't pollute the global state of the reader. 
--- src/cbor.h | 8 ++++---- src/cborinternal_p.h | 16 ++++++++-------- src/cborparser.c | 19 +++++++------------ tests/parser/tst_parser.cpp | 16 ++++++++-------- 4 files changed, 27 insertions(+), 32 deletions(-) diff --git a/src/cbor.h b/src/cbor.h index 6d34847a..5bc4b77f 100644 --- a/src/cbor.h +++ b/src/cbor.h @@ -323,10 +323,10 @@ struct CborValue; struct CborParserOperations { - bool (*can_read_bytes)(void *token, size_t len); - void *(*read_bytes)(void *token, void *dst, size_t offset, size_t len); - void (*advance_bytes)(void *token, size_t len); - CborError (*transfer_string)(void *token, const void **userptr, size_t offset, size_t len); + bool (*can_read_bytes)(const struct CborValue *value, size_t len); + void *(*read_bytes)(const struct CborValue *value, void *dst, size_t offset, size_t len); + void (*advance_bytes)(struct CborValue *value, size_t len); + CborError (*transfer_string)(struct CborValue *value, const void **userptr, size_t offset, size_t len); }; struct CborParser diff --git a/src/cborinternal_p.h b/src/cborinternal_p.h index ee9c117e..80e88cad 100644 --- a/src/cborinternal_p.h +++ b/src/cborinternal_p.h @@ -207,9 +207,9 @@ static inline bool can_read_bytes(const CborValue *it, size_t n) if (CBOR_PARSER_READER_CONTROL >= 0) { if (it->parser->flags & CborParserFlag_ExternalSource || CBOR_PARSER_READER_CONTROL != 0) { #ifdef CBOR_PARSER_CAN_READ_BYTES_FUNCTION - return CBOR_PARSER_CAN_READ_BYTES_FUNCTION(it->source.token, n); + return CBOR_PARSER_CAN_READ_BYTES_FUNCTION(it, n); #else - return it->parser->source.ops->can_read_bytes(it->source.token, n); + return it->parser->source.ops->can_read_bytes(it, n); #endif } } @@ -225,9 +225,9 @@ static inline void advance_bytes(CborValue *it, size_t n) if (CBOR_PARSER_READER_CONTROL >= 0) { if (it->parser->flags & CborParserFlag_ExternalSource || CBOR_PARSER_READER_CONTROL != 0) { #ifdef CBOR_PARSER_ADVANCE_BYTES_FUNCTION - CBOR_PARSER_ADVANCE_BYTES_FUNCTION(it->source.token, n); + 
CBOR_PARSER_ADVANCE_BYTES_FUNCTION(it, n); #else - it->parser->source.ops->advance_bytes(it->source.token, n); + it->parser->source.ops->advance_bytes(it, n); #endif return; } @@ -241,9 +241,9 @@ static inline CborError transfer_string(CborValue *it, const void **ptr, size_t if (CBOR_PARSER_READER_CONTROL >= 0) { if (it->parser->flags & CborParserFlag_ExternalSource || CBOR_PARSER_READER_CONTROL != 0) { #ifdef CBOR_PARSER_TRANSFER_STRING_FUNCTION - return CBOR_PARSER_TRANSFER_STRING_FUNCTION(it->source.token, ptr, offset, len); + return CBOR_PARSER_TRANSFER_STRING_FUNCTION(it, ptr, offset, len); #else - return it->parser->source.ops->transfer_string(it->source.token, ptr, offset, len); + return it->parser->source.ops->transfer_string(it, ptr, offset, len); #endif } } @@ -262,9 +262,9 @@ static inline void *read_bytes_unchecked(const CborValue *it, void *dst, size_t if (CBOR_PARSER_READER_CONTROL >= 0) { if (it->parser->flags & CborParserFlag_ExternalSource || CBOR_PARSER_READER_CONTROL != 0) { #ifdef CBOR_PARSER_READ_BYTES_FUNCTION - return CBOR_PARSER_READ_BYTES_FUNCTION(it->source.token, dst, offset, n); + return CBOR_PARSER_READ_BYTES_FUNCTION(it, dst, offset, n); #else - return it->parser->source.ops->read_bytes(it->source.token, dst, offset, n); + return it->parser->source.ops->read_bytes(it, dst, offset, n); #endif } } diff --git a/src/cborparser.c b/src/cborparser.c index a796bd0b..ce81c6b4 100644 --- a/src/cborparser.c +++ b/src/cborparser.c @@ -146,9 +146,7 @@ * called before \ref read_bytes and \ref transfer_bytes to ensure it is safe * to read the requested number of bytes from the reader. * - * \param token An opaque object passed to \ref cbor_parser_init_reader - * that may be used to pass context information between the - * \ref CborParserOperations methods. + * \param value The CBOR value being parsed. * * \param len The number of bytes sought. * @@ -162,9 +160,7 @@ * the current read position and copies them to \a dst. 
The read pointer * is *NOT* modified by this operation. * - * \param token An opaque object passed to \ref cbor_parser_init_reader - * that may be used to pass context information between the - * \ref CborParserOperations methods. + * \param value The CBOR value being parsed. * * \param dst The buffer the read bytes will be copied to. * @@ -179,9 +175,10 @@ * Skips past \a len bytes from the reader without reading them. The read * pointer is advanced in the process. * - * \param token An opaque object passed to \ref cbor_parser_init_reader - * that may be used to pass context information between the - * \ref CborParserOperations methods. + * Skips past \a len bytes from the reader without reading them. The read + * pointer is advanced in the process. + * + * \param value The CBOR value being parsed. * * \param len The number of bytes skipped. * @@ -195,9 +192,7 @@ * This routine is used for accessing strings embedded in CBOR documents * (both text and binary strings). * - * \param token An opaque object passed to \ref cbor_parser_init_reader - * that may be used to pass context information between the - * \ref CborParserOperations methods. + * \param value The CBOR value being parsed. * * \param userptr The pointer that will be updated to reference the location * of the data in the buffer. 
diff --git a/tests/parser/tst_parser.cpp b/tests/parser/tst_parser.cpp index f9374583..65da3ca3 100644 --- a/tests/parser/tst_parser.cpp +++ b/tests/parser/tst_parser.cpp @@ -813,21 +813,21 @@ struct Input { }; static const CborParserOperations byteArrayOps = { - /* can_read_bytes = */ [](void *token, size_t len) { - auto input = static_cast<Input *>(token); + /* can_read_bytes = */ [](const CborValue *value, size_t len) { + auto input = static_cast<Input *>(value->source.token); return input->data.size() - input->consumed >= int(len); }, - /* read_bytes = */ [](void *token, void *dst, size_t offset, size_t len) { - auto input = static_cast<Input *>(token); + /* read_bytes = */ [](const CborValue *value, void *dst, size_t offset, size_t len) { + auto input = static_cast<Input *>(value->source.token); return memcpy(dst, input->data.constData() + input->consumed + offset, len); }, - /* advance_bytes = */ [](void *token, size_t len) { - auto input = static_cast<Input *>(token); + /* advance_bytes = */ [](CborValue *value, size_t len) { + auto input = static_cast<Input *>(value->source.token); input->consumed += int(len); }, - /* transfer_string = */ [](void *token, const void **userptr, size_t offset, size_t len) { + /* transfer_string = */ [](CborValue *value, const void **userptr, size_t offset, size_t len) { // ### - auto input = static_cast<Input *>(token); + auto input = static_cast<Input *>(value->source.token); if (input->data.size() - input->consumed < int(len + offset)) return CborErrorUnexpectedEOF; input->consumed += int(offset); From f3879740a3b04fe5725dc73f74f9034e3bb91418 Mon Sep 17 00:00:00 2001 From: Stuart Longland Date: Fri, 2 Jul 2021 14:19:18 +1000 Subject: [PATCH 07/15] cborparser: Move `ops` outside of `union` In its place, put an arbitrary `void *` pointer for reader context. The reader needs to store some context information which is specific to the `CborParser` instance it is serving.
Right now, `CborValue::source::token` serves this purpose, but the problem is that we also need a per-`CborValue` context and have nowhere to put it. Better to spend an extra pointer (4 bytes on 32-bit platforms) in the `CborParser` (which there'll be just one of), than to do it in the `CborValue` (which there may be several of) or to use a `CborReader` object that itself carries two pointers (`ops` and the context, thus we'd need an extra 3 pointers). --- src/cbor.h | 5 +++-- src/cborinternal_p.h | 10 +++++----- src/cborparser.c | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/cbor.h b/src/cbor.h index 5bc4b77f..7c5cb6ae 100644 --- a/src/cbor.h +++ b/src/cbor.h @@ -333,8 +333,9 @@ struct CborParser { union { const uint8_t *end; - const struct CborParserOperations *ops; - } source; + void *ctx; + } data; + const struct CborParserOperations *ops; enum CborParserGlobalFlags flags; }; typedef struct CborParser CborParser; diff --git a/src/cborinternal_p.h b/src/cborinternal_p.h index 80e88cad..ae26ca2a 100644 --- a/src/cborinternal_p.h +++ b/src/cborinternal_p.h @@ -209,7 +209,7 @@ static inline bool can_read_bytes(const CborValue *it, size_t n) #ifdef CBOR_PARSER_CAN_READ_BYTES_FUNCTION return CBOR_PARSER_CAN_READ_BYTES_FUNCTION(it, n); #else - return it->parser->source.ops->can_read_bytes(it, n); + return it->parser->ops->can_read_bytes(it, n); #endif } } @@ -217,7 +217,7 @@ static inline bool can_read_bytes(const CborValue *it, size_t n) /* Convert the pointer subtraction to size_t since end >= ptr * (this prevents issues with (ptrdiff_t)n becoming negative).
*/ - return (size_t)(it->parser->source.end - it->source.ptr) >= n; + return (size_t)(it->parser->data.end - it->source.ptr) >= n; } static inline void advance_bytes(CborValue *it, size_t n) @@ -227,7 +227,7 @@ static inline void advance_bytes(CborValue *it, size_t n) #ifdef CBOR_PARSER_ADVANCE_BYTES_FUNCTION CBOR_PARSER_ADVANCE_BYTES_FUNCTION(it, n); #else - it->parser->source.ops->advance_bytes(it, n); + it->parser->ops->advance_bytes(it, n); #endif return; } @@ -243,7 +243,7 @@ static inline CborError transfer_string(CborValue *it, const void **ptr, size_t #ifdef CBOR_PARSER_TRANSFER_STRING_FUNCTION return CBOR_PARSER_TRANSFER_STRING_FUNCTION(it, ptr, offset, len); #else - return it->parser->source.ops->transfer_string(it, ptr, offset, len); + return it->parser->ops->transfer_string(it, ptr, offset, len); #endif } } @@ -264,7 +264,7 @@ static inline void *read_bytes_unchecked(const CborValue *it, void *dst, size_t #ifdef CBOR_PARSER_READ_BYTES_FUNCTION return CBOR_PARSER_READ_BYTES_FUNCTION(it, dst, offset, n); #else - return it->parser->source.ops->read_bytes(it, dst, offset, n); + return it->parser->ops->read_bytes(it, dst, offset, n); #endif } } diff --git a/src/cborparser.c b/src/cborparser.c index ce81c6b4..4070ce77 100644 --- a/src/cborparser.c +++ b/src/cborparser.c @@ -408,7 +408,7 @@ uint64_t _cbor_value_decode_int64_internal(const CborValue *value) CborError cbor_parser_init(const uint8_t *buffer, size_t size, uint32_t flags, CborParser *parser, CborValue *it) { memset(parser, 0, sizeof(*parser)); - parser->source.end = buffer + size; + parser->data.end = buffer + size; parser->flags = (enum CborParserGlobalFlags)flags; it->parser = parser; it->source.ptr = buffer; @@ -437,7 +437,7 @@ CborError cbor_parser_init(const uint8_t *buffer, size_t size, uint32_t flags, C CborError cbor_parser_init_reader(const struct CborParserOperations *ops, CborParser *parser, CborValue *it, void *token) { memset(parser, 0, sizeof(*parser)); - parser->source.ops = ops; + 
parser->ops = ops; parser->flags = CborParserFlag_ExternalSource; it->parser = parser; it->source.token = token; From b7482f628ca40a98b7ab7df16fb6222eb619abb8 Mon Sep 17 00:00:00 2001 From: Stuart Longland Date: Sat, 3 Jul 2021 09:43:41 +1000 Subject: [PATCH 08/15] cborparser: Move the reader context to CborParser. --- src/cborparser.c | 2 +- tests/parser/tst_parser.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/cborparser.c b/src/cborparser.c index 4070ce77..93a65cdd 100644 --- a/src/cborparser.c +++ b/src/cborparser.c @@ -439,8 +439,8 @@ CborError cbor_parser_init_reader(const struct CborParserOperations *ops, CborPa memset(parser, 0, sizeof(*parser)); parser->ops = ops; parser->flags = CborParserFlag_ExternalSource; + parser->data.ctx = token; it->parser = parser; - it->source.token = token; it->remaining = 1; return preparse_value(it); } diff --git a/tests/parser/tst_parser.cpp b/tests/parser/tst_parser.cpp index 65da3ca3..123f7e21 100644 --- a/tests/parser/tst_parser.cpp +++ b/tests/parser/tst_parser.cpp @@ -814,20 +814,20 @@ struct Input { static const CborParserOperations byteArrayOps = { /* can_read_bytes = */ [](const CborValue *value, size_t len) { - auto input = static_cast<Input *>(value->source.token); + auto input = static_cast<Input *>(value->parser->data.ctx); return input->data.size() - input->consumed >= int(len); }, /* read_bytes = */ [](const CborValue *value, void *dst, size_t offset, size_t len) { - auto input = static_cast<Input *>(value->source.token); + auto input = static_cast<Input *>(value->parser->data.ctx); return memcpy(dst, input->data.constData() + input->consumed + offset, len); }, /* advance_bytes = */ [](CborValue *value, size_t len) { - auto input = static_cast<Input *>(value->source.token); + auto input = static_cast<Input *>(value->parser->data.ctx); input->consumed += int(len); }, /* transfer_string = */ [](CborValue *value, const void **userptr, size_t offset, size_t len) { // ### - auto input = static_cast<Input *>(value->source.token); + auto input
= static_cast<Input *>(value->parser->data.ctx); if (input->data.size() - input->consumed < int(len + offset)) return CborErrorUnexpectedEOF; input->consumed += int(offset); From 12b84c288bb26cf097a93a5684c8b6e5c8d8694c Mon Sep 17 00:00:00 2001 From: Stuart Longland Date: Sat, 3 Jul 2021 10:11:30 +1000 Subject: [PATCH 09/15] cborparser: Update documentation --- src/cborparser.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/cborparser.c b/src/cborparser.c index 93a65cdd..c9aa3b03 100644 --- a/src/cborparser.c +++ b/src/cborparser.c @@ -175,9 +175,6 @@ * Skips past \a len bytes from the reader without reading them. The read * pointer is advanced in the process. * - * Skips past \a len bytes from the reader without reading them. The read - * pointer is advanced in the process. - * * \param value The CBOR value being parsed. * * \param len The number of bytes skipped. @@ -430,18 +427,21 @@ CborError cbor_parser_init(const uint8_t *buffer, size_t size, uint32_t flags, C * The \a ops structure defines functions that implement the read process from * the buffer given, see \ref CborParserOperations for further details. * - * The \a token is passed as the first argument to all - * \ref CborParserOperations methods, and may be used to pass additional - * context information to the reader implementation. + * The \a ctx is stored in the \ref CborParser object as `data.ctx` and may be + * used however the reader implementation sees fit. For cursor-specific + * context information, the \ref CborValue `source.token` union member is + * initialised to `NULL` and may be used however the reader implementation + * sees fit.
*/ -CborError cbor_parser_init_reader(const struct CborParserOperations *ops, CborParser *parser, CborValue *it, void *token) +CborError cbor_parser_init_reader(const struct CborParserOperations *ops, CborParser *parser, CborValue *it, void *ctx) { memset(parser, 0, sizeof(*parser)); parser->ops = ops; parser->flags = CborParserFlag_ExternalSource; - parser->data.ctx = token; + parser->data.ctx = ctx; it->parser = parser; it->remaining = 1; + it->source.token = NULL; return preparse_value(it); } From 4b8c8c0b486f310ef491c06ab9458301a26c39ed Mon Sep 17 00:00:00 2001 From: Stuart Longland Date: Sat, 3 Jul 2021 10:12:31 +1000 Subject: [PATCH 10/15] reader unit tests: Simplify the example reader We simplify this reader in two ways: 1. we remove the `consumed` member of `struct Input`, and instead use the `CborValue`'s `source.token` member, which we treat as an unsigned integer offset into our `QByteArray`. 2. we replace the reader-specific `struct Input` with the `QByteArray` it was wrapping, since that's the only thing now contained in our `struct Input`. If a `CborValue` gets cloned, the pointer referred to by `source.token` similarly gets cloned, thus when we advance the pointer on the clone, it leaves the original alone, so computing the length of unknown-length entities in the CBOR document can be done safely. 
--- tests/parser/tst_parser.cpp | 50 ++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/tests/parser/tst_parser.cpp b/tests/parser/tst_parser.cpp index 123f7e21..01ae4bd1 100644 --- a/tests/parser/tst_parser.cpp +++ b/tests/parser/tst_parser.cpp @@ -807,32 +807,32 @@ void tst_Parser::mapsAndArrays() "{_ 1: [_ " + expected + "], \"Hello\": {_ " + expected + ": (_ )}}"); } -struct Input { - QByteArray data; - int consumed; -}; - static const CborParserOperations byteArrayOps = { /* can_read_bytes = */ [](const CborValue *value, size_t len) { - auto input = static_cast<Input *>(value->parser->data.ctx); - return input->data.size() - input->consumed >= int(len); + auto data = static_cast<QByteArray *>(value->parser->data.ctx); + auto consumed = uintptr_t(value->source.token); + return uintptr_t(data->size()) - consumed >= uintptr_t(len); }, /* read_bytes = */ [](const CborValue *value, void *dst, size_t offset, size_t len) { - auto input = static_cast<Input *>(value->parser->data.ctx); - return memcpy(dst, input->data.constData() + input->consumed + offset, len); + auto data = static_cast<QByteArray *>(value->parser->data.ctx); + auto consumed = uintptr_t(value->source.token); + return memcpy(dst, data->constData() + consumed + offset, len); }, /* advance_bytes = */ [](CborValue *value, size_t len) { - auto input = static_cast<Input *>(value->parser->data.ctx); - input->consumed += int(len); + auto consumed = uintptr_t(value->source.token); + consumed += int(len); + value->source.token = (void*)consumed; }, /* transfer_string = */ [](CborValue *value, const void **userptr, size_t offset, size_t len) { // ### - auto input = static_cast<Input *>(value->parser->data.ctx); - if (input->data.size() - input->consumed < int(len + offset)) + auto data = static_cast<QByteArray *>(value->parser->data.ctx); + auto consumed = uintptr_t(value->source.token); + if (uintptr_t(data->size()) - consumed < uintptr_t(len + offset)) return CborErrorUnexpectedEOF; - input->consumed += int(offset); - *userptr =
input->data.constData() + input->consumed; - input->consumed += int(len); + consumed += int(offset); + *userptr = data->constData() + consumed; + consumed += int(len); + value->source.token = (void*)consumed; return CborNoError; } }; @@ -842,11 +842,9 @@ void tst_Parser::readerApi() QFETCH(QByteArray, data); QFETCH(QString, expected); - Input input = { data, 0 }; - CborParser parser; CborValue first; - CborError err = cbor_parser_init_reader(&byteArrayOps, &parser, &first, &input); + CborError err = cbor_parser_init_reader(&byteArrayOps, &parser, &first, &data); QCOMPARE(err, CborNoError); QString decoded; @@ -855,7 +853,7 @@ void tst_Parser::readerApi() QCOMPARE(decoded, expected); // check we consumed everything - QCOMPARE(input.consumed, data.size()); + QCOMPARE(uintptr_t(first.source.token), uintptr_t(data.size())); } void tst_Parser::reparse_data() @@ -870,23 +868,23 @@ void tst_Parser::reparse() QFETCH(QByteArray, data); QFETCH(QString, expected); - Input input = { QByteArray(), 0 }; + QByteArray buffer; CborParser parser; CborValue first; - CborError err = cbor_parser_init_reader(&byteArrayOps, &parser, &first, &input); + CborError err = cbor_parser_init_reader(&byteArrayOps, &parser, &first, &buffer); QCOMPARE(err, CborErrorUnexpectedEOF); for (int i = 0; i < data.size(); ++i) { - input.data = data.left(i); + buffer = data.left(i); err = cbor_value_reparse(&first); if (err != CborErrorUnexpectedEOF) qDebug() << "At" << i; QCOMPARE(err, CborErrorUnexpectedEOF); - QCOMPARE(input.consumed, 0); + QCOMPARE(uintptr_t(first.source.token), 0U); } // now it should work - input.data = data; + buffer = data; err = cbor_value_reparse(&first); QCOMPARE(err, CborNoError); @@ -896,7 +894,7 @@ void tst_Parser::reparse() QCOMPARE(decoded, expected); // check we consumed everything - QCOMPARE(input.consumed, data.size()); + QCOMPARE(uintptr_t(first.source.token), uintptr_t(data.size())); } void tst_Parser::chunkedString_data() From f98f354de2a1ea088410f0ea6bbc50e7101b01b8 
Mon Sep 17 00:00:00 2001 From: Stuart Longland Date: Wed, 7 Jul 2021 17:29:52 +1000 Subject: [PATCH 11/15] examples: Add buffered reader example This reads a CBOR file piece-wise, seeking backward and forward through the file if needed. Some seeking can be avoided by tuning the block size used in reads so that the read window shifts by smaller amounts. --- examples/bufferedreader.c | 807 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 807 insertions(+) create mode 100644 examples/bufferedreader.c diff --git a/examples/bufferedreader.c b/examples/bufferedreader.c new file mode 100644 index 00000000..cdcc9e46 --- /dev/null +++ b/examples/bufferedreader.c @@ -0,0 +1,807 @@ +/* vim: set sw=4 ts=4 et tw=80: */ + +/**************************************************************************** +** +** Copyright © 2021 VRT Systems Pty Ltd. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and associated documentation files (the "Software"), to deal +** in the Software without restriction, including without limitation the rights +** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +** copies of the Software, and to permit persons to whom the Software is +** furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Software. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +** THE SOFTWARE. 
+** +****************************************************************************/ + +/** + * \brief An example of a buffered CBOR file reader using low-level + * POSIX file I/O as might be implemented in a microcontroller + * RTOS. + * + * \author Stuart Longland + * + * \copyright VRT Systems Pty Ltd + * + * \file bufferedreader.c + */ + +/* Includes for POSIX low-level file I/O */ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +/* Sanity check routine */ +#include <assert.h> + +/* Pull in "standard" integer types */ +#include <stdint.h> + +/* Pull in definitions for printf and errno */ +#include <stdio.h> +#include <errno.h> +#include <string.h> + +/* For example usage */ +#include <stdlib.h> + +#include "../src/cbor.h" + +/** + * Context for the file reader. This stores the file descriptor, a pointer to + * the read buffer, and context pointers. The assumption here is that the + * CBOR document being read is less than 64KiB (65536 bytes) in size. + */ +struct filereader +{ + /** + * Read buffer. This must be allocated by the caller, and sized + * appropriately since the buffer must be big enough to accommodate + * entire string chunks embedded in the CBOR document. + */ + uint8_t* buffer; + + /** + * File descriptor, returned by the `open` system call. + */ + int fd; + + /** + * Size of the file in bytes. + */ + uint16_t file_sz; + + /** + * Size of the read buffer in bytes. + */ + uint16_t buffer_sz; + + /** + * Read position within the file. This basically describes where + * `buffer[0]` came from in the source file. + */ + uint16_t pos; + + /** + * Number of bytes stored in the buffer presently. + */ + uint16_t used_sz; + + /** + * Block size. When reading from the file, we round up to whole multiples + * of this block size to improve I/O efficiency. + */ + uint16_t block_sz; +}; + +/* Implementation routines */ + +/** + * Return the nearest (earlier) position that is on a block boundary.
+ */ +static uint16_t filereader_get_block_pos( + const struct filereader * const context, + uint16_t pos +) { + return context->block_sz * (pos / context->block_sz); +} + +/** + * Retrieve a pointer to the region defined by \a pos and \a sz. + * + * \retval NULL The region is not contained in the buffer. + */ +static uint8_t *filereader_get_ptr( + struct filereader *context, uint16_t pos, uint16_t sz +) { + /* Sanity check, disallow `sz` bigger than buffer content */ + if (sz > context->used_sz) { + return NULL; + } + + /* Is `pos` off the start of the buffer? */ + if (pos < context->pos) { + return NULL; + } + + /* Is `pos + sz` off the end of the buffer? */ + if ((pos + sz) > (context->pos + context->used_sz)) { + return NULL; + } + + /* We should be good */ + return &(context->buffer[pos - context->pos]); +} + +/** + * Copy the data from the requested position in the file to the buffer. + */ +static int filereader_read( + struct filereader *context, uint16_t pos, uint16_t sz, uint8_t *wptr +) { + /* Seek to the required file position */ + off_t seek_res = lseek(context->fd, pos, SEEK_SET); + if (seek_res != pos) { + /* We failed */ + return -errno; + } + + /* Perform the read */ + ssize_t read_res = read(context->fd, wptr, sz); + if (read_res != sz) { + /* Truncated read */ + return -errno; + } + + return sz; +} + +/** + * Prepend \arg sz bytes from the file to the buffer, shifting the + * read window back \arg sz bytes. + * + * \param context Reader context + * \param sz Number of bytes to read + * + * \retval ≥0 Number of bytes read into the buffer. + * \retval <0 Read failure, value is `errno` negated. 
+ */ +static int filereader_prepend_buffer(struct filereader *context, uint16_t sz) { + /* Compute read position */ + const uint16_t pos = context->pos - sz; + + /* Shuffle existing data forward by sz bytes */ + memmove( + &(context->buffer[sz]), context->buffer, + context->buffer_sz - sz + ); + + /* Copy the data in */ + int read_res = filereader_read(context, pos, sz, context->buffer); + if (read_res >= 0) { + context->pos = pos; + if ((context->used_sz + sz) < context->buffer_sz) { + context->used_sz += sz; + } + } + + return read_res; +} + +/** + * Append \arg sz bytes from the file to the buffer, shifting the + * read window forward \arg sz bytes. + * + * \param context Reader context + * \param sz Number of bytes to read + * + * \retval ≥0 Number of bytes read into the buffer. + * \retval <0 Read failure, value is `errno` negated. + */ +static int filereader_append_buffer(struct filereader *context, uint16_t sz) { + /* Compute read position */ + const uint16_t pos = context->pos + context->used_sz; + + /* Is there room? */ + if ((context->buffer_sz - context->used_sz) < sz) { + /* Shuffle existing data forward by sz bytes */ + memmove( + context->buffer, &(context->buffer[sz]), + context->buffer_sz - sz + ); + context->pos += sz; + context->used_sz -= sz; + } + + /* Copy the data in */ + int read_res = filereader_read( + context, pos, sz, + &(context->buffer[context->used_sz]) + ); + if (read_res >= 0) { + context->used_sz += sz; + } + + return read_res; +} + +/** + * Read data from the file and place it in the buffer, shuffling + * existing data around as required. + * + * \param context Reader context + * \param pos Position in the file to start reading + * \param sz Number of bytes to read + * + * \retval ≥0 Number of bytes read into the buffer. + * \retval <0 Read failure, value is `errno` negated. 
+ */ +static int filereader_load_buffer( + struct filereader *context, uint16_t pos, uint16_t sz +) { + /* Compute the end position (not-inclusive) */ + uint16_t end = pos + sz; + + /* Is this in the buffer already? */ + if ( + (pos < context->pos) + || (end > (context->pos + context->used_sz)) + ) { + /* Make a note of the current buffer state */ + uint16_t buffer_end = context->pos + context->used_sz; + uint16_t buffer_rem = context->buffer_sz - context->used_sz; + + /* Our buffer write position */ + uint8_t* wptr = context->buffer; + + /* + * Dumb approach for now, replace the entire buffer. Round + * the start and end points to block boundaries for efficiency. + */ + pos = filereader_get_block_pos(context, pos); + end = filereader_get_block_pos(context, end + (context->block_sz - 1)); + + /* Clamp the end position to the file size */ + if (end > context->file_sz) { + end = context->file_sz; + } + + /* Compute new rounded size, then clamp to buffer size */ + sz = end - pos; + if (sz > context->buffer_sz) { + sz = context->buffer_sz; + } + + /* Can we re-use existing data? */ + if ( + (pos >= context->pos) + && (pos < buffer_end) + && (end > buffer_end) + ) { + return filereader_append_buffer(context, end - buffer_end); + } else if ( + (pos < context->pos) + && (end >= context->pos) + && (end <= buffer_end) + ) { + return filereader_prepend_buffer(context, context->pos - pos); + } else { + /* Nope, read the lot in */ + const uint16_t file_rem = context->file_sz - pos; + if (file_rem < context->buffer_sz) { + sz = file_rem; + } else { + sz = context->buffer_sz; + } + + int read_res = filereader_read( + context, pos, sz, + context->buffer + ); + if (read_res >= 0) { + context->pos = pos; + context->used_sz = sz; + } + return read_res; + } + } else { + /* Nothing to do, we have the required data already */ + return 0; + } +} + +/** + * Try to read the data into the buffer, then return a pointer to it. 
+ * + * \retval NULL The region could not be loaded into the buffer. + */ +static uint8_t *filereader_fetch_ptr( + struct filereader *context, uint16_t pos, uint16_t sz +) { + /* Ensure the data we need is present */ + if (filereader_load_buffer(context, pos, sz) < 0) { + /* We failed */ + return NULL; + } else { + return filereader_get_ptr(context, pos, sz); + } +} + +/** + * Fetch the reader context from the CborValue + */ +static struct filereader *filereader_get_context(const CborValue * const value) { + return (struct filereader*)(value->parser->data.ctx); +} + +/** + * Fetch the CborValue read position + */ +static uint16_t filereader_get_pos(const CborValue * const value) { + return (uint16_t)(uintptr_t)(value->source.token); +} + +/** + * Set the CborValue read position + */ +static void filereader_set_pos(CborValue * const value, uint16_t new_pos) { + value->source.token = (void*)(uintptr_t)new_pos; +} + +/** + * Return `true` if there is at least \a len bytes that can be read from + * the file at this moment in time. + */ +static bool filereader_impl_can_read_bytes( + const struct CborValue *value, + size_t len +) { + const struct filereader *context = filereader_get_context(value); + const uint16_t pos = filereader_get_pos(value); + + return ((size_t)pos + len) <= context->file_sz; +} + +/** + * Read the bytes from the buffer without advancing the read pointer. + */ +static void* filereader_impl_read_bytes( + const struct CborValue *value, + void* dst, size_t offset, size_t len +) { + struct filereader *context = filereader_get_context(value); + + /* Determine read position factoring in offset */ + const uint16_t pos = filereader_get_pos(value) + offset; + + /* Fetch the data from the file */ + const uint8_t* ptr = filereader_fetch_ptr(context, pos, (uint16_t)len); + if (ptr != NULL) { + return memcpy(dst, ptr, len); + } else { + /* We could not read the data */ + return NULL; + } +} + +/** + * Advance the pointer by the requested amount. 
+ */ +static void filereader_impl_advance_bytes(struct CborValue *value, size_t len) { + filereader_set_pos(value, filereader_get_pos(value) + (uint16_t)len); +} + +/** + * Retrieve a pointer to the string defined by the given offset and length. + */ +CborError filereader_impl_transfer_string( + struct CborValue *value, + const void **userptr, size_t offset, size_t len +) { + struct filereader *context = filereader_get_context(value); + + /* Determine read position factoring in offset */ + const uint16_t pos = filereader_get_pos(value) + offset; + + /* Fetch the data from the file */ + const uint8_t* ptr = filereader_fetch_ptr(context, pos, (uint16_t)len); + if (ptr != NULL) { + /* All good, advance the cursor past the data and return the pointer */ + filereader_set_pos(value, pos + len); + *userptr = (void*)ptr; + return CborNoError; + } else { + /* We could not read the data */ + return CborErrorIO; + } +} + +/** + * Implementation of the CBOR File Reader operations. + */ +static const struct CborParserOperations filereader_ops = { + .can_read_bytes = filereader_impl_can_read_bytes, + .read_bytes = filereader_impl_read_bytes, + .advance_bytes = filereader_impl_advance_bytes, + .transfer_string = filereader_impl_transfer_string +}; + +/** + * Open a CBOR file for reading. + * + * \param[inout] parser CBOR parser object to initialise. + * \param[inout] value Root CBOR cursor object to initialise. + * + * \param[inout] context The file reader context. This must exist + * for the duration the file is open. + * + * \param[inout] buffer Read buffer allocated by the caller where + * the read data will be stored. + * + * \param[in] buffer_sz Size of the read buffer. + * + * \param[in] path The path to the file being read. + * + * \param[in] flags `open` flags. `O_RDONLY` is logic-ORed + * with this value, but the user may provide + * other options here. + * + * \param[in] block_sz Size of read blocks. 
Where possible, + * reads will be rounded up and aligned with + * blocks of this size for efficiency. Set + * to 0 to default to `buffer_sz / 2`. + * + * \retval CborErrorIO The `open` call failed for some reason, + * see the POSIX standard `errno` variable + * for why. + * + * \retval CborErrorDataTooLarge The CBOR document is too big to be + * handled by this reader. + * + * \retval CborNoError CBOR parser initialised successfully. + */ +CborError filereader_open( + CborParser * const parser, + CborValue * const value, + struct filereader * const context, + uint8_t *buffer, + uint16_t buffer_sz, + const char* path, + int flags, + uint16_t block_sz +) +{ + CborError error = CborNoError; + struct stat path_stat; + + /* Determine the file size */ + if (stat(path, &path_stat) < 0) { + /* stat fails */ + error = CborErrorIO; + } else { + context->fd = open(path, O_RDONLY | flags); + if (context->fd < 0) { + /* Open fails */ + error = CborErrorIO; + } else { + /* Sanity check document size */ + if (path_stat.st_size > UINT16_MAX) { + error = CborErrorDataTooLarge; + } else { + /* Initialise structure */ + context->pos = 0; + context->used_sz = 0; + context->buffer = buffer; + context->buffer_sz = buffer_sz; + context->file_sz = (uint16_t)path_stat.st_size; + + if (block_sz == 0) { + block_sz = buffer_sz / 2; + } + context->block_sz = block_sz; + + /* Fill the initial buffer */ + if (filereader_load_buffer(context, 0, buffer_sz) >= 0) { + /* Initialise the CBOR parser */ + error = cbor_parser_init_reader( + &filereader_ops, parser, value, (void*)context + ); + } + } + + if (error != CborNoError) { + /* Close the file, if we can */ + assert(close(context->fd) == 0); + context->fd = -1; + } + } + } + + return error; +} + +/** + * Close the file reader. + * + * \param[inout] context File reader context to close. + * + * \retval CborErrorIO The `close` call failed for some + * reason, see the POSIX standard + * `errno` variable for why.
+ * + * \retval CborNoError File closed, `fd` should be set to -1. + */ +CborError filereader_close(struct filereader * const context) +{ + CborError error = CborNoError; + + /* Try to close the file */ + if (close(context->fd) < 0) { + /* Close fails! */ + error = CborErrorIO; + } else { + context->fd = -1; + } + + return error; +} + +/* --- Example usage of the above reader --- */ + +/** + * Indent the output text to the level specified. Taken from `simplereader.c` + */ +static void indent(int nestingLevel) +{ + while (nestingLevel--) + printf(" "); +} + +/** + * Dump the raw bytes given. Taken from `simplereader.c` + */ +static void dumpbytes(const uint8_t *buf, size_t len) +{ + while (len--) + printf("%02X ", *buf++); +} + +/** + * Recursively dump the CBOR data structure. Taken from `simplereader.c` + */ +static CborError dumprecursive(CborValue *it, int nestingLevel) +{ + while (!cbor_value_at_end(it)) { + CborError err; + CborType type = cbor_value_get_type(it); + + indent(nestingLevel); + switch (type) { + case CborArrayType: + case CborMapType: { + // recursive type + CborValue recursed; + assert(cbor_value_is_container(it)); + puts(type == CborArrayType ? 
"Array[" : "Map["); + err = cbor_value_enter_container(it, &recursed); + if (err) + return err; // parse error + err = dumprecursive(&recursed, nestingLevel + 1); + if (err) + return err; // parse error + err = cbor_value_leave_container(it, &recursed); + if (err) + return err; // parse error + indent(nestingLevel); + puts("]"); + continue; + } + + case CborIntegerType: { + int64_t val; + cbor_value_get_int64(it, &val); // can't fail + printf("%lld\n", (long long)val); + break; + } + + case CborByteStringType: { + uint8_t *buf; + size_t n; + err = cbor_value_dup_byte_string(it, &buf, &n, it); + if (err) + return err; // parse error + dumpbytes(buf, n); + printf("\n"); + free(buf); + continue; + } + + case CborTextStringType: { + char *buf; + size_t n; + err = cbor_value_dup_text_string(it, &buf, &n, it); + if (err) + return err; // parse error + puts(buf); + free(buf); + continue; + } + + case CborTagType: { + CborTag tag; + cbor_value_get_tag(it, &tag); // can't fail + printf("Tag(%lld)\n", (long long)tag); + break; + } + + case CborSimpleType: { + uint8_t type; + cbor_value_get_simple_type(it, &type); // can't fail + printf("simple(%u)\n", type); + break; + } + + case CborNullType: + puts("null"); + break; + + case CborUndefinedType: + puts("undefined"); + break; + + case CborBooleanType: { + bool val; + cbor_value_get_boolean(it, &val); // can't fail + puts(val ? "true" : "false"); + break; + } + + case CborDoubleType: { + double val; + if (false) { + float f; + case CborFloatType: + cbor_value_get_float(it, &f); + val = f; + } else { + cbor_value_get_double(it, &val); + } + printf("%g\n", val); + break; + } + case CborHalfFloatType: { + uint16_t val; + cbor_value_get_half_float(it, &val); + printf("__f16(%04x)\n", val); + break; + } + + case CborInvalidType: + assert(false); // can't happen + break; + } + + err = cbor_value_advance_fixed(it); + if (err) + return err; + } + return CborNoError; +} + +/** + * Print the error encountered. 
If the error is `CborErrorIO`, also check + * the global `errno` variable and print the resultant error seen. + * + * \param[in] error CborError constant + */ +void print_err(CborError error) +{ + if (error == CborErrorIO) { + printf("IO: %s\n", strerror(errno)); + } else { + printf("%s\n", cbor_error_string(error)); + } +} + +int main(int argc, char **argv) +{ + if (argc < 2) { + printf( + "Usage: %s [buffer_sz [block_sz]]\n", + argv[0] + ); + return 1; + } else { + struct filereader context; + CborParser parser; + CborValue value; + CborError error; + + uint16_t buffer_sz = 64; + uint16_t block_sz = 0; + + if (argc > 2) { + /* buffer_sz given */ + char *endptr = NULL; + unsigned long long_buffer_sz = strtoul(argv[2], &endptr, 0); + + if (!endptr || *endptr) { + printf("Invalid buffer size %s\n", argv[2]); + return 1; + } + + if (long_buffer_sz > UINT16_MAX) { + printf("Buffer size (%lu bytes) too big\n", long_buffer_sz); + return 1; + } + + buffer_sz = (uint16_t)long_buffer_sz; + + if (argc > 3) { + /* block_sz given */ + char *endptr = NULL; + unsigned long long_block_sz = strtoul(argv[3], &endptr, 0); + + if (!endptr || *endptr) { + printf("Invalid block size %s\n", argv[3]); + return 1; + } + + if (long_block_sz > buffer_sz) { + printf("Block size (%lu bytes) too big\n", long_block_sz); + return 1; + } + + block_sz = (uint16_t)long_block_sz; + } + } + + /* Allocate the buffer on the stack */ + uint8_t buffer[buffer_sz]; + + /* Open the file for reading */ + error = filereader_open( + &parser, /* CBOR context */ + &value, /* CBOR cursor */ + &context, /* Reader context */ + buffer, buffer_sz, /* Reader buffer & size */ + argv[1], /* File name */ + 0, /* Open flags */ + block_sz /* Block size */ + ); + + if (error != CborNoError) { + printf("Failed to open %s for reading: ", argv[1]); + print_err(error); + } else { + error = dumprecursive(&value, 0); + if (error != CborNoError) { + printf("Failed to read file: "); + print_err(error); + } + +
error = filereader_close(&context); + if (error != CborNoError) { + printf("Failed to close file: "); + print_err(error); + } + } + + if (error != CborNoError) { + return 2; + } else { + return 0; + } + } +} From ffb1a36c1e7fd9db8cb4100a7f2e2e86b290c913 Mon Sep 17 00:00:00 2001 From: Stuart Longland Date: Wed, 8 Sep 2021 08:32:01 +1000 Subject: [PATCH 12/15] parser unit tests: Do not use `auto`, use reinterpret_cast --- tests/parser/tst_parser.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/parser/tst_parser.cpp b/tests/parser/tst_parser.cpp index 01ae4bd1..8965b9af 100644 --- a/tests/parser/tst_parser.cpp +++ b/tests/parser/tst_parser.cpp @@ -809,30 +809,30 @@ void tst_Parser::mapsAndArrays() static const CborParserOperations byteArrayOps = { /* can_read_bytes = */ [](const CborValue *value, size_t len) { - auto data = static_cast(value->parser->data.ctx); - auto consumed = uintptr_t(value->source.token); + QByteArray *data = static_cast(value->parser->data.ctx); + uintptr_t consumed = uintptr_t(value->source.token); return uintptr_t(data->size()) - consumed >= uintptr_t(len); }, /* read_bytes = */ [](const CborValue *value, void *dst, size_t offset, size_t len) { - auto data = static_cast(value->parser->data.ctx); - auto consumed = uintptr_t(value->source.token); + QByteArray *data = static_cast(value->parser->data.ctx); + uintptr_t consumed = uintptr_t(value->source.token); return memcpy(dst, data->constData() + consumed + offset, len); }, /* advance_bytes = */ [](CborValue *value, size_t len) { - auto consumed = uintptr_t(value->source.token); - consumed += int(len); - value->source.token = (void*)consumed; + uintptr_t consumed = uintptr_t(value->source.token); + consumed += uintptr_t(len); + value->source.token = reinterpret_cast(consumed); }, /* transfer_string = */ [](CborValue *value, const void **userptr, size_t offset, size_t len) { // ### - auto data = static_cast(value->parser->data.ctx); - auto 
consumed = uintptr_t(value->source.token); + QByteArray *data = static_cast(value->parser->data.ctx); + uintptr_t consumed = uintptr_t(value->source.token); if (uintptr_t(data->size()) - consumed < uintptr_t(len + offset)) return CborErrorUnexpectedEOF; - consumed += int(offset); + consumed += uintptr_t(offset); *userptr = data->constData() + consumed; - consumed += int(len); - value->source.token = (void*)consumed; + consumed += uintptr_t(len); + value->source.token = reinterpret_cast(consumed); return CborNoError; } }; From 496f0fb39833ce8ff4042481eddaaff79e51b60b Mon Sep 17 00:00:00 2001 From: Stuart Longland Date: Sat, 25 Apr 2026 10:28:45 +1000 Subject: [PATCH 13/15] cborinternal_p: Check for non-null "ops" Instead of testing a bit in a flags bitmap. https://github.com/intel/tinycbor/pull/208#discussion_r3132299467 --- src/cborinternal_p.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cborinternal_p.h b/src/cborinternal_p.h index ae26ca2a..190f5129 100644 --- a/src/cborinternal_p.h +++ b/src/cborinternal_p.h @@ -205,7 +205,7 @@ static inline void copy_current_position(CborValue *dst, const CborValue *src) static inline bool can_read_bytes(const CborValue *it, size_t n) { if (CBOR_PARSER_READER_CONTROL >= 0) { - if (it->parser->flags & CborParserFlag_ExternalSource || CBOR_PARSER_READER_CONTROL != 0) { + if (it->parser->ops != CBOR_NULLPTR || CBOR_PARSER_READER_CONTROL != 0) { #ifdef CBOR_PARSER_CAN_READ_BYTES_FUNCTION return CBOR_PARSER_CAN_READ_BYTES_FUNCTION(it, n); #else From 4b152ebf96fb7cfc1ab821f71f861e1c96ec4bae Mon Sep 17 00:00:00 2001 From: Stuart Longland Date: Sat, 25 Apr 2026 10:34:05 +1000 Subject: [PATCH 14/15] cborparser: Explicitly set ops = CBOR_NULLPTR As per suggestion: https://github.com/intel/tinycbor/pull/208#discussion_r3132303010 --- src/cborparser.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cborparser.c b/src/cborparser.c index c9aa3b03..3c3fc18f 100644 --- a/src/cborparser.c +++ b/src/cborparser.c 
@@ -407,6 +407,7 @@ CborError cbor_parser_init(const uint8_t *buffer, size_t size, uint32_t flags, C memset(parser, 0, sizeof(*parser)); parser->data.end = buffer + size; parser->flags = (enum CborParserGlobalFlags)flags; + parser->ops = CBOR_NULLPTR; it->parser = parser; it->source.ptr = buffer; it->remaining = 1; /* there's one type altogether, usually an array or map */ From 0ec01d24630aa286506f29d06ecae3f6609deb21 Mon Sep 17 00:00:00 2001 From: Stuart Longland Date: Sat, 25 Apr 2026 10:37:41 +1000 Subject: [PATCH 15/15] CborParser tests: use `ptrdiff_t` instead of `uintptr_t` To ensure we don't truncate to 32-bits as suggested here: https://github.com/intel/tinycbor/pull/208#discussion_r3132323304 --- tests/parser/tst_parser.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/parser/tst_parser.cpp b/tests/parser/tst_parser.cpp index 8965b9af..63fb8a51 100644 --- a/tests/parser/tst_parser.cpp +++ b/tests/parser/tst_parser.cpp @@ -810,28 +810,28 @@ void tst_Parser::mapsAndArrays() static const CborParserOperations byteArrayOps = { /* can_read_bytes = */ [](const CborValue *value, size_t len) { QByteArray *data = static_cast(value->parser->data.ctx); - uintptr_t consumed = uintptr_t(value->source.token); - return uintptr_t(data->size()) - consumed >= uintptr_t(len); + ptrdiff_t consumed = ptrdiff_t(value->source.token); + return ptrdiff_t(data->size()) - consumed >= ptrdiff_t(len); }, /* read_bytes = */ [](const CborValue *value, void *dst, size_t offset, size_t len) { QByteArray *data = static_cast(value->parser->data.ctx); - uintptr_t consumed = uintptr_t(value->source.token); + ptrdiff_t consumed = ptrdiff_t(value->source.token); return memcpy(dst, data->constData() + consumed + offset, len); }, /* advance_bytes = */ [](CborValue *value, size_t len) { - uintptr_t consumed = uintptr_t(value->source.token); - consumed += uintptr_t(len); + ptrdiff_t consumed = ptrdiff_t(value->source.token); + consumed += ptrdiff_t(len); 
value->source.token = reinterpret_cast(consumed); }, /* transfer_string = */ [](CborValue *value, const void **userptr, size_t offset, size_t len) { // ### QByteArray *data = static_cast(value->parser->data.ctx); - uintptr_t consumed = uintptr_t(value->source.token); - if (uintptr_t(data->size()) - consumed < uintptr_t(len + offset)) + ptrdiff_t consumed = ptrdiff_t(value->source.token); + if (ptrdiff_t(data->size()) - consumed < ptrdiff_t(len + offset)) return CborErrorUnexpectedEOF; - consumed += uintptr_t(offset); + consumed += ptrdiff_t(offset); *userptr = data->constData() + consumed; - consumed += uintptr_t(len); + consumed += ptrdiff_t(len); value->source.token = reinterpret_cast(consumed); return CborNoError; }