Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions io/io/inc/TBufferFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,17 @@ namespace TStreamerInfoActions {
}

class TBufferFile : public TBufferIO {
public:
using ByteCountLocator_t = std::size_t; // This might become a pair<chunk_number, local_offset> if we implement chunked keys
using ByteCount_t = std::uint64_t; ///< Type used to store byte count values, can be changed to uint32_t if we implement chunked keys
using ByteCountFinder_t = std::unordered_map<ByteCountLocator_t, ByteCount_t>;

protected:
typedef std::vector<TStreamerInfo*> InfoList_t;

TStreamerInfo *fInfo{nullptr}; ///< Pointer to TStreamerInfo object writing/reading the buffer
InfoList_t fInfoStack; ///< Stack of pointers to the TStreamerInfos

using ByteCountLocator_t = std::size_t; // This might become a pair<chunk_number, local_offset> if we implement chunked keys
struct ByteCountLocationInfo {
///< Position where the byte count value is stored
ByteCountLocator_t locator;
Expand All @@ -66,8 +69,6 @@ class TBufferFile : public TBufferIO {
using ByteCountStack_t = std::vector<ByteCountLocationInfo>;
ByteCountStack_t fByteCountStack; ///<! Stack to keep track of byte count storage positions

using ByteCount_t = std::uint64_t; ///< Type used to store byte count values, can be changed to uint32_t if we implement chunked keys
using ByteCountFinder_t = std::unordered_map<ByteCountLocator_t, ByteCount_t>;
// fByteCounts will be stored either in the header/summary tkey or at the end
// of the last segment/chunk for a large TKey.
ByteCountFinder_t fByteCounts; ///< Map to find the byte count value for a given position
Expand Down
14 changes: 13 additions & 1 deletion tree/ntuple/doc/BinaryFormatSpecification.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# RNTuple Binary Format Specification 1.0.2.0
# RNTuple Binary Format Specification 1.0.3.0

## Versioning Notes

Expand Down Expand Up @@ -1130,6 +1130,18 @@ The first (principal) column is of type `(Split)Index[32|64]`.
The second column is of type `Byte`.
In effect, the column representation is identical to a collection of `std::byte`.

There are two field versions for the streamer field, version 0 and version 1.
Both versions have an identical on-disk representation when the streamed object is smaller than 1GiB.
Only version 1 supports larger streamed objects.
For large objects, the version 1 streamer field prepends the large byte counts ("byte count stack") to the byte stream.
The format for the version 1 byte stream is

- 64bit unsigned integer: number of elements in the large byte count list
- List of 64bit unsigned integer pairs with the byte count location and byte count value
- Regular ROOT object stream

The integer values before the regular ROOT object stream are stored in little-endian byte order.

### Untyped collections and records

Untyped collections and records are fields with a collection or record role and an empty type name.
Expand Down
4 changes: 4 additions & 0 deletions tree/ntuple/inc/ROOT/RField.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,8 @@ public:
/// The field for a class using ROOT standard streaming
class RStreamerField final : public RFieldBase {
private:
static constexpr std::size_t kMaxSmallBuffer = 1024 * 1024 * 1024; ///< maximum buffer size for v0 streamer field

class RStreamerFieldDeleter : public RDeleter {
private:
TClass *fClass;
Expand Down Expand Up @@ -283,6 +285,8 @@ public:

size_t GetValueSize() const final;
size_t GetAlignment() const final;
// As of field version 1, the byte stream contains the byte count stack for large objects (see binary specs)
std::uint32_t GetFieldVersion() const final { return 1; }
std::uint32_t GetTypeVersion() const final;
std::uint32_t GetTypeChecksum() const final;
TClass *GetClass() const { return fClass; }
Expand Down
5 changes: 5 additions & 0 deletions tree/ntuple/inc/ROOT/RFieldBase.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ protected:
};

public:
static constexpr std::uint32_t kInvalidFieldVersion = -1U;
static constexpr std::uint32_t kInvalidTypeVersion = -1U;
enum {
/// No constructor needs to be called, i.e. any bit pattern in the allocated memory represents a valid type
Expand Down Expand Up @@ -320,6 +321,8 @@ protected:
std::string fTypeAlias;
/// List of functions to be called after reading a value
std::vector<ReadCallback_t> fReadCallbacks;
/// Field version cached from the descriptor after a call to ConnectPageSource()
std::uint32_t fOnDiskFieldVersion = kInvalidFieldVersion;
/// C++ type version cached from the descriptor after a call to ConnectPageSource()
std::uint32_t fOnDiskTypeVersion = kInvalidTypeVersion;
/// TClass checksum cached from the descriptor after a call to ConnectPageSource(). Only set
Expand Down Expand Up @@ -664,6 +667,8 @@ public:
virtual std::uint32_t GetTypeVersion() const { return 0; }
/// Return the current TClass reported checksum of this class. Only valid if `kTraitTypeChecksum` is set.
virtual std::uint32_t GetTypeChecksum() const { return 0; }
/// Return the field version stored in the field descriptor; only valid after a call to ConnectPageSource()
std::uint32_t GetOnDiskFieldVersion() const { return fOnDiskFieldVersion; }
/// Return the C++ type version stored in the field descriptor; only valid after a call to ConnectPageSource()
std::uint32_t GetOnDiskTypeVersion() const { return fOnDiskTypeVersion; }
/// Return checksum stored in the field descriptor; only valid after a call to ConnectPageSource(),
Expand Down
1 change: 1 addition & 0 deletions tree/ntuple/src/RFieldBase.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1023,6 +1023,7 @@ void ROOT::RFieldBase::ConnectPageSource(ROOT::Internal::RPageSource &pageSource
R__ASSERT(!fColumnRepresentatives.empty());
if (fOnDiskId != ROOT::kInvalidDescriptorId) {
const auto &fieldDesc = desc.GetFieldDescriptor(fOnDiskId);
fOnDiskFieldVersion = fieldDesc.GetFieldVersion();
fOnDiskTypeVersion = fieldDesc.GetTypeVersion();
if (fieldDesc.GetTypeChecksum().has_value())
fOnDiskTypeChecksum = *fieldDesc.GetTypeChecksum();
Expand Down
65 changes: 62 additions & 3 deletions tree/ntuple/src/RFieldMeta.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <ROOT/RFieldBase.hxx>
#include <ROOT/RFieldUtils.hxx>
#include <ROOT/RFieldVisitor.hxx>
#include <ROOT/RNTupleSerialize.hxx>
#include <ROOT/RNTupleUtils.hxx>
#include <ROOT/RSpan.hxx>

Expand Down Expand Up @@ -1316,11 +1317,29 @@ std::size_t ROOT::RStreamerField::AppendImpl(const void *from)
[this](TVirtualStreamerInfo *info) { fStreamerInfos[info->GetNumber()] = info; });
fClass->Streamer(const_cast<void *>(from), buffer);

auto nbytes = buffer.Length();
const auto nbytes = buffer.Length();
std::size_t szBufCounts = 0;
R__ASSERT(nbytes >= 0);
if (static_cast<std::size_t>(nbytes) > kMaxSmallBuffer) {
const std::uint64_t nCounts = buffer.GetByteCounts().size();
szBufCounts = sizeof(std::uint64_t) * (2 * nCounts + 1);
auto bufCounts = Internal::MakeUninitArray<unsigned char>(szBufCounts);
std::size_t pos = Internal::RNTupleSerializer::SerializeUInt64(nCounts, bufCounts.get());
for (const auto &[bcountLoc, bcountVal] : buffer.GetByteCounts()) {
pos += Internal::RNTupleSerializer::SerializeUInt64(bcountLoc, bufCounts.get() + pos);
pos += Internal::RNTupleSerializer::SerializeUInt64(bcountVal, bufCounts.get() + pos);
}
assert(pos == szBufCounts);
fAuxiliaryColumn->AppendV(bufCounts.get(), szBufCounts);
fIndex += szBufCounts;
} else {
assert(buffer.GetByteCounts().empty());
}

fAuxiliaryColumn->AppendV(buffer.Buffer(), buffer.Length());
fIndex += nbytes;
fPrincipalColumn->Append(&fIndex);
return nbytes + fPrincipalColumn->GetElement()->GetPackedSize();
return szBufCounts + nbytes + fPrincipalColumn->GetElement()->GetPackedSize();
}

void ROOT::RStreamerField::ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *to)
Expand All @@ -1329,7 +1348,41 @@ void ROOT::RStreamerField::ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *
ROOT::NTupleSize_t nbytes;
fPrincipalColumn->GetCollectionInfo(globalIndex, &collectionStart, &nbytes);

TBufferFile::ByteCountFinder_t byteCounts;
if (nbytes > kMaxSmallBuffer) {
std::vector<unsigned char> bufCounts(sizeof(std::uint64_t));
fAuxiliaryColumn->ReadV(collectionStart, sizeof(std::uint64_t), bufCounts.data());
std::uint64_t nCounts;
std::size_t pos = Internal::RNTupleSerializer::DeserializeUInt64(bufCounts.data(), nCounts);
if (nCounts > (std::numeric_limits<std::size_t>::max() / sizeof(std::uint64_t)) / 2 - 1)
throw RException(R__FAIL("invalid byte count size in streamer field: " + std::to_string(nCounts)));
const std::size_t szBufCounts = sizeof(std::uint64_t) * (2 * nCounts + 1);
if (szBufCounts > nbytes)
throw RException(R__FAIL("invalid byte count size in streamer field: " + std::to_string(nCounts)));
bufCounts.resize(szBufCounts);
nbytes -= szBufCounts;
fAuxiliaryColumn->ReadV(collectionStart + sizeof(uint64_t), szBufCounts - sizeof(uint64_t),
bufCounts.data() + sizeof(uint64_t));
collectionStart = collectionStart + szBufCounts;

byteCounts.reserve(nCounts);
for (std::uint64_t i = 0; i < nCounts; ++i) {
std::uint64_t bcountLoc, bcountVal;
pos += Internal::RNTupleSerializer::DeserializeUInt64(bufCounts.data() + pos, bcountLoc);
pos += Internal::RNTupleSerializer::DeserializeUInt64(bufCounts.data() + pos, bcountVal);
if ((bcountLoc > nbytes) || (bcountVal > nbytes) || (nbytes - bcountVal < bcountLoc)) {
throw RException(R__FAIL("invalid byte count record: " + std::to_string(bcountLoc) + ", " +
std::to_string(bcountVal)));
}
byteCounts.emplace(bcountLoc, bcountVal);
}
assert(pos == szBufCounts);
if (byteCounts.size() != nCounts)
throw RException(R__FAIL("duplicate byte counts"));
}

TBufferFile buffer(TBuffer::kRead, nbytes);
buffer.SetByteCounts(std::move(byteCounts));
fAuxiliaryColumn->ReadV(collectionStart, nbytes, buffer.Buffer());
fClass->Streamer(to, buffer);
}
Expand Down Expand Up @@ -1362,7 +1415,13 @@ std::unique_ptr<ROOT::RFieldBase> ROOT::RStreamerField::BeforeConnectPageSource(

/// Validates the on-disk field described by `desc` against this streamer field.
/// The result of EnsureMatchingOnDiskField() is passed through ThrowOnError(); afterwards an RException
/// is thrown if the field version stored in the descriptor is larger than 1.
// NOTE(review): the two consecutive EnsureMatchingOnDiskField() calls below differ only in the
// kDiffFieldVersion bit and look like the removed/added pair of a diff rendered without +/- markers;
// presumably only the second call is meant to remain -- confirm against the repository.
void ROOT::RStreamerField::ReconcileOnDiskField(const RNTupleDescriptor &desc)
{
EnsureMatchingOnDiskField(desc, kDiffTypeName | kDiffTypeVersion).ThrowOnError();
EnsureMatchingOnDiskField(desc, kDiffTypeName | kDiffTypeVersion | kDiffFieldVersion).ThrowOnError();
const auto &fieldDesc = desc.GetFieldDescriptor(GetOnDiskId());
// Field versions 0 and 1 pass this check; any larger on-disk version is reported as unsupported,
// together with the type trace report for this field.
if (fieldDesc.GetFieldVersion() > 1) {
throw RException(R__FAIL("RStreamerField " + GetQualifiedFieldName() + " has unsupported field version " +
std::to_string(fieldDesc.GetFieldVersion()) + "\n" +
Internal::GetTypeTraceReport(*this, desc)));
}
}

void ROOT::RStreamerField::ConstructValue(void *where) const
Expand Down
5 changes: 5 additions & 0 deletions tree/ntuple/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@ ROOT_GENERATE_DICTIONARY(StreamerFieldDict ${CMAKE_CURRENT_SOURCE_DIR}/StreamerF
MODULE rfield_streamer LINKDEF StreamerFieldLinkDef.h OPTIONS -inlineInputHeader
DEPENDENCIES RIO)

ROOT_ADD_GTEST(rfield_streamer_beyond rfield_streamer_beyond.cxx StreamerBeyond.cxx LIBRARIES ROOTNTuple)
ROOT_GENERATE_DICTIONARY(StreamerBeyondDict ${CMAKE_CURRENT_SOURCE_DIR}/StreamerBeyond.hxx
MODULE rfield_streamer_beyond LINKDEF StreamerBeyondLinkDef.h OPTIONS -inlineInputHeader
DEPENDENCIES RIO)

if(MSVC)
set(command ${CMAKE_COMMAND} -E env "ROOTIGNOREPREFIX=1" $<TARGET_FILE:genreflex>)
else()
Expand Down
5 changes: 5 additions & 0 deletions tree/ntuple/test/CustomStruct.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -478,4 +478,9 @@ struct MemberWithCustomStreamer {
ClassDefNV(MemberWithCustomStreamer, 2);
};

/// Empty class declared through ClassDefNV with the number 2.
/// NOTE(review): presumably serves as the in-memory type behind streamer fields in the field version
/// tests, where 2 is the expected type version -- confirm against rfield_streamer.cxx.
struct VersionedStreamerField
{
ClassDefNV(VersionedStreamerField, 2);
};

#endif
2 changes: 2 additions & 0 deletions tree/ntuple/test/CustomStructLinkDef.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,4 +181,6 @@

#pragma link C++ class MemberWithCustomStreamer+;

#pragma link C++ class VersionedStreamerField+;

#endif
1 change: 1 addition & 0 deletions tree/ntuple/test/StreamerBeyond.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#include "StreamerBeyond.hxx"
14 changes: 14 additions & 0 deletions tree/ntuple/test/StreamerBeyond.hxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#ifndef ROOT_RNTuple_Test_StreamerBeyond
#define ROOT_RNTuple_Test_StreamerBeyond

#include <Rtypes.h>

#include <cstdint>
#include <vector>

/// Test class holding two independent vectors of 64-bit signed integers.
/// NOTE(review): presumably filled with enough data to exceed the small-buffer limit of the
/// streamer field -- confirm against rfield_streamer_beyond.cxx.
struct StreamerBeyond {
std::vector<std::int64_t> fOne;
std::vector<std::int64_t> fTwo;
};

#endif
5 changes: 5 additions & 0 deletions tree/ntuple/test/StreamerBeyondLinkDef.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#ifdef __CLING__

#pragma link C++ options=rntupleStreamerMode(true) struct StreamerBeyond+;

#endif
10 changes: 9 additions & 1 deletion tree/ntuple/test/rfield_class.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,15 @@ TEST(RNTuple, TClass) {
EXPECT_THROW(model->MakeField<TDatime>("datime"), ROOT::RException);

FileRaii fileGuard("test_ntuple_tclass.root");
auto ntuple = RNTupleWriter::Recreate(std::move(model), "f", fileGuard.GetPath());
{
RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath());
}
auto reader = RNTupleReader::Open("ntpl", fileGuard.GetPath());
const auto &f = reader->GetModel().GetConstField("klass");
EXPECT_EQ(0u, f.GetFieldVersion());
EXPECT_EQ(0u, f.GetOnDiskFieldVersion());
EXPECT_EQ(TClass::GetClass("CustomStruct")->GetClassVersion(), f.GetOnDiskTypeVersion());
EXPECT_EQ(TClass::GetClass("CustomStruct")->GetClassVersion(), f.GetTypeVersion());
}

TEST(RNTuple, CyclicClass)
Expand Down
71 changes: 71 additions & 0 deletions tree/ntuple/test/rfield_streamer.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -407,3 +407,74 @@ TEST(RField, StreamerClassMismatch)
false /* matchFullMessage */);
reader->LoadEntry(0);
}

namespace {

/// Used to create on-disk streamer fields with different field versions
/// Helper field for writing on-disk streamer fields that carry an arbitrary, caller-chosen field version.
/// It declares the streamer structure and an index + byte column pair but never stores any data.
class RVersionedStreamerField : public RFieldBase {
public:
   /// The value handed out by GetFieldVersion()
   std::uint32_t fCustomVersion = 0;

   RVersionedStreamerField(std::string_view name, std::uint32_t version)
      : RFieldBase(name, "VersionedStreamerField", ROOT::ENTupleStructure::kStreamer, /*isSimple=*/false),
        fCustomVersion(version)
   {
   }

   /// Reports the version passed at construction time
   std::uint32_t GetFieldVersion() const final
   {
      return fCustomVersion;
   }

   /// Fixed type version, deliberately independent of the field version
   std::uint32_t GetTypeVersion() const final
   {
      return 137;
   }

   std::size_t GetValueSize() const final
   {
      return 0;
   }

   std::size_t GetAlignment() const final
   {
      return 0;
   }

protected:
   /// Clones carry over the custom field version
   std::unique_ptr<RFieldBase> CloneImpl(std::string_view newName) const final
   {
      return std::make_unique<RVersionedStreamerField>(newName, fCustomVersion);
   }

   /// A single representation: 64bit split index column followed by a byte column
   const RColumnRepresentations &GetColumnRepresentations() const final
   {
      static RColumnRepresentations streamerLikeColumns(
         {{ROOT::ENTupleColumnType::kSplitIndex64, ROOT::ENTupleColumnType::kByte}}, {});
      return streamerLikeColumns;
   }

   /// Write path: create the index and byte columns
   void GenerateColumns() final
   {
      GenerateColumnsImpl<ROOT::Internal::RColumnIndex, std::byte>();
   }

   /// Read path: nothing to generate
   void GenerateColumns(const ROOT::RNTupleDescriptor &) final {}

   /// Values are never constructed for this field
   void ConstructValue(void *) const final {}

   /// Nothing is ever appended; reports zero bytes written
   std::size_t AppendImpl(const void *) final
   {
      return 0;
   }
};

} // anonymous namespace

/// For on-disk streamer field versions 0, 1 and 2: write an empty ntuple with that field version and
/// reopen it. Versions 0 and 1 must yield an RStreamerField in the reconstructed model; version 2 must
/// be rejected with an "unsupported field version" error when the model is requested.
TEST(RField, StreamerFieldVersion)
{
   for (const std::uint32_t onDiskVersion : {0u, 1u, 2u}) {
      FileRaii fileGuard("test_ntuple_rfield_streamer_version.root");
      {
         // Scope the writer so that the file is committed before it is read back
         auto writeModel = RNTupleModel::Create();
         writeModel->AddField(std::make_unique<RVersionedStreamerField>("f", onDiskVersion));
         auto writer = RNTupleWriter::Recreate(std::move(writeModel), "ntpl", fileGuard.GetPath());
      }
      auto reader = RNTupleReader::Open("ntpl", fileGuard.GetPath());

      if (onDiskVersion > 1) {
         // Unsupported version: building the model from the descriptor has to throw
         try {
            reader->GetModel().GetConstField("f");
            FAIL() << "creating model from unsupported field version should fail";
         } catch (const ROOT::RException &e) {
            EXPECT_THAT(e.what(), ::testing::HasSubstr("RStreamerField f has unsupported field version 2"));
         }
         continue;
      }

      // Supported version: the in-memory field is a regular RStreamerField (field version 1, class
      // version 2), while the on-disk versions are the ones written by the helper field.
      const auto &field = reader->GetModel().GetConstField("f");
      EXPECT_TRUE(dynamic_cast<const ROOT::RStreamerField *>(&field));
      EXPECT_EQ(1u, field.GetFieldVersion());
      EXPECT_EQ(onDiskVersion, field.GetOnDiskFieldVersion());
      EXPECT_EQ(2u, field.GetTypeVersion());
      EXPECT_EQ(137u, field.GetOnDiskTypeVersion());
   }
}
Loading
Loading