diff --git a/tree/ntuple/inc/ROOT/RPageStorage.hxx b/tree/ntuple/inc/ROOT/RPageStorage.hxx index b3b82482303f6..02e39aa41cd11 100644 --- a/tree/ntuple/inc/ROOT/RPageStorage.hxx +++ b/tree/ntuple/inc/ROOT/RPageStorage.hxx @@ -568,7 +568,7 @@ The page source also gives access to the ntuple's metadata. // clang-format on class RPageSource : public RPageStorage { protected: - /// Summarizes meta-data necessary to load a certain page. Used by LoadPageImpl(). + /// Summarizes meta-data necessary to load a certain page. Used by LoadPageFromSummary(). struct RPageSummary { ROOT::DescriptorId_t fClusterId = 0; /// The first element number of the page's column in the given cluster @@ -631,6 +631,37 @@ public: }; private: + /// Keeps track of the requested physical column IDs and their in-memory target type via a column element identifier. + /// When using alias columns (projected fields), physical columns may be requested multiple times. + class RActivePhysicalColumns { + public: + struct RColumnInfo { + ROOT::Internal::RColumnElementBase::RIdentifier fElementId; + std::size_t fRefCounter = 0; + }; + + private: + /// Maps physical column IDs to all the requested in-memory representations. + /// A pair of physical column ID and in-memory representation can be requested multiple times, which is + /// indicated by the reference counter. + /// We can only have a handful of possible in-memory representations for a given column, + /// so it is fine to search them linearly. + std::unordered_map> fColumnInfos; + + public: + void Insert(ROOT::DescriptorId_t physicalColumnId, ROOT::Internal::RColumnElementBase::RIdentifier elementId); + void Erase(ROOT::DescriptorId_t physicalColumnId, ROOT::Internal::RColumnElementBase::RIdentifier elementId); + ROOT::Internal::RCluster::ColumnSet_t ToColumnSet() const; + bool HasColumnInfos(ROOT::DescriptorId_t physicalColumnId) const + { + return fColumnInfos.count(physicalColumnId) > 0; + } + const std::vector &GetColumnInfos(ROOT::DescriptorId_t physicalColumnId) const + { + return fColumnInfos.at(physicalColumnId); + } + }; + ROOT::RNTupleDescriptor fDescriptor; mutable std::shared_mutex fDescriptorLock; REntryRange fEntryRange; ///< Used by the cluster pool to prevent reading beyond the given range @@ -638,6 +669,18 @@ private: bool fIsAttached = false; ///< Set to true once `Attach()` is called bool fHasStreamerInfosRegistered = false; ///< Set to true when RegisterStreamerInfos() is called. + /// The active columns are implicitly defined by the model fields or views + RActivePhysicalColumns fActivePhysicalColumns; + + /// The cluster pool asynchronously preloads the next few clusters. Note that derived classes should call + /// StopClusterPoolBackgroundThread() in their destructor so that the I/O background thread does not call + /// methods from the destructed derived class. + ROOT::Internal::RClusterPool fClusterPool; + /// The last cluster from which a page got loaded. Points into fClusterPool->fPool + ROOT::Internal::RCluster *fCurrentCluster = nullptr; + /// Pages that are unzipped with IMT are staged into the page pool + ROOT::Internal::RPagePool fPagePool; + /// Remembers the last cluster id from which a page was requested ROOT::DescriptorId_t fLastUsedCluster = ROOT::kInvalidDescriptorId; /// Clusters from where pages got preloaded in UnzipClusterImpl(), ordered by first entry number @@ -645,13 +688,21 @@ private: /// previous clusters are evicted from the page pool. Pinned clusters won't be evicted. std::map fPreloadedClusters; + /// Pinned clusters and their $2 * (cluster bunch size) - 1$ successors will not be evicted from the cluster pool. + /// Pages of pinned clusters won't be evicted from the page pool. + std::unordered_set fPinnedClusters; + /// Does nothing if fLastUsedCluster == clusterId. Otherwise, updated fLastUsedCluster /// and evict unused paged from the page pool of all previous clusters. /// Must not be called when the descriptor guard is taken. void UpdateLastUsedCluster(ROOT::DescriptorId_t clusterId); - // Common treatment of zero pages that would otherwise need to be handled in LoadPageImpl() + // Common treatment of zero pages in LoadPageFromSummary() ROOT::Internal::RPageRef LoadZeroPage(ColumnHandle_t columnHandle, const RPageSummary &pageSummary); + // Once the page is found to be missing in the page cache and all information about the page is collected, + // either using a global or a local element index, perform the common page loading tasks using the page summary. + // This includes zero page handling and prefetching the corresponding cluster. + ROOT::Internal::RPageRef LoadPageFromSummary(ColumnHandle_t columnHandle, const RPageSummary &pageSummary); protected: /// Default I/O performance counters that get registered in `fMetrics` @@ -675,52 +726,9 @@ protected: ROOT::Experimental::Detail::RNTupleCalcPerf &fCompressionRatio; }; - /// Keeps track of the requested physical column IDs and their in-memory target type via a column element identifier. - /// When using alias columns (projected fields), physical columns may be requested multiple times. - class RActivePhysicalColumns { - public: - struct RColumnInfo { - ROOT::Internal::RColumnElementBase::RIdentifier fElementId; - std::size_t fRefCounter = 0; - }; - - private: - /// Maps physical column IDs to all the requested in-memory representations. - /// A pair of physical column ID and in-memory representation can be requested multiple times, which is - /// indicated by the reference counter. - /// We can only have a handful of possible in-memory representations for a given column, - /// so it is fine to search them linearly. - std::unordered_map> fColumnInfos; - - public: - void Insert(ROOT::DescriptorId_t physicalColumnId, ROOT::Internal::RColumnElementBase::RIdentifier elementId); - void Erase(ROOT::DescriptorId_t physicalColumnId, ROOT::Internal::RColumnElementBase::RIdentifier elementId); - ROOT::Internal::RCluster::ColumnSet_t ToColumnSet() const; - bool HasColumnInfos(ROOT::DescriptorId_t physicalColumnId) const - { - return fColumnInfos.count(physicalColumnId) > 0; - } - const std::vector &GetColumnInfos(ROOT::DescriptorId_t physicalColumnId) const - { - return fColumnInfos.at(physicalColumnId); - } - }; - std::unique_ptr fCounters; ROOT::RNTupleReadOptions fOptions; - /// The active columns are implicitly defined by the model fields or views - RActivePhysicalColumns fActivePhysicalColumns; - /// Pinned clusters and their $2 * (cluster bunch size) - 1$ successors will not be evicted from the cluster pool. - /// Pages of pinned clusters won't be evicted from the page pool. - std::unordered_set fPinnedClusters; - - /// The cluster pool asynchronously preloads the next few clusters. Note that derived classes should call - /// fClusterPool.StopBackgroundThread() in their destructor so that the I/O background thread does not call - /// methods from the destructed derived class. - ROOT::Internal::RClusterPool fClusterPool; - /// Pages that are unzipped with IMT are staged into the page pool - ROOT::Internal::RPagePool fPagePool; virtual void LoadStructureImpl() = 0; /// `LoadStructureImpl()` has been called before `AttachImpl()` is called @@ -729,8 +737,6 @@ protected: virtual std::unique_ptr CloneImpl() const = 0; // Only called if a task scheduler is set. No-op be default. virtual void UnzipClusterImpl(ROOT::Internal::RCluster *cluster); - // Returns a page from storage if not found in the page pool. Will never receive requests for zero pages. - virtual ROOT::Internal::RPageRef LoadPageImpl(ColumnHandle_t columnHandle, const RPageSummary &pageSummary) = 0; // Returns a sealed page from storage without adding it to the page pool. The sealed pages buffer and buffer size // is already initialized. virtual void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) = 0; @@ -754,6 +760,9 @@ protected: /// Note that the underlying lock is not recursive. See `GetSharedDescriptorGuard()` for further information. RExclDescriptorGuard GetExclDescriptorGuard() { return RExclDescriptorGuard(fDescriptor, fDescriptorLock); } + // To be called in the destructor of derived classes + void StopClusterPoolBackgroundThread() { fClusterPool.StopBackgroundThread(); } + public: RPageSource(std::string_view ntupleName, const ROOT::RNTupleReadOptions &fOptions); RPageSource(const RPageSource &) = delete; @@ -809,8 +818,9 @@ public: void SetEntryRange(const REntryRange &range); REntryRange GetEntryRange() const { return fEntryRange; } - /// Allocates and fills a page that contains the index-th element. The default implementation searches - /// the page and calls LoadPageImpl(). Returns a default-constructed RPage for suppressed columns. + /// Allocates and fills a page that contains the index-th element. Calls into the concrete page source + /// for loading the corresponding sealed page of cluster where necessary. + /// Returns a default-constructed RPage for suppressed columns. virtual ROOT::Internal::RPageRef LoadPage(ColumnHandle_t columnHandle, ROOT::NTupleSize_t globalIndex); /// Another version of `LoadPage` that allows to specify cluster-relative indexes. /// Returns a default-constructed RPage for suppressed columns. diff --git a/tree/ntuple/inc/ROOT/RPageStorageDaos.hxx b/tree/ntuple/inc/ROOT/RPageStorageDaos.hxx index f2a6580d7d793..758db599a9134 100644 --- a/tree/ntuple/inc/ROOT/RPageStorageDaos.hxx +++ b/tree/ntuple/inc/ROOT/RPageStorageDaos.hxx @@ -159,7 +159,6 @@ private: ROOT::Internal::RNTupleDescriptorBuilder fDescriptorBuilder; - ROOT::Internal::RPageRef LoadPageImpl(ColumnHandle_t columnHandle, const RPageSummary &pageSummary) final; void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) final; protected: diff --git a/tree/ntuple/inc/ROOT/RPageStorageFile.hxx b/tree/ntuple/inc/ROOT/RPageStorageFile.hxx index 7cd74993f2451..abc0c8bf2333f 100644 --- a/tree/ntuple/inc/ROOT/RPageStorageFile.hxx +++ b/tree/ntuple/inc/ROOT/RPageStorageFile.hxx @@ -180,7 +180,6 @@ protected: /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data. std::unique_ptr CloneImpl() const final; - RPageRef LoadPageImpl(ColumnHandle_t columnHandle, const RPageSummary &pageSummary) final; void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) final; public: diff --git a/tree/ntuple/src/RPageStorage.cxx b/tree/ntuple/src/RPageStorage.cxx index 825e183b379aa..2cec2e82d1981 100644 --- a/tree/ntuple/src/RPageStorage.cxx +++ b/tree/ntuple/src/RPageStorage.cxx @@ -165,9 +165,9 @@ bool ROOT::Internal::RPageSource::REntryRange::IntersectsWith(const ROOT::RClust ROOT::Internal::RPageSource::RPageSource(std::string_view name, const ROOT::RNTupleReadOptions &options) : RPageStorage(name), - fOptions(options), - fClusterPool(*this, ROOT::Internal::RNTupleReadOptionsManip::GetClusterBunchSize(fOptions)), - fPagePool(*this) + fClusterPool(*this, ROOT::Internal::RNTupleReadOptionsManip::GetClusterBunchSize(options)), + fPagePool(*this), + fOptions(options) { } @@ -440,6 +440,70 @@ ROOT::Internal::RPageSource::LoadZeroPage(ColumnHandle_t columnHandle, const RPa return fPagePool.RegisterPage(std::move(pageZero), RPagePool::RKey{columnHandle.fPhysicalId, elementInMemoryType}); } +ROOT::Internal::RPageRef +ROOT::Internal::RPageSource::LoadPageFromSummary(ColumnHandle_t columnHandle, const RPageSummary &pageSummary) +{ + if (pageSummary.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypeUnknown) { + throw RException(R__FAIL("tried to read a page with an unknown locator")); + } else if (pageSummary.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypePageZero) { + return LoadZeroPage(columnHandle, pageSummary); + } + + const auto &columnId = columnHandle.fPhysicalId; + const auto &clusterId = pageSummary.fClusterId; + const auto &pageInfo = pageSummary.fPageInfo; + + const auto element = columnHandle.fColumn->GetElement(); + const auto elementSize = element->GetSize(); + const auto elementInMemoryType = element->GetIdentifier().fInMemoryType; + + UpdateLastUsedCluster(clusterId); + + RSealedPage sealedPage; + sealedPage.SetNElements(pageInfo.GetNElements()); + sealedPage.SetHasChecksum(pageInfo.HasChecksum()); + sealedPage.SetBufferSize(pageInfo.GetLocator().GetNBytesOnStorage() + pageInfo.HasChecksum() * kNBytesPageChecksum); + std::unique_ptr directReadBuffer; // only used if cluster pool is turned off + + if (fOptions.GetClusterCache() == ROOT::RNTupleReadOptions::EClusterCache::kOff) { + directReadBuffer = MakeUninitArray(sealedPage.GetBufferSize()); + sealedPage.SetBuffer(directReadBuffer.get()); + LoadSealedPageImpl(pageInfo.GetLocator(), sealedPage); + + fCounters->fNPageRead.Inc(); + fCounters->fNRead.Inc(); + fCounters->fSzReadPayload.Add(sealedPage.GetBufferSize()); + } else { + if (!fCurrentCluster || (fCurrentCluster->GetId() != clusterId) || !fCurrentCluster->ContainsColumn(columnId)) + fCurrentCluster = fClusterPool.GetCluster(clusterId, fActivePhysicalColumns.ToColumnSet()); + R__ASSERT(fCurrentCluster->ContainsColumn(columnId)); + + // The cluster pool may have unzipped the required page into the page pool + auto cachedPageRef = fPagePool.GetPage(ROOT::Internal::RPagePool::RKey{columnId, elementInMemoryType}, + RNTupleLocalIndex(clusterId, pageInfo.GetFirstElementIndex())); + if (!cachedPageRef.Get().IsNull()) + return cachedPageRef; + + ROnDiskPage::Key key(columnId, pageInfo.GetPageNumber()); + auto onDiskPage = fCurrentCluster->GetOnDiskPage(key); + R__ASSERT(onDiskPage && (sealedPage.GetBufferSize() == onDiskPage->GetSize())); + sealedPage.SetBuffer(onDiskPage->GetAddress()); + } + + ROOT::Internal::RPage newPage; + { + RNTupleAtomicTimer timer(fCounters->fTimeWallUnzip, fCounters->fTimeCpuUnzip); + newPage = UnsealPage(sealedPage, *element).Unwrap(); + fCounters->fSzUnzip.Add(elementSize * pageInfo.GetNElements()); + } + + newPage.SetWindow(pageSummary.fColumnOffset + pageInfo.GetFirstElementIndex(), + ROOT::Internal::RPage::RClusterInfo(clusterId, pageSummary.fColumnOffset)); + fCounters->fNPageUnsealed.Inc(); + + return fPagePool.RegisterPage(std::move(newPage), RPagePool::RKey{columnId, elementInMemoryType}); +} + ROOT::Internal::RPageRef ROOT::Internal::RPageSource::LoadPage(ColumnHandle_t columnHandle, ROOT::NTupleSize_t globalIndex) { @@ -470,14 +534,7 @@ ROOT::Internal::RPageSource::LoadPage(ColumnHandle_t columnHandle, ROOT::NTupleS pageSummary.fPageInfo = clusterDescriptor.GetPageRange(columnId).Find(globalIndex - pageSummary.fColumnOffset); } - if (pageSummary.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypeUnknown) { - throw RException(R__FAIL("tried to read a page with an unknown locator")); - } else if (pageSummary.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypePageZero) { - return LoadZeroPage(columnHandle, pageSummary); - } - - UpdateLastUsedCluster(pageSummary.fClusterId); - return LoadPageImpl(columnHandle, pageSummary); + return LoadPageFromSummary(columnHandle, pageSummary); } ROOT::Internal::RPageRef @@ -509,14 +566,7 @@ ROOT::Internal::RPageSource::LoadPage(ColumnHandle_t columnHandle, RNTupleLocalI pageSummary.fPageInfo = clusterDescriptor.GetPageRange(columnId).Find(localIndex.GetIndexInCluster()); } - if (pageSummary.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypeUnknown) { - throw RException(R__FAIL("tried to read a page with an unknown locator")); - } else if (pageSummary.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypePageZero) { - return LoadZeroPage(columnHandle, pageSummary); - } - - UpdateLastUsedCluster(clusterId); - return LoadPageImpl(columnHandle, pageSummary); + return LoadPageFromSummary(columnHandle, pageSummary); } void ROOT::Internal::RPageSource::EnableDefaultMetrics(const std::string &prefix) diff --git a/tree/ntuple/src/RPageStorageDaos.cxx b/tree/ntuple/src/RPageStorageDaos.cxx index abdefd355d7a1..b087a1973275f 100644 --- a/tree/ntuple/src/RPageStorageDaos.cxx +++ b/tree/ntuple/src/RPageStorageDaos.cxx @@ -452,7 +452,7 @@ ROOT::Experimental::Internal::RPageSourceDaos::RPageSourceDaos(std::string_view ROOT::Experimental::Internal::RPageSourceDaos::~RPageSourceDaos() { - fClusterPool.StopBackgroundThread(); + StopClusterPoolBackgroundThread(); } ROOT::RNTupleDescriptor @@ -504,63 +504,6 @@ void ROOT::Experimental::Internal::RPageSourceDaos::LoadSealedPageImpl(const RNT daosKey.fDkey, daosKey.fAkey); } -ROOT::Internal::RPageRef ROOT::Experimental::Internal::RPageSourceDaos::LoadPageImpl(ColumnHandle_t columnHandle, - const RPageSummary &pageSummary) -{ - const auto &columnId = columnHandle.fPhysicalId; - const auto &clusterId = pageSummary.fClusterId; - const auto &pageInfo = pageSummary.fPageInfo; - - const auto element = columnHandle.fColumn->GetElement(); - const auto elementSize = element->GetSize(); - const auto elementInMemoryType = element->GetIdentifier().fInMemoryType; - - RSealedPage sealedPage; - sealedPage.SetNElements(pageInfo.GetNElements()); - sealedPage.SetHasChecksum(pageInfo.HasChecksum()); - sealedPage.SetBufferSize(pageInfo.GetLocator().GetNBytesOnStorage() + pageInfo.HasChecksum() * kNBytesPageChecksum); - std::unique_ptr directReadBuffer; // only used if cluster pool is turned off - - if (fOptions.GetClusterCache() == ROOT::RNTupleReadOptions::EClusterCache::kOff) { - directReadBuffer = MakeUninitArray(sealedPage.GetBufferSize()); - RDaosKey daosKey = - GetPageDaosKey(fNTupleIndex, pageInfo.GetLocator().GetPosition().GetLocation()); - fDaosContainer->ReadSingleAkey(directReadBuffer.get(), sealedPage.GetBufferSize(), daosKey.fOid, daosKey.fDkey, - daosKey.fAkey); - fCounters->fNPageRead.Inc(); - fCounters->fNRead.Inc(); - fCounters->fSzReadPayload.Add(sealedPage.GetBufferSize()); - sealedPage.SetBuffer(directReadBuffer.get()); - } else { - if (!fCurrentCluster || (fCurrentCluster->GetId() != clusterId) || !fCurrentCluster->ContainsColumn(columnId)) - fCurrentCluster = fClusterPool.GetCluster(clusterId, fActivePhysicalColumns.ToColumnSet()); - R__ASSERT(fCurrentCluster->ContainsColumn(columnId)); - - // The cluster pool may have unzipped the required page into the page pool - auto cachedPageRef = fPagePool.GetPage(ROOT::Internal::RPagePool::RKey{columnId, elementInMemoryType}, - RNTupleLocalIndex(clusterId, pageInfo.GetFirstElementIndex())); - if (!cachedPageRef.Get().IsNull()) - return cachedPageRef; - - ROOT::Internal::ROnDiskPage::Key key(columnId, pageInfo.GetPageNumber()); - auto onDiskPage = fCurrentCluster->GetOnDiskPage(key); - R__ASSERT(onDiskPage && (sealedPage.GetBufferSize() == onDiskPage->GetSize())); - sealedPage.SetBuffer(onDiskPage->GetAddress()); - } - - ROOT::Internal::RPage newPage; - { - Detail::RNTupleAtomicTimer timer(fCounters->fTimeWallUnzip, fCounters->fTimeCpuUnzip); - newPage = UnsealPage(sealedPage, *element).Unwrap(); - fCounters->fSzUnzip.Add(elementSize * pageInfo.GetNElements()); - } - - newPage.SetWindow(pageSummary.fColumnOffset + pageInfo.GetFirstElementIndex(), - ROOT::Internal::RPage::RClusterInfo(clusterId, pageSummary.fColumnOffset)); - fCounters->fNPageUnsealed.Inc(); - return fPagePool.RegisterPage(std::move(newPage), ROOT::Internal::RPagePool::RKey{columnId, elementInMemoryType}); -} - std::unique_ptr ROOT::Experimental::Internal::RPageSourceDaos::CloneImpl() const { auto clone = new RPageSourceDaos(fNTupleName, fURI, fOptions); diff --git a/tree/ntuple/src/RPageStorageFile.cxx b/tree/ntuple/src/RPageStorageFile.cxx index 522727e52d089..a90565b07a973 100644 --- a/tree/ntuple/src/RPageStorageFile.cxx +++ b/tree/ntuple/src/RPageStorageFile.cxx @@ -414,7 +414,7 @@ ROOT::Internal::RPageSourceFile::CreateFromAnchor(const RNTuple &anchor, const R ROOT::Internal::RPageSourceFile::~RPageSourceFile() { - fClusterPool.StopBackgroundThread(); + StopClusterPoolBackgroundThread(); } std::unique_ptr @@ -520,74 +520,18 @@ ROOT::RNTupleDescriptor ROOT::Internal::RPageSourceFile::AttachImpl(RNTupleSeria void ROOT::Internal::RPageSourceFile::LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) { + RNTupleAtomicTimer timer(fCounters->fTimeWallRead, fCounters->fTimeCpuRead); + const auto offset = locator.GetPosition(); + // Track seek distance (excluding file structure reads) + if (fLastOffset != 0) { + R__ASSERT(fFileCounters); + const auto distance = static_cast(std::abs( + static_cast(offset) - static_cast(fLastOffset))); + fFileCounters->fSzSkip.Add(distance); + } fReader.ReadBuffer(const_cast(sealedPage.GetBuffer()), sealedPage.GetBufferSize(), locator.GetPosition()); -} - -ROOT::Internal::RPageRef -ROOT::Internal::RPageSourceFile::LoadPageImpl(ColumnHandle_t columnHandle, const RPageSummary &pageSummary) -{ - const auto &columnId = columnHandle.fPhysicalId; - const auto &clusterId = pageSummary.fClusterId; - const auto &pageInfo = pageSummary.fPageInfo; - - const auto element = columnHandle.fColumn->GetElement(); - const auto elementSize = element->GetSize(); - const auto elementInMemoryType = element->GetIdentifier().fInMemoryType; - - RSealedPage sealedPage; - sealedPage.SetNElements(pageInfo.GetNElements()); - sealedPage.SetHasChecksum(pageInfo.HasChecksum()); - sealedPage.SetBufferSize(pageInfo.GetLocator().GetNBytesOnStorage() + pageInfo.HasChecksum() * kNBytesPageChecksum); - std::unique_ptr directReadBuffer; // only used if cluster pool is turned off - - if (fOptions.GetClusterCache() == ROOT::RNTupleReadOptions::EClusterCache::kOff) { - directReadBuffer = MakeUninitArray(sealedPage.GetBufferSize()); - { - RNTupleAtomicTimer timer(fCounters->fTimeWallRead, fCounters->fTimeCpuRead); - const auto offset = pageInfo.GetLocator().GetPosition(); - // Track seek distance (excluding file structure reads) - R__ASSERT(fFileCounters); - if (fLastOffset != 0) { - const auto distance = static_cast(std::abs( - static_cast(offset) - static_cast(fLastOffset))); - fFileCounters->fSzSkip.Add(distance); - } - fReader.ReadBuffer(directReadBuffer.get(), sealedPage.GetBufferSize(), offset); - fLastOffset = offset + sealedPage.GetBufferSize(); - } - fCounters->fNPageRead.Inc(); - fCounters->fNRead.Inc(); - fCounters->fSzReadPayload.Add(sealedPage.GetBufferSize()); - sealedPage.SetBuffer(directReadBuffer.get()); - } else { - if (!fCurrentCluster || (fCurrentCluster->GetId() != clusterId) || !fCurrentCluster->ContainsColumn(columnId)) - fCurrentCluster = fClusterPool.GetCluster(clusterId, fActivePhysicalColumns.ToColumnSet()); - R__ASSERT(fCurrentCluster->ContainsColumn(columnId)); - - // The cluster pool may have unzipped the required page into the page pool - auto cachedPageRef = fPagePool.GetPage(ROOT::Internal::RPagePool::RKey{columnId, elementInMemoryType}, - RNTupleLocalIndex(clusterId, pageInfo.GetFirstElementIndex())); - if (!cachedPageRef.Get().IsNull()) - return cachedPageRef; - - ROnDiskPage::Key key(columnId, pageInfo.GetPageNumber()); - auto onDiskPage = fCurrentCluster->GetOnDiskPage(key); - R__ASSERT(onDiskPage && (sealedPage.GetBufferSize() == onDiskPage->GetSize())); - sealedPage.SetBuffer(onDiskPage->GetAddress()); - } - - ROOT::Internal::RPage newPage; - { - RNTupleAtomicTimer timer(fCounters->fTimeWallUnzip, fCounters->fTimeCpuUnzip); - newPage = UnsealPage(sealedPage, *element).Unwrap(); - fCounters->fSzUnzip.Add(elementSize * pageInfo.GetNElements()); - } - - newPage.SetWindow(pageSummary.fColumnOffset + pageInfo.GetFirstElementIndex(), - ROOT::Internal::RPage::RClusterInfo(clusterId, pageSummary.fColumnOffset)); - fCounters->fNPageUnsealed.Inc(); - return fPagePool.RegisterPage(std::move(newPage), RPagePool::RKey{columnId, elementInMemoryType}); + fLastOffset = offset + sealedPage.GetBufferSize(); } std::unique_ptr ROOT::Internal::RPageSourceFile::CloneImpl() const diff --git a/tree/ntuple/test/ntuple_cluster.cxx b/tree/ntuple/test/ntuple_cluster.cxx index 882c9be8b2cab..9b321c28318f5 100644 --- a/tree/ntuple/test/ntuple_cluster.cxx +++ b/tree/ntuple/test/ntuple_cluster.cxx @@ -41,7 +41,6 @@ class RPageSourceMock : public RPageSource { void LoadStructureImpl() final {} RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode) final { return RNTupleDescriptor(); } std::unique_ptr CloneImpl() const final { return nullptr; } - RPageRef LoadPageImpl(ColumnHandle_t, const RPageSummary &) final { return RPageRef(); } void LoadSealedPageImpl(const ROOT::RNTupleLocator &, RSealedPage &) final {} void LoadStreamerInfo() final {} std::unique_ptr diff --git a/tree/ntuple/test/ntuple_endian.cxx b/tree/ntuple/test/ntuple_endian.cxx index 35cb3efd9684b..90fe9d8aaa90d 100644 --- a/tree/ntuple/test/ntuple_endian.cxx +++ b/tree/ntuple/test/ntuple_endian.cxx @@ -83,17 +83,17 @@ class RPageSinkMock : public RPageSink { * An RPageSource that can be constructed given the raw content of its pages and unpacks them on demand */ class RPageSourceMock : public RPageSource { -protected: const RColumnElementBase &fElement; const std::vector &fPages; + ROOT::Internal::RPagePool fMyPagePool; +protected: void LoadStructureImpl() final {} RNTupleDescriptor AttachImpl(ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode) final { return RNTupleDescriptor(); } std::unique_ptr CloneImpl() const final { return nullptr; } - RPageRef LoadPageImpl(ColumnHandle_t, const RPageSummary &) final { return RPageRef(); } void LoadSealedPageImpl(const ROOT::RNTupleLocator &, RSealedPage &) final {} void LoadStreamerInfo() final {} @@ -105,7 +105,7 @@ class RPageSourceMock : public RPageSource { public: RPageSourceMock(const std::vector &pages, const RColumnElementBase &elt) - : RPageSource("test", ROOT::RNTupleReadOptions()), fElement(elt), fPages(pages) + : RPageSource("test", ROOT::RNTupleReadOptions()), fElement(elt), fPages(pages), fMyPagePool(*this) { } @@ -113,7 +113,7 @@ class RPageSourceMock : public RPageSource { { auto page = RPageSource::UnsealPage(fPages[i], fElement).Unwrap(); ROOT::Internal::RPagePool::RKey key{columnHandle.fPhysicalId, std::type_index(typeid(void))}; - return fPagePool.RegisterPage(std::move(page), key); + return fMyPagePool.RegisterPage(std::move(page), key); } RPageRef LoadPage(ColumnHandle_t, ROOT::RNTupleLocalIndex) final { return RPageRef(); } std::vector> LoadClusters(std::span) final { return {}; } diff --git a/tree/ntuple/test/ntuple_pages.cxx b/tree/ntuple/test/ntuple_pages.cxx index ee106e59567e6..aef3c5b4a0708 100644 --- a/tree/ntuple/test/ntuple_pages.cxx +++ b/tree/ntuple/test/ntuple_pages.cxx @@ -11,7 +11,6 @@ class RPageSourceMock : public RPageSource { void LoadStructureImpl() final {} RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode) final { return RNTupleDescriptor(); } std::unique_ptr CloneImpl() const final { return nullptr; } - RPageRef LoadPageImpl(ColumnHandle_t, const RPageSummary &) final { return RPageRef(); } void LoadSealedPageImpl(const ROOT::RNTupleLocator &, RSealedPage &) final {} void LoadStreamerInfo() final {} std::unique_ptr