Skip to content

Commit d931b6b

Browse files
shahor02davidrohr
authored andcommitted
Add POD version of TPCFastTransform
The TPCFastTransformPOD is a pointerless version of the TPCFastTransform. It can be created from the original TPCFastTransform as e.g. auto lold = o2::gpu::TPCFastTransform::loadFromFile("o2-gpu-TPCFastTransform.root","ccdb_object"); // load original transform std::vector<char> v; // one has to provide a vector (could be a std or pmr), which later can be messaged via DPL auto* pod = o2::gpu::TPCFastTransformPOD::create(v, *lold); // pointer pod is just v.data() cast to TPCFastTransformPOD* // run test: pod->test(*lold); [INFO] (ns per call) original this Nmissmatch [INFO] getCorrection 1.330e+02 1.400e+02 0 [INFO] getCorrectionInvCorrectedX 8.856e+01 8.434e+01 0 [INFO] getCorrectionInvUV 6.266e+01 6.142e+01 0 It can also be created directly from the TPCFastSpaceChargeCorrection as TPCFastSpaceChargeCorrection& oldCorr = lold->getCorrection(); auto* pod = o2::gpu::TPCFastTransformPOD::create(v, oldCorr); but in this case one should afterwards set the vdrift and t0 using the provided setters. TPCFastTransformPOD replicates all the methods of the TPCFastTransform (and of the TPCFastSpaceChargeCorrection), including those which allow querying rescaled corrections (by providing reference maps and scaling coefficients). Since the idea of this class is to create a final correction map as a weighted sum of different contributions and to distribute it to consumer processes via shared memory, query methods without rescaling are also added; they carry the suffix _new. Eventually, the scalable legacy methods can be suppressed and the _new suffix can be dropped.
1 parent f7517a9 commit d931b6b

File tree

5 files changed

+1163
-0
lines changed

5 files changed

+1163
-0
lines changed

GPU/TPCFastTransformation/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ set(SRCS
2626
TPCFastSpaceChargeCorrectionMap.cxx
2727
TPCFastTransform.cxx
2828
CorrectionMapsHelper.cxx
29+
TPCFastTransformPOD.cxx
2930
)
3031

3132
if(NOT ALIGPU_BUILD_TYPE STREQUAL "Standalone")

GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ namespace gpu
4141
///
4242
class TPCFastSpaceChargeCorrection : public FlatObject
4343
{
44+
friend class TPCFastTransformPOD;
45+
4446
public:
4547
// obsolete structure, declared here only for backward compatibility
4648
struct SliceInfo {
Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2+
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3+
// All rights not expressly granted are reserved.
4+
//
5+
// This software is distributed under the terms of the GNU General Public
6+
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7+
//
8+
// In applying this license CERN does not waive the privileges and immunities
9+
// granted to it by virtue of its status as an Intergovernmental Organization
10+
// or submit itself to any jurisdiction.
11+
12+
/// \file TPCFastTransformPOD.cxx
13+
/// \brief Implementation of POD correction map
14+
///
15+
/// \author ruben.shahoayn@cern.ch
16+
17+
/// \brief Implementation of POD correction map
18+
///
19+
/// \author ruben.shahoayn@cern.ch
20+
21+
#if !defined(GPUCA_NO_ROOT) && !defined(GPUCA_NO_FMT) && !defined(GPUCA_STANDALONE)
22+
#include <TRandom.h>
23+
#endif
24+
#include "TPCFastTransformPOD.h"
25+
#include "GPUDebugStreamer.h"
26+
27+
namespace o2
28+
{
29+
namespace gpu
30+
{
31+
32+
#if !defined(GPUCA_NO_ROOT) && !defined(GPUCA_NO_FMT) && !defined(GPUCA_STANDALONE)
33+
34+
size_t TPCFastTransformPOD::estimateSize(const TPCFastSpaceChargeCorrection& origCorr)
35+
{
36+
// estimate size of own buffer
37+
const size_t selfSizeFix = sizeof(TPCFastTransformPOD);
38+
size_t nextDynOffs = alignOffset(selfSizeFix);
39+
nextDynOffs = alignOffset(nextDynOffs + origCorr.mNumberOfScenarios * sizeof(size_t)); // spline scenarios start here
40+
// space for splines
41+
for (int isc = 0; isc < origCorr.mNumberOfScenarios; isc++) {
42+
const auto& spline = origCorr.mScenarioPtr[isc];
43+
nextDynOffs = alignOffset(nextDynOffs + sizeof(spline));
44+
}
45+
// space for splines data
46+
for (int is = 0; is < 3; is++) {
47+
for (int sector = 0; sector < origCorr.mGeo.getNumberOfSectors(); sector++) {
48+
for (int row = 0; row < NROWS; row++) {
49+
const auto& spline = origCorr.getSpline(sector, row);
50+
int nPar = spline.getNumberOfParameters();
51+
if (is == 1) {
52+
nPar = nPar / 3;
53+
}
54+
if (is == 2) {
55+
nPar = nPar * 2 / 3;
56+
}
57+
nextDynOffs += nPar * sizeof(float);
58+
}
59+
}
60+
}
61+
nextDynOffs = alignOffset(nextDynOffs);
62+
return nextDynOffs;
63+
}
64+
65+
/// Build a TPCFastTransformPOD in place, inside a caller-provided buffer, from a TPCFastSpaceChargeCorrection.
///
/// Data written to the buffer, in order:
///  - the fixed-size TPCFastTransformPOD object (geometry, sector/row infos, time stamp),
///  - a table with one size_t offset per spline scenario,
///  - the spline object of every scenario, each in its own aligned slot,
///  - the float data of all sectors/rows, grouped by spline id (0, 1, 2) and packed without padding.
///
/// mVdrift and mT0 are set to 0 here; they are not part of the TPCFastSpaceChargeCorrection.
///
/// \param buff      destination buffer, reinterpreted as TPCFastTransformPOD via getNonConst()
/// \param buffSize  size of \a buff in bytes; must be exactly the size that gets filled
/// \param origCorr  correction to copy from
/// \return pointer to the object residing at the start of \a buff
/// \throws std::runtime_error if the buffer cannot hold the fixed-size part, if any copy would
///         overflow it, or if \a buffSize differs from the final filled size
TPCFastTransformPOD* TPCFastTransformPOD::create(char* buff, size_t buffSize, const TPCFastSpaceChargeCorrection& origCorr)
{
  // A runtime check rather than assert(): assert is compiled out with NDEBUG, and the fixed-size
  // members below are written before any of the other overflow checks are reached.
  if (buffSize < sizeof(TPCFastTransformPOD)) {
    throw std::runtime_error(fmt::format("buffer of size {} is too small to hold TPCFastTransformPOD of size {}", buffSize, sizeof(TPCFastTransformPOD)));
  }
  auto& podMap = getNonConst(buff);
  podMap.mApplyCorrection = true; // by default always apply corrections

  // copy fixed size data --- start
  podMap.mNumberOfScenarios = origCorr.mNumberOfScenarios;
  std::memcpy(&podMap.mGeo, &origCorr.mGeo, sizeof(TPCFastTransformGeo)); // copy geometry (fixed size)
  for (int sector = 0; sector < TPCFastTransformGeo::getNumberOfSectors(); sector++) {
    for (int row = 0; row < NROWS; row++) {
      podMap.mSectorRowInfos[NROWS * sector + row] = origCorr.getSectorRowInfo(sector, row);
    }
  }
  podMap.mTimeStamp = origCorr.mTimeStamp;
  // init data members coming from the TPCFastTransform
  podMap.mVdrift = 0.;
  podMap.mT0 = 0.;
  // copy fixed size data --- end

  size_t nextDynOffs = alignOffset(sizeof(TPCFastTransformPOD));

  // table of scenario offsets starts right after the fixed-size part
  podMap.mOffsScenariosOffsets = nextDynOffs;
  LOGP(debug, "Set mOffsScenariosOffsets = {}", podMap.mOffsScenariosOffsets);
  nextDynOffs = alignOffset(nextDynOffs + podMap.mNumberOfScenarios * sizeof(size_t)); // spline scenarios start here

  // copy spline objects
  size_t* scenOffs = reinterpret_cast<size_t*>(buff + podMap.mOffsScenariosOffsets);
  for (int isc = 0; isc < origCorr.mNumberOfScenarios; isc++) {
    const auto& spline = origCorr.mScenarioPtr[isc];
    // Check before touching the buffer: nextDynOffs lies beyond the whole offsets table, so this
    // test also guarantees that the scenOffs[isc] entry itself is inside the buffer.
    if (buffSize < nextDynOffs + sizeof(spline)) {
      throw std::runtime_error(fmt::format("attempt to copy {} bytes for spline for scenario {} to {}, overflowing the buffer of size {}", sizeof(spline), isc, nextDynOffs + sizeof(spline), buffSize));
    }
    scenOffs[isc] = nextDynOffs;
    std::memcpy(buff + nextDynOffs, &spline, sizeof(spline));
    LOGP(debug, "Copy {} bytes for spline scenario {} (ptr:{}) to offsset {}", sizeof(spline), isc, static_cast<const void*>(&spline), nextDynOffs);
    nextDynOffs = alignOffset(nextDynOffs + sizeof(spline));
  }

  // copy splines data; nextDynOffs is the single write cursor
  for (int is = 0; is < 3; is++) {
    LOGP(debug, "splinID={} start offset {} -> {}", is, nextDynOffs, static_cast<void*>(buff + nextDynOffs));
    for (int sector = 0; sector < origCorr.mGeo.getNumberOfSectors(); sector++) {
      podMap.mSplineDataOffsets[sector][is] = nextDynOffs; // start of this sector's data for spline `is`
      size_t rowDataOffs = 0;                              // offset of the row data relative to the sector start
      for (int row = 0; row < NROWS; row++) {
        const auto& spline = origCorr.getSpline(sector, row);
        const float* dataOr = origCorr.getCorrectionData(sector, row, is);
        int nPar = spline.getNumberOfParameters();
        // splines 1 and 2 store 1/3 and 2/3 of the parameters of spline 0
        if (is == 1) {
          nPar = nPar / 3;
        }
        if (is == 2) {
          nPar = nPar * 2 / 3;
        }
        LOGP(debug, "Copying {} floats for spline{} of sector:{} row:{} to offset {}", nPar, is, sector, row, nextDynOffs);
        const size_t nbcopy = nPar * sizeof(float);
        if (buffSize < nextDynOffs + nbcopy) {
          throw std::runtime_error(fmt::format("attempt to copy {} bytes of data for spline{} of sector{}/row{} to {}, overflowing the buffer of size {}", nbcopy, is, sector, row, nextDynOffs, buffSize));
        }
        std::memcpy(buff + nextDynOffs, dataOr, nbcopy);
        podMap.getSectorRowInfo(sector, row).dataOffsetBytes[is] = rowDataOffs;
        rowDataOffs += nbcopy;
        nextDynOffs += nbcopy;
      }
    }
  }

  // the caller is expected to have sized the buffer exactly
  podMap.mTotalSize = alignOffset(nextDynOffs);
  if (buffSize != podMap.mTotalSize) {
    throw std::runtime_error(fmt::format("Estimated buffer size {} differs from filled one {}", buffSize, podMap.mTotalSize));
  }
  return &podMap;
}
143+
144+
/// Build a TPCFastTransformPOD in place, inside a caller-provided buffer, from a full TPCFastTransform.
///
/// The buffer is filled by the overload taking the TPCFastSpaceChargeCorrection of \a src;
/// afterwards the drift velocity and T0 of \a src are copied into the new object.
///
/// \param buff      destination buffer
/// \param buffSize  size of \a buff in bytes
/// \param src       transformation to copy from
/// \return pointer to the object created in \a buff
TPCFastTransformPOD* TPCFastTransformPOD::create(char* buff, size_t buffSize, const TPCFastTransform& src)
{
  // the correction part does all the buffer filling
  TPCFastTransformPOD* const pod = create(buff, buffSize, src.getCorrection());
  // members which exist only in the TPCFastTransform itself
  pod->mT0 = src.getT0();
  pod->mVdrift = src.getVDrift();
  return pod;
}
154+
155+
bool TPCFastTransformPOD::test(const TPCFastSpaceChargeCorrection& origCorr, int npoints) const
156+
{
157+
if (npoints < 1) {
158+
return false;
159+
}
160+
std::vector<unsigned char> sector, row;
161+
std::vector<float> y, z;
162+
std::vector<std::array<float, 3>> corr0, corr1;
163+
std::vector<std::array<float, 2>> corrInv0, corrInv1;
164+
std::vector<float> corrInvX0, corrInvX1;
165+
166+
sector.reserve(npoints);
167+
row.reserve(npoints);
168+
y.reserve(npoints);
169+
z.reserve(npoints);
170+
corr0.reserve(npoints);
171+
corr1.reserve(npoints);
172+
corrInv0.reserve(npoints);
173+
corrInv1.reserve(npoints);
174+
corrInvX0.reserve(npoints);
175+
corrInvX1.reserve(npoints);
176+
177+
for (int i = 0; i < npoints; i++) {
178+
sector.push_back(gRandom->Integer(NSECTORS));
179+
row.push_back(gRandom->Integer(NROWS));
180+
y.push_back(2 * (gRandom->Rndm() - 0.5) * mGeo.getRowInfo(row.back()).getYmax());
181+
z.push_back((sector.back() < NSECTORS / 2 ? 1.f : -1.f) * gRandom->Rndm() * 240);
182+
}
183+
long origStart[3], origEnd[3], thisStart[3], thisEnd[3];
184+
origStart[0] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
185+
for (int i = 0; i < npoints; i++) {
186+
std::array<float, 3> val;
187+
origCorr.getCorrectionLocal(sector[i], row[i], y[i], z[i], val[0], val[1], val[2]);
188+
corr0.push_back(val);
189+
}
190+
191+
origEnd[0] = origStart[1] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
192+
for (int i = 0; i < npoints; i++) {
193+
std::array<float, 2> val;
194+
origCorr.getCorrectionYZatRealYZ(sector[i], row[i], y[i], z[i], val[0], val[1]);
195+
corrInv0.push_back(val);
196+
}
197+
198+
origEnd[1] = origStart[2] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
199+
for (int i = 0; i < npoints; i++) {
200+
corrInvX0.push_back(origCorr.getCorrectionXatRealYZ(sector[i], row[i], y[i], z[i]));
201+
}
202+
//
203+
origEnd[2] = thisStart[0] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
204+
for (int i = 0; i < npoints; i++) {
205+
std::array<float, 3> val;
206+
this->getCorrectionLocal(sector[i], row[i], y[i], z[i], val[0], val[1], val[2]);
207+
corr1.push_back(val);
208+
}
209+
thisEnd[0] = thisStart[1] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
210+
for (int i = 0; i < npoints; i++) {
211+
std::array<float, 2> val;
212+
this->getCorrectionYZatRealYZ(sector[i], row[i], y[i], z[i], val[0], val[1]);
213+
corrInv1.push_back(val);
214+
}
215+
216+
thisEnd[1] = thisStart[2] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
217+
for (int i = 0; i < npoints; i++) {
218+
corrInvX1.push_back(this->getCorrectionXatRealYZ(sector[i], row[i], y[i], z[i]));
219+
}
220+
thisEnd[2] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
221+
//
222+
size_t ndiff[3] = {};
223+
for (int i = 0; i < npoints; i++) {
224+
if (corr0[i][0] != corr1[i][0] || corr0[i][1] != corr1[i][1] || corr0[i][2] != corr1[i][2]) {
225+
ndiff[0]++;
226+
}
227+
if (corrInv0[i][0] != corrInv1[i][0] || corrInv0[i][1] != corrInv1[i][1]) {
228+
ndiff[1]++;
229+
}
230+
if (corrInvX0[i] != corrInvX1[i]) {
231+
ndiff[2]++;
232+
}
233+
}
234+
//
235+
LOGP(info, " (ns per call) original this Nmissmatch");
236+
LOGP(info, "getCorrection {:.3e} {:.3e} {}", double(origEnd[0] - origStart[0]) / npoints * 1000., double(thisEnd[0] - thisStart[0]) / npoints * 1000., ndiff[0]);
237+
LOGP(info, "getCorrectionInvCorrectedX {:.3e} {:.3e} {}", double(origEnd[1] - origStart[1]) / npoints * 1000., double(thisEnd[1] - thisStart[1]) / npoints * 1000., ndiff[1]);
238+
LOGP(info, "getCorrectionInvUV {:.3e} {:.3e} {}", double(origEnd[2] - origStart[2]) / npoints * 1000., double(thisEnd[2] - thisStart[2]) / npoints * 1000., ndiff[2]);
239+
return ndiff[0] == 0 && ndiff[1] == 0 && ndiff[2] == 0;
240+
}
241+
242+
#endif
243+
244+
} // namespace gpu
245+
} // namespace o2

0 commit comments

Comments
 (0)