forked from AliceO2Group/AliceO2
-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathGPUDefGPUParameters.h
More file actions
628 lines (603 loc) · 27.3 KB
/
GPUDefGPUParameters.h
File metadata and controls
628 lines (603 loc) · 27.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.
/// \file GPUDefGPUParameters.h
/// \author David Rohr
// This file contains compile-time constants affecting the GPU performance.
// Many of these constants are GPU-architecture specific.
// This file also contains all constants describing memory limitations, essentially limiting the total number of tracks, etc.
// Compile-time constants affecting the tracking algorithms / results are located in GPUDefConstantsAndSettings.h
#ifndef GPUDEFGPUPARAMETERS_H
#define GPUDEFGPUPARAMETERS_H
// clang-format off
#ifndef GPUDEF_H
#error Please include GPUDef.h
#endif
#include "GPUDefMacros.h"
// GPU Run Configuration
#ifdef GPUCA_GPUCODE
#if defined(GPUCA_GPUTYPE_MI2xx)
// Tuning table selected when GPUCA_GPUTYPE_MI2xx is defined.
// GPUCA_LB_<kernel> entries hold per-kernel launch parameters. Where several comma-separated
// values are given, GPUCA_GET_THREAD_COUNT (defined further down in this file) takes the first
// one as the thread count.
// NOTE(review): the meaning of the additional values is defined by the macros consuming
// GPUCA_LB_* (not visible in this header) - presumably a minimum block count; confirm.
#define GPUCA_WARP_SIZE 64
#define GPUCA_THREAD_COUNT 256
// TPC sector tracking
#define GPUCA_LB_GPUTPCCreateTrackingData 256
#define GPUCA_LB_GPUTPCStartHitsSorter 512, 1
#define GPUCA_LB_GPUTPCStartHitsFinder 1024
#define GPUCA_LB_GPUTPCTrackletConstructor 512, 2
#define GPUCA_LB_GPUTPCTrackletSelector 192, 3
#define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1
#define GPUCA_LB_GPUTPCNeighboursCleaner 896
#define GPUCA_LB_GPUTPCExtrapolationTracking 256
// TPC raw data decoding
#define GPUCA_LB_GPUTPCCFDecodeZS 64, 4
#define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE
#define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE
#define GPUCA_LB_GPUTPCCFGather 1024, 1
// TPC track merger
#define GPUCA_LB_GPUTPCGMMergerTrackFit 128, 1
#define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12
#define GPUCA_LB_GPUTPCGMMergerSectorRefit 256
#define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256
#define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256
#define GPUCA_LB_GPUTPCGMMergerResolve_step0 512
#define GPUCA_LB_GPUTPCGMMergerResolve_step1 512
#define GPUCA_LB_GPUTPCGMMergerResolve_step2 512
#define GPUCA_LB_GPUTPCGMMergerResolve_step3 512
#define GPUCA_LB_GPUTPCGMMergerResolve_step4 512
#define GPUCA_LB_GPUTPCGMMergerClearLinks 256
#define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256
#define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256
#define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 512
#define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 512
#define GPUCA_LB_GPUTPCGMMergerMergeCE 512
#define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256
#define GPUCA_LB_GPUTPCGMMergerCollect 512
#define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256
#define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256
#define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256
#define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256
// Spelled _stepN like the other multi-step kernels: the defaults section of this file tests
// GPUCA_LB_GPUTPCGMMergerFinalize_step0/1/2, so the former _0/_1/_2 spelling was not seen by
// those #ifndef checks and the values set here had no effect on them.
#define GPUCA_LB_GPUTPCGMMergerFinalize_step0 256
#define GPUCA_LB_GPUTPCGMMergerFinalize_step1 256
#define GPUCA_LB_GPUTPCGMMergerFinalize_step2 256
// Former spellings, kept as aliases so that any existing reference keeps resolving.
#define GPUCA_LB_GPUTPCGMMergerFinalize_0 GPUCA_LB_GPUTPCGMMergerFinalize_step0
#define GPUCA_LB_GPUTPCGMMergerFinalize_1 GPUCA_LB_GPUTPCGMMergerFinalize_step1
#define GPUCA_LB_GPUTPCGMMergerFinalize_2 GPUCA_LB_GPUTPCGMMergerFinalize_step2
// TPC cluster compression / decompression
#define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2
#define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2
#define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2
#define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2
// TPC cluster finder
#define GPUCA_LB_GPUTPCCFCheckPadBaseline 64
#define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512
#define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512
#define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512
#define GPUCA_LB_GPUTPCCFPeakFinder 512
#define GPUCA_LB_GPUTPCCFNoiseSuppression 512
#define GPUCA_LB_GPUTPCCFDeconvolution 512
#define GPUCA_LB_GPUTPCCFClusterizer 448
#define GPUCA_LB_COMPRESSION_GATHER 1024
// Algorithm tuning switches (generic fallbacks for these exist further down in this file)
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20
#define GPUCA_ALTERNATE_BORDER_SORT 1
#define GPUCA_SORT_BEFORE_FIT 1
#define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1
#define GPUCA_NO_ATOMIC_PRECHECK 1
// Storage types; both are forced back to float when GPUCA_DETERMINISTIC_MODE is defined
#define GPUCA_DEDX_STORAGE_TYPE uint16_t
#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half
#define GPUCA_COMP_GATHER_KERNEL 4
#define GPUCA_COMP_GATHER_MODE 3
#elif defined(GPUCA_GPUTYPE_VEGA)
// Tuning table selected when GPUCA_GPUTYPE_VEGA is defined.
// GPUCA_LB_<kernel> entries hold per-kernel launch parameters. Where several comma-separated
// values are given, GPUCA_GET_THREAD_COUNT (defined further down in this file) takes the first
// one as the thread count.
// NOTE(review): the meaning of the additional values is defined by the macros consuming
// GPUCA_LB_* (not visible in this header) - presumably a minimum block count; confirm.
#define GPUCA_WARP_SIZE 64
#define GPUCA_THREAD_COUNT 256
// TPC sector tracking
#define GPUCA_LB_GPUTPCCreateTrackingData 128
#define GPUCA_LB_GPUTPCStartHitsSorter 1024, 2
#define GPUCA_LB_GPUTPCStartHitsFinder 1024
#define GPUCA_LB_GPUTPCTrackletConstructor 256, 2
#define GPUCA_LB_GPUTPCTrackletSelector 256, 8
#define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1
#define GPUCA_LB_GPUTPCNeighboursCleaner 896
#define GPUCA_LB_GPUTPCExtrapolationTracking 256
// TPC raw data decoding
#define GPUCA_LB_GPUTPCCFDecodeZS 64, 4
#define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE
#define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE
#define GPUCA_LB_GPUTPCCFGather 1024, 1
// TPC track merger
#define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 1
// NOTE(review): this is the only entry with three values; the role of the third is not visible here.
#define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256, 4, 200
#define GPUCA_LB_GPUTPCGMMergerSectorRefit 256
#define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256
#define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256
#define GPUCA_LB_GPUTPCGMMergerResolve_step0 256
#define GPUCA_LB_GPUTPCGMMergerResolve_step1 256
#define GPUCA_LB_GPUTPCGMMergerResolve_step2 256
#define GPUCA_LB_GPUTPCGMMergerResolve_step3 256
#define GPUCA_LB_GPUTPCGMMergerResolve_step4 256
#define GPUCA_LB_GPUTPCGMMergerClearLinks 256
#define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256
#define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256
#define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 256
#define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256
#define GPUCA_LB_GPUTPCGMMergerMergeCE 256
#define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256
#define GPUCA_LB_GPUTPCGMMergerCollect 512
#define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256
#define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256
#define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256
#define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256
// Spelled _stepN like the other multi-step kernels: the defaults section of this file tests
// GPUCA_LB_GPUTPCGMMergerFinalize_step0/1/2, so the former _0/_1/_2 spelling was not seen by
// those #ifndef checks and the values set here had no effect on them.
#define GPUCA_LB_GPUTPCGMMergerFinalize_step0 256
#define GPUCA_LB_GPUTPCGMMergerFinalize_step1 256
#define GPUCA_LB_GPUTPCGMMergerFinalize_step2 256
// Former spellings, kept as aliases so that any existing reference keeps resolving.
#define GPUCA_LB_GPUTPCGMMergerFinalize_0 GPUCA_LB_GPUTPCGMMergerFinalize_step0
#define GPUCA_LB_GPUTPCGMMergerFinalize_1 GPUCA_LB_GPUTPCGMMergerFinalize_step1
#define GPUCA_LB_GPUTPCGMMergerFinalize_2 GPUCA_LB_GPUTPCGMMergerFinalize_step2
// TPC cluster compression / decompression
#define GPUCA_LB_GPUTPCCompressionKernels_step0attached 192, 2
#define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2
#define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2
#define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2
// TPC cluster finder
#define GPUCA_LB_GPUTPCCFCheckPadBaseline 64
#define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512
#define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512
#define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512
#define GPUCA_LB_GPUTPCCFPeakFinder 512
#define GPUCA_LB_GPUTPCCFNoiseSuppression 512
#define GPUCA_LB_GPUTPCCFDeconvolution 512
#define GPUCA_LB_GPUTPCCFClusterizer 512
#define GPUCA_LB_COMPRESSION_GATHER 1024
// Algorithm tuning switches (generic fallbacks for these exist further down in this file)
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20
#define GPUCA_ALTERNATE_BORDER_SORT 1
#define GPUCA_SORT_BEFORE_FIT 1
#define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1
#define GPUCA_NO_ATOMIC_PRECHECK 1
// Storage types; both are forced back to float when GPUCA_DETERMINISTIC_MODE is defined
#define GPUCA_DEDX_STORAGE_TYPE uint16_t
#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half
#define GPUCA_COMP_GATHER_KERNEL 4
#define GPUCA_COMP_GATHER_MODE 3
#elif defined(GPUCA_GPUTYPE_AMPERE)
// Tuning table selected when GPUCA_GPUTYPE_AMPERE is defined.
// GPUCA_LB_<kernel> entries hold per-kernel launch parameters. Where several comma-separated
// values are given, GPUCA_GET_THREAD_COUNT (defined further down in this file) takes the first
// one as the thread count.
// NOTE(review): the meaning of the additional values is defined by the macros consuming
// GPUCA_LB_* (not visible in this header) - presumably a minimum block count; confirm.
#define GPUCA_WARP_SIZE 32
#define GPUCA_THREAD_COUNT 512
// TPC sector tracking
#define GPUCA_LB_GPUTPCCreateTrackingData 384
#define GPUCA_LB_GPUTPCStartHitsSorter 512, 1
#define GPUCA_LB_GPUTPCStartHitsFinder 512
#define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 // best single-kernel: 128, 4
#define GPUCA_LB_GPUTPCTrackletSelector 192, 3 // best single-kernel: 128, 4
#define GPUCA_LB_GPUTPCNeighboursFinder 640, 1 // best single-kernel: 768, 1
#define GPUCA_LB_GPUTPCNeighboursCleaner 512
#define GPUCA_LB_GPUTPCExtrapolationTracking 128, 4
// TPC raw data decoding
#define GPUCA_LB_GPUTPCCFDecodeZS 64, 10
#define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE
#define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE
#define GPUCA_LB_GPUTPCCFGather 1024, 1
// TPC track merger
#define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 4
#define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12
#define GPUCA_LB_GPUTPCGMMergerSectorRefit 32, 6
#define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256
#define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256
#define GPUCA_LB_GPUTPCGMMergerResolve_step0 256
#define GPUCA_LB_GPUTPCGMMergerResolve_step1 256
#define GPUCA_LB_GPUTPCGMMergerResolve_step2 256
#define GPUCA_LB_GPUTPCGMMergerResolve_step3 256
#define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4
#define GPUCA_LB_GPUTPCGMMergerClearLinks 256
#define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256
#define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2
#define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192
#define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 64, 2
#define GPUCA_LB_GPUTPCGMMergerMergeCE 256
#define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256
#define GPUCA_LB_GPUTPCGMMergerCollect 256, 2
#define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256
#define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256
#define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256
#define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256
// Spelled _stepN like the other multi-step kernels: the defaults section of this file tests
// GPUCA_LB_GPUTPCGMMergerFinalize_step0/1/2, so the former _0/_1/_2 spelling was not seen by
// those #ifndef checks and the values set here had no effect on them.
#define GPUCA_LB_GPUTPCGMMergerFinalize_step0 256
#define GPUCA_LB_GPUTPCGMMergerFinalize_step1 256
#define GPUCA_LB_GPUTPCGMMergerFinalize_step2 256
// Former spellings, kept as aliases so that any existing reference keeps resolving.
#define GPUCA_LB_GPUTPCGMMergerFinalize_0 GPUCA_LB_GPUTPCGMMergerFinalize_step0
#define GPUCA_LB_GPUTPCGMMergerFinalize_1 GPUCA_LB_GPUTPCGMMergerFinalize_step1
#define GPUCA_LB_GPUTPCGMMergerFinalize_2 GPUCA_LB_GPUTPCGMMergerFinalize_step2
// TPC cluster compression / decompression
#define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2
#define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 3
#define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1
#define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1
// TPC cluster finder
#define GPUCA_LB_GPUTPCCFCheckPadBaseline 64, 8
#define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 448
#define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 448
#define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 448
#define GPUCA_LB_GPUTPCCFPeakFinder 128
#define GPUCA_LB_GPUTPCCFNoiseSuppression 448
#define GPUCA_LB_GPUTPCCFDeconvolution 384
#define GPUCA_LB_GPUTPCCFClusterizer 448
#define GPUCA_LB_COMPRESSION_GATHER 1024
// Algorithm tuning switches (generic fallbacks for these exist further down in this file)
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20
#define GPUCA_ALTERNATE_BORDER_SORT 1
#define GPUCA_SORT_BEFORE_FIT 1
#define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1
#define GPUCA_NO_ATOMIC_PRECHECK 1
// Storage types; both are forced back to float when GPUCA_DETERMINISTIC_MODE is defined
#define GPUCA_DEDX_STORAGE_TYPE uint16_t
#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half
#define GPUCA_COMP_GATHER_KERNEL 4
#define GPUCA_COMP_GATHER_MODE 3
#elif defined(GPUCA_GPUTYPE_TURING)
// Tuning table selected when GPUCA_GPUTYPE_TURING is defined.
// GPUCA_LB_<kernel> entries hold per-kernel launch parameters. Where several comma-separated
// values are given, GPUCA_GET_THREAD_COUNT (defined further down in this file) takes the first
// one as the thread count.
// NOTE(review): the meaning of the additional values is defined by the macros consuming
// GPUCA_LB_* (not visible in this header) - presumably a minimum block count; confirm.
// Apart from DecodeZS* and Gather, no GPUTPCCF* cluster-finder entries are set here, so the
// generic defaults further down in this file apply to them.
#define GPUCA_WARP_SIZE 32
#define GPUCA_THREAD_COUNT 512
// TPC sector tracking
#define GPUCA_LB_GPUTPCCreateTrackingData 256
#define GPUCA_LB_GPUTPCStartHitsSorter 512, 1
#define GPUCA_LB_GPUTPCStartHitsFinder 512
#define GPUCA_LB_GPUTPCTrackletConstructor 256, 2
#define GPUCA_LB_GPUTPCTrackletSelector 192, 3
#define GPUCA_LB_GPUTPCNeighboursFinder 640, 1
#define GPUCA_LB_GPUTPCNeighboursCleaner 512
#define GPUCA_LB_GPUTPCExtrapolationTracking 192, 2
// TPC raw data decoding
#define GPUCA_LB_GPUTPCCFDecodeZS 64, 8
#define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE
#define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE
#define GPUCA_LB_GPUTPCCFGather 1024, 1
// TPC track merger
#define GPUCA_LB_GPUTPCGMMergerTrackFit 32, 8
#define GPUCA_LB_GPUTPCGMMergerFollowLoopers 128, 4
#define GPUCA_LB_GPUTPCGMMergerSectorRefit 64, 5
#define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256
#define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256
#define GPUCA_LB_GPUTPCGMMergerResolve_step0 256
#define GPUCA_LB_GPUTPCGMMergerResolve_step1 256
#define GPUCA_LB_GPUTPCGMMergerResolve_step2 256
#define GPUCA_LB_GPUTPCGMMergerResolve_step3 256
#define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4
#define GPUCA_LB_GPUTPCGMMergerClearLinks 256
#define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256
#define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2
#define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192
#define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256
#define GPUCA_LB_GPUTPCGMMergerMergeCE 256
#define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256
#define GPUCA_LB_GPUTPCGMMergerCollect 128, 2
#define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256
#define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256
#define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256
#define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256
// Spelled _stepN like the other multi-step kernels: the defaults section of this file tests
// GPUCA_LB_GPUTPCGMMergerFinalize_step0/1/2, so the former _0/_1/_2 spelling was not seen by
// those #ifndef checks and the values set here had no effect on them.
#define GPUCA_LB_GPUTPCGMMergerFinalize_step0 256
#define GPUCA_LB_GPUTPCGMMergerFinalize_step1 256
#define GPUCA_LB_GPUTPCGMMergerFinalize_step2 256
// Former spellings, kept as aliases so that any existing reference keeps resolving.
#define GPUCA_LB_GPUTPCGMMergerFinalize_0 GPUCA_LB_GPUTPCGMMergerFinalize_step0
#define GPUCA_LB_GPUTPCGMMergerFinalize_1 GPUCA_LB_GPUTPCGMMergerFinalize_step1
#define GPUCA_LB_GPUTPCGMMergerFinalize_2 GPUCA_LB_GPUTPCGMMergerFinalize_step2
// TPC cluster compression / decompression
#define GPUCA_LB_GPUTPCCompressionKernels_step0attached 128
#define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2
#define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1
#define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1
#define GPUCA_LB_COMPRESSION_GATHER 1024
// Algorithm tuning switches (generic fallbacks for these exist further down in this file)
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20
#define GPUCA_ALTERNATE_BORDER_SORT 1
#define GPUCA_SORT_BEFORE_FIT 1
#define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1
#define GPUCA_NO_ATOMIC_PRECHECK 1
#define GPUCA_COMP_GATHER_KERNEL 4
#define GPUCA_COMP_GATHER_MODE 3
// Storage types; both are forced back to float when GPUCA_DETERMINISTIC_MODE is defined
#define GPUCA_DEDX_STORAGE_TYPE uint16_t
#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half
// #define GPUCA_USE_TEXTURES
#elif defined(GPUCA_GPUTYPE_OPENCL)
#else
#error GPU TYPE NOT SET
#endif
#endif // GPUCA_GPUCODE
#ifdef GPUCA_GPUCODE
// Default settings, if not already set for selected GPU type
// Generic fallbacks: each value below is used only if the selected GPU type did not set it.
// -- general / TPC sector tracking --
#if !defined(GPUCA_THREAD_COUNT)
  #define GPUCA_THREAD_COUNT 256
#endif
#if !defined(GPUCA_LB_GPUTPCCreateTrackingData)
  #define GPUCA_LB_GPUTPCCreateTrackingData 256
#endif
#if !defined(GPUCA_LB_GPUTPCTrackletConstructor)
  #define GPUCA_LB_GPUTPCTrackletConstructor 256
#endif
#if !defined(GPUCA_LB_GPUTPCTrackletSelector)
  #define GPUCA_LB_GPUTPCTrackletSelector 256
#endif
#if !defined(GPUCA_LB_GPUTPCNeighboursFinder)
  #define GPUCA_LB_GPUTPCNeighboursFinder 256
#endif
#if !defined(GPUCA_LB_GPUTPCNeighboursCleaner)
  #define GPUCA_LB_GPUTPCNeighboursCleaner 256
#endif
#if !defined(GPUCA_LB_GPUTPCExtrapolationTracking)
  #define GPUCA_LB_GPUTPCExtrapolationTracking 256
#endif
// -- TRD tracker, occupancy map, conversion --
#if !defined(GPUCA_LB_GPUTRDTrackerKernels_gpuVersion)
  #define GPUCA_LB_GPUTRDTrackerKernels_gpuVersion 512
#endif
#if !defined(GPUCA_LB_GPUTPCCreateOccupancyMap_fill)
  #define GPUCA_LB_GPUTPCCreateOccupancyMap_fill 256
#endif
#if !defined(GPUCA_LB_GPUTPCCreateOccupancyMap_fold)
  #define GPUCA_LB_GPUTPCCreateOccupancyMap_fold 256
#endif
#if !defined(GPUCA_LB_GPUTRDTrackerKernels_o2Version)
  #define GPUCA_LB_GPUTRDTrackerKernels_o2Version 512
#endif
#if !defined(GPUCA_LB_GPUTPCConvertKernel)
  #define GPUCA_LB_GPUTPCConvertKernel 256
#endif
// -- TPC cluster compression / decompression --
#if !defined(GPUCA_LB_GPUTPCCompressionKernels_step0attached)
  #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 256
#endif
#if !defined(GPUCA_LB_GPUTPCCompressionKernels_step1unattached)
  #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 256
#endif
#if !defined(GPUCA_LB_GPUTPCDecompressionKernels_step0attached)
  #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 256
#endif
#if !defined(GPUCA_LB_GPUTPCDecompressionKernels_step1unattached)
  #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 256
#endif
#if !defined(GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow)
  #define GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow 256
#endif
#if !defined(GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters)
  #define GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters 256
#endif
#if !defined(GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters)
  #define GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters 256
#endif
// -- TPC raw data decoding and gather --
#if !defined(GPUCA_LB_GPUTPCCFDecodeZS)
  #define GPUCA_LB_GPUTPCCFDecodeZS 128, 4
#endif
#if !defined(GPUCA_LB_GPUTPCCFDecodeZSLink)
  #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE
#endif
#if !defined(GPUCA_LB_GPUTPCCFDecodeZSDenseLink)
  #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE
#endif
#if !defined(GPUCA_LB_GPUTPCCFGather)
  #define GPUCA_LB_GPUTPCCFGather 1024, 1
#endif
#if !defined(GPUCA_LB_COMPRESSION_GATHER)
  #define GPUCA_LB_COMPRESSION_GATHER 1024
#endif
// Generic fallbacks for the TPC track merger and O2 output kernels; each value is used only
// if it has not been defined before this point.
#if !defined(GPUCA_LB_GPUTPCGMMergerTrackFit)
  #define GPUCA_LB_GPUTPCGMMergerTrackFit 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerFollowLoopers)
  #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerSectorRefit)
  #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerUnpackResetIds)
  #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerUnpackGlobal)
  #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerResolve_step0)
  #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerResolve_step1)
  #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerResolve_step2)
  #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerResolve_step3)
  #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerResolve_step4)
  #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerClearLinks)
  #define GPUCA_LB_GPUTPCGMMergerClearLinks 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare)
  #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare)
  #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerMergeBorders_step0)
  #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerMergeBorders_step2)
  #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerMergeCE)
  #define GPUCA_LB_GPUTPCGMMergerMergeCE 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks)
  #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerCollect)
  #define GPUCA_LB_GPUTPCGMMergerCollect 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerSortTracksPrepare)
  #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0)
  #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1)
  #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2)
  #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerFinalize_step0)
  #define GPUCA_LB_GPUTPCGMMergerFinalize_step0 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerFinalize_step1)
  #define GPUCA_LB_GPUTPCGMMergerFinalize_step1 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerFinalize_step2)
  #define GPUCA_LB_GPUTPCGMMergerFinalize_step2 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerMergeLoopers_step0)
  #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step0 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerMergeLoopers_step1)
  #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step1 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMMergerMergeLoopers_step2)
  #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step2 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMO2Output_prepare)
  #define GPUCA_LB_GPUTPCGMO2Output_prepare 256
#endif
#if !defined(GPUCA_LB_GPUTPCGMO2Output_output)
  #define GPUCA_LB_GPUTPCGMO2Output_output 256
#endif
// Generic fallbacks for the ITS fitter, start-hit, cluster-finder and refit kernels; each
// value is used only if it has not been defined before this point.
#if !defined(GPUCA_LB_GPUITSFitterKernels)
  #define GPUCA_LB_GPUITSFitterKernels 256
#endif
#if !defined(GPUCA_LB_GPUTPCStartHitsFinder)
  #define GPUCA_LB_GPUTPCStartHitsFinder 256
#endif
#if !defined(GPUCA_LB_GPUTPCStartHitsSorter)
  #define GPUCA_LB_GPUTPCStartHitsSorter 256
#endif
#if !defined(GPUCA_LB_GPUTPCCFCheckPadBaseline)
  #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64
#endif
#if !defined(GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap)
  #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512
#endif
#if !defined(GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits)
  #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512
#endif
#if !defined(GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart)
  #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512
#endif
#if !defined(GPUCA_LB_GPUTPCCFPeakFinder)
  #define GPUCA_LB_GPUTPCCFPeakFinder 512
#endif
#if !defined(GPUCA_LB_GPUTPCCFNoiseSuppression)
  #define GPUCA_LB_GPUTPCCFNoiseSuppression 512
#endif
#if !defined(GPUCA_LB_GPUTPCCFDeconvolution)
  #define GPUCA_LB_GPUTPCCFDeconvolution 512
#endif
#if !defined(GPUCA_LB_GPUTPCCFClusterizer)
  #define GPUCA_LB_GPUTPCCFClusterizer 512
#endif
#if !defined(GPUCA_LB_GPUTPCNNClusterizerKernels)
  #define GPUCA_LB_GPUTPCNNClusterizerKernels 512
#endif
#if !defined(GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU)
  #define GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU 256
#endif
#if !defined(GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov)
  #define GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov 256
#endif
// Thread count of a launch-bound entry: the first of its comma-separated values.
#define GPUCA_GET_THREAD_COUNT(...) GPUCA_M_FIRST(__VA_ARGS__)
#else
// The following defaults are needed to compile the host code
#define GPUCA_GET_THREAD_COUNT(...) 1
#endif
// Number of warps of a launch-bound entry: its thread count divided by the warp size.
#define GPUCA_GET_WARP_COUNT(...) (GPUCA_GET_THREAD_COUNT(__VA_ARGS__) / GPUCA_WARP_SIZE)
// TODO: WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix!
#define GPUCA_THREAD_COUNT_SCAN 512

// Per-step launch bounds that simply reuse the setting of their parent kernel
// -- noise suppression --
#define GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression GPUCA_LB_GPUTPCCFNoiseSuppression
#define GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks GPUCA_LB_GPUTPCCFNoiseSuppression
// -- NN clusterizer, only when built with ONNX support --
#if defined(GPUCA_HAS_ONNX)
  #define GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer GPUCA_LB_GPUTPCNNClusterizerKernels
  #define GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNN GPUCA_LB_GPUTPCNNClusterizerKernels
  #define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass1Labels GPUCA_LB_GPUTPCNNClusterizerKernels
  #define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass2Labels GPUCA_LB_GPUTPCNNClusterizerKernels
  #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression GPUCA_LB_GPUTPCNNClusterizerKernels
  #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass2Regression GPUCA_LB_GPUTPCNNClusterizerKernels
#endif
// -- stream compaction: all steps use the scan thread count --
#define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_THREAD_COUNT_SCAN
#define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_THREAD_COUNT_SCAN
#define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_THREAD_COUNT_SCAN
#define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown GPUCA_THREAD_COUNT_SCAN
#define GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_THREAD_COUNT_SCAN
// -- tracklet constructor variants --
#define GPUCA_LB_GPUTPCTrackletConstructor_singleSector GPUCA_LB_GPUTPCTrackletConstructor
#define GPUCA_LB_GPUTPCTrackletConstructor_allSectors GPUCA_LB_GPUTPCTrackletConstructor
// -- compression gather variants --
#define GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered GPUCA_LB_COMPRESSION_GATHER
#define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32 GPUCA_LB_COMPRESSION_GATHER
#define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER
#define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered128 GPUCA_LB_COMPRESSION_GATHER
#define GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock GPUCA_LB_COMPRESSION_GATHER
// GPUCA_SPECIALIZE_THRUST_SORTS is set only when compiling with a CUDA or HIP compiler.
#if !defined(__CUDACC__) && !defined(__HIPCC__)
  // neither compiler: leave GPUCA_SPECIALIZE_THRUST_SORTS undefined
#else
  #define GPUCA_SPECIALIZE_THRUST_SORTS
#endif
// Fallback used when no GPUCA_NEIGHBORSFINDER_REGS setting was provided earlier.
#if !defined(GPUCA_NEIGHBORSFINDER_REGS)
  #define GPUCA_NEIGHBORSFINDER_REGS NONE, 0
#endif
// Algorithm tuning parameters.
#if !defined(GPUCA_GPUCODE)
  // Host build: fixed values, defined unconditionally.
  #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0
  #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 0
  #define GPUCA_ALTERNATE_BORDER_SORT 0
  #define GPUCA_SORT_BEFORE_FIT 0
  #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0
  #define GPUCA_THREAD_COUNT_FINDER 1
  #define GPUCA_COMP_GATHER_KERNEL 0
  #define GPUCA_COMP_GATHER_MODE 0
#else
  // GPU build: fall back to these values only where nothing was set before.
  #if !defined(GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP)
    #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6
  #endif
  #if !defined(GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE)
    #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12
  #endif
  #if !defined(GPUCA_ALTERNATE_BORDER_SORT)
    #define GPUCA_ALTERNATE_BORDER_SORT 0
  #endif
  #if !defined(GPUCA_SORT_BEFORE_FIT)
    #define GPUCA_SORT_BEFORE_FIT 0
  #endif
  #if !defined(GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION)
    #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0
  #endif
  #if !defined(GPUCA_COMP_GATHER_KERNEL)
    #define GPUCA_COMP_GATHER_KERNEL 0
  #endif
  #if !defined(GPUCA_COMP_GATHER_MODE)
    #define GPUCA_COMP_GATHER_MODE 2
  #endif
#endif
// Storage types: deterministic mode always uses float, replacing any earlier choice;
// otherwise float is only the fallback when no type was selected before.
#if defined(GPUCA_DETERMINISTIC_MODE)
  #undef GPUCA_DEDX_STORAGE_TYPE
  #define GPUCA_DEDX_STORAGE_TYPE float
  #undef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE
  #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float
#else
  #if !defined(GPUCA_DEDX_STORAGE_TYPE)
    #define GPUCA_DEDX_STORAGE_TYPE float
  #endif
  #if !defined(GPUCA_MERGER_INTERPOLATION_ERROR_TYPE)
    #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float
  #endif
#endif
// Warp size fallback: 32 for GPU code, 1 for host code.
#if defined(GPUCA_WARP_SIZE)
  // already set for the selected GPU type
#elif defined(GPUCA_GPUCODE)
  #define GPUCA_WARP_SIZE 32
#else
  #define GPUCA_WARP_SIZE 1
#endif
// Global limits and alignment settings; these are defined unconditionally, independent of the GPU type.
#define GPUCA_MAX_THREADS 1024
#define GPUCA_MAX_STREAMS 36
#define GPUCA_SORT_STARTHITS_GPU // Sort the start hits when running on GPU
#define GPUCA_ROWALIGNMENT 16 // Alignment of Row Hits and Grid
#define GPUCA_BUFFER_ALIGNMENT 64 // Alignment of buffers obtained from SetPointers
#define GPUCA_MEMALIGN (64 * 1024) // Alignment of allocated memory blocks
// #define GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE // Output Profiling Data for Tracklet Constructor Tracklet Scheduling
// Default maximum numbers
// The (size_t) cast applies to the first factor, so each product is evaluated in size_t.
#define GPUCA_MAX_CLUSTERS ((size_t) 1024 * 1024 * 1024) // Maximum number of TPC clusters
#define GPUCA_MAX_TRD_TRACKLETS ((size_t) 128 * 1024) // Maximum number of TRD tracklets
#define GPUCA_MAX_ITS_FIT_TRACKS ((size_t) 96 * 1024) // Max number of tracks for ITS track fit
#define GPUCA_TRACKER_CONSTANT_MEM ((size_t) 63 * 1024) // Amount of Constant Memory to reserve
#define GPUCA_MEMORY_SIZE ((size_t) 6 * 1024 * 1024 * 1024) // Size of memory allocated on Device
#define GPUCA_HOST_MEMORY_SIZE ((size_t) 1 * 1024 * 1024 * 1024) // Size of memory allocated on Host
#define GPUCA_GPU_STACK_SIZE ((size_t) 8 * 1024) // Stack size per GPU thread
// NOTE(review): the factor 1025 (rather than 1024) looks like a typo - confirm before changing the value.
#define GPUCA_GPU_HEAP_SIZE ((size_t) 16 * 1025 * 1024) // GPU heap size
// NOTE(review): (2 << 24) evaluates to 2^25 - confirm this is the limit implied by the track id format.
#define GPUCA_MAX_SECTOR_NTRACK (2 << 24) // Maximum number of tracks per sector (limited by track id format)
// #define GPUCA_KERNEL_DEBUGGER_OUTPUT
// Some assertions to make sure our parameters are not invalid
// GPUCA_MAXN, GPUCA_ROW_COUNT and GPUCA_TPC_COMP_CHUNK_SIZE are not defined in this header;
// they must be provided by the headers included before it.
static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP");
static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE");
#ifdef GPUCA_GPUCODE
// Only checked for GPU code: twice the thread count (first launch-bound value) of the
// step1unattached compression kernel must not exceed the compression chunk size.
static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE");
#endif
// Parameters derived from the settings above
// Texture fetching: both switches follow GPUCA_USE_TEXTURES.
#if defined(GPUCA_USE_TEXTURES)
  #define GPUCA_TEXTURE_FETCH_CONSTRUCTOR // Fetch data through texture cache
  #define GPUCA_TEXTURE_FETCH_NEIGHBORS // Fetch also in Neighbours Finder
#endif
// Start-hit sorting is enabled only for GPU code, and only if GPUCA_SORT_STARTHITS_GPU is set.
#ifdef GPUCA_SORT_STARTHITS_GPU
  #ifdef GPUCA_GPUCODE
    #define GPUCA_SORT_STARTHITS
  #endif
#endif
// Buffer alignment as std::align_val_t, and the same value preceded by a comma so it can be
// appended to an argument list.
#define GPUCA_NEW_ALIGNMENT (std::align_val_t{GPUCA_BUFFER_ALIGNMENT})
#define GPUCA_OPERATOR_NEW_ALIGNMENT ,GPUCA_NEW_ALIGNMENT
// clang-format on
#endif