Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
84eac06
Initial set of bug.fixes and cosmetic changes
ChSonnabend Mar 15, 2025
2191649
Please consider the following formatting changes
alibuild Mar 15, 2025
5be779c
Merge pull request #18 from alibuild/alibot-cleanup-14069
ChSonnabend Mar 15, 2025
b742c50
Adjusting eval sizes. Makes code neater and avoids some calculations
ChSonnabend Mar 15, 2025
c0bc918
Merge branch 'dev' into gpu_clusterizer_bug_fixes
ChSonnabend Mar 19, 2025
0c1cfb7
Adding separate functions. Now the host process only needs one instan…
ChSonnabend Mar 20, 2025
83c004f
First version of CCDB implementation
ChSonnabend Mar 22, 2025
d767ed1
Working CCDB API calls (tested with test-ccdb)
ChSonnabend Mar 23, 2025
ad4b22b
Improve fetching, but have to pass settings by value, not const ref
ChSonnabend Mar 24, 2025
81c646b
Using const ref and moving CCDB calls to host initialization
ChSonnabend Mar 24, 2025
566ddb7
Simplifications and renaming
ChSonnabend Mar 25, 2025
a9c33b5
Please consider the following formatting changes
alibuild Mar 25, 2025
0ed7d25
Merge pull request #19 from alibuild/alibot-cleanup-14069
ChSonnabend Mar 25, 2025
9037ea6
First version of GPU stream implementation. Still needs testing.
ChSonnabend Mar 27, 2025
64c19d5
Fixes
ChSonnabend Mar 27, 2025
8a5bb69
Please consider the following formatting changes
alibuild Mar 27, 2025
e657928
Merge pull request #20 from alibuild/alibot-cleanup-14117
ChSonnabend Mar 27, 2025
46fb1e1
Adding the lane variable. This PR will in any case conflict with #14069
ChSonnabend Mar 27, 2025
70320c3
Compiles on EPNs. Need to add shadow processors next. But for this, I…
ChSonnabend Mar 29, 2025
3174e39
Merge branch 'gpu_clusterizer_bug_fixes' into onnx_gpu_implementation
ChSonnabend Mar 29, 2025
9d9267f
Adding shadow instance. Not sure if this correctly allocates GPU memo…
ChSonnabend Mar 29, 2025
007a4a1
This runs, but will eventually fill up the VRAM. Need to include a me…
ChSonnabend Apr 1, 2025
4ef35fc
Found the stream allocation issue. Now starting optimizations
ChSonnabend Apr 1, 2025
4faaa4a
Improve readability and adapt for some comments
ChSonnabend Apr 1, 2025
2801c2e
Fixing memory assignment issue. Reconstruction runs through with FP32…
ChSonnabend Apr 2, 2025
1dcb1da
Major reworkings to add FP16 support
ChSonnabend Apr 2, 2025
7da3793
Merge branch 'dev' into onnx_gpu_implementation
ChSonnabend Apr 2, 2025
381955a
Bug-fixes
ChSonnabend Apr 3, 2025
19b5bd5
Improved data filling speeds by factor 3
ChSonnabend Apr 3, 2025
83d0257
Limiting threads for ONNX evaluation
ChSonnabend Apr 3, 2025
fff6dc3
Bug-fix for correct thread assignment and input data filling
ChSonnabend Apr 3, 2025
b437e38
Minor changes
ChSonnabend Apr 4, 2025
710993a
Adding I** inference, potentially needed for CNN + FC inference
ChSonnabend Apr 5, 2025
77c1691
CCDB fetching of NNs ported to GPUWorkflowSpec
ChSonnabend Apr 7, 2025
a985798
Adjusting CPU threads and ORT compile definitions
ChSonnabend Apr 10, 2025
fb08f18
About 10x speed-up due to explicit io binding
ChSonnabend Apr 10, 2025
b1c88f0
Changes for synchronization and consistency. No performance loss.
ChSonnabend Apr 11, 2025
32cab70
Please consider the following formatting changes
alibuild Apr 11, 2025
5f741fc
Merge pull request #21 from alibuild/alibot-cleanup-14117
ChSonnabend Apr 11, 2025
70907aa
Fixing warnings (errors due to size_t)
ChSonnabend Apr 11, 2025
e46cdfa
Fixing linker issues
ChSonnabend Apr 13, 2025
37955fa
Merge branch 'dev' into onnx_gpu_implementation
ChSonnabend Apr 15, 2025
4b0825a
Adding volatile memory allocation and MockedOrtAllocator. Removing pr…
ChSonnabend Apr 16, 2025
497a9d4
Please consider the following formatting changes
alibuild Apr 16, 2025
aabddb7
Merge pull request #22 from alibuild/alibot-cleanup-14117
ChSonnabend Apr 16, 2025
cfdc15f
Merge dev + fixes
ChSonnabend Apr 16, 2025
a67b634
Circumvent "unused result" warning and build failure
ChSonnabend Apr 16, 2025
938a1ed
Adjust for comments
ChSonnabend Apr 19, 2025
7b07496
Please consider the following formatting changes
alibuild Apr 19, 2025
4d3f54d
Merge pull request #23 from alibuild/alibot-cleanup-14117
ChSonnabend Apr 19, 2025
af89c9a
Fixing build flags
ChSonnabend Apr 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Common/ML/include/ML/OrtInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ class OrtModel

// Environment settings
bool mInitialized = false;
std::string modelPath, device = "cpu", dtype = "float", thread_affinity = ""; // device options should be cpu, rocm, migraphx, cuda
int intraOpNumThreads = 1, interOpNumThreads = 1, deviceId = 0, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0;
std::string modelPath, device = "cpu", thread_affinity = ""; // device options should be cpu, rocm, migraphx, cuda
int intraOpNumThreads = 1, interOpNumThreads = 1, streamId = 0, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0;

std::string printShape(const std::vector<int64_t>&);
};
Expand Down
13 changes: 7 additions & 6 deletions Common/ML/src/OrtInterface.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap)
if (!optionsMap["model-path"].empty()) {
modelPath = optionsMap["model-path"];
device = (optionsMap.contains("device") ? optionsMap["device"] : "CPU");
dtype = (optionsMap.contains("dtype") ? optionsMap["dtype"] : "float");
deviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : 0);
streamId = (optionsMap.contains("stream-id") ? std::stoi(optionsMap["stream-id"]) : 0);
allocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0);
intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0);
interOpNumThreads = (optionsMap.contains("inter-op-num-threads") ? std::stoi(optionsMap["inter-op-num-threads"]) : 0);
Expand All @@ -61,31 +60,33 @@ void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap)
#if defined(ORT_ROCM_BUILD)
#if ORT_ROCM_BUILD == 1
if (device == "ROCM") {
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(pImplOrt->sessionOptions, deviceId));
// Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(pImplOrt->sessionOptions, streamId));
o2::gpu::SetONNXGPUStream(pImplOrt->sessionOptions, streamId);
LOG(info) << "(ORT) ROCM execution provider set";
}
#endif
#endif
#if defined(ORT_MIGRAPHX_BUILD)
#if ORT_MIGRAPHX_BUILD == 1
if (device == "MIGRAPHX") {
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(pImplOrt->sessionOptions, deviceId));
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(pImplOrt->sessionOptions, streamId));
LOG(info) << "(ORT) MIGraphX execution provider set";
}
#endif
#endif
#if defined(ORT_CUDA_BUILD)
#if ORT_CUDA_BUILD == 1
if (device == "CUDA") {
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(pImplOrt->sessionOptions, deviceId));
// Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(pImplOrt->sessionOptions, streamId));
o2::gpu::SetONNXGPUStream(pImplOrt->sessionOptions, streamId);
LOG(info) << "(ORT) CUDA execution provider set";
dev_mem_str = "Cuda";
}
#endif
#endif

if (allocateDeviceMemory) {
pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceId, OrtMemType::OrtMemTypeDefault);
pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, streamId, OrtMemType::OrtMemTypeDefault);
LOG(info) << "(ORT) Memory info set to on-device memory";
}

Expand Down
3 changes: 3 additions & 0 deletions GPU/GPUTracking/Base/GPUReconstructionProcessing.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#include <functional>
#include <atomic>

struct OrtSessionOptions;

namespace o2::gpu
{

Expand Down Expand Up @@ -88,6 +90,7 @@ class GPUReconstructionProcessing : public GPUReconstruction
void AddGPUEvents(T*& events);

virtual std::unique_ptr<gpu_reconstruction_kernels::threadContext> GetThreadContext() override;
virtual int32_t SetONNXGPUStream(OrtSessionOptions* session_options, int32_t stream) { return 0; }

struct RecoStepTimerMeta {
HighResTimer timerToGPU;
Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/Base/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2")
${MODULE}
SOURCES ${SRCS}
PUBLIC_LINK_LIBRARIES ${TMP_BASELIB} O2::ITStrackingCUDA
PRIVATE_LINK_LIBRARIES ONNXRuntime::ONNXRuntime
PRIVATE_INCLUDE_DIRECTORIES
${CMAKE_SOURCE_DIR}/Detectors/Base/src
${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src
Expand Down
42 changes: 42 additions & 0 deletions GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "GPUReconstructionCUDAIncludesHost.h"

#include <cuda_profiler_api.h>
#include "ML/OrtInterface.h"
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Warum brauchst du hier den Interface header?


#include "GPUReconstructionCUDA.h"
#include "GPUReconstructionCUDAInternals.h"
Expand All @@ -35,6 +36,10 @@
#undef GPUCA_KRNL
#endif

#ifdef GPUCA_HAS_ONNX
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you need the GPU specifix settings for the general GPUTracking library? Can't you put them only into the backend libraries?

I need the definition of GPUCA_HAS_ONNX here. and the ORT variables further below in the same file

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but the ORT variables are in GPUReconstructionCUDA..., correct?
Stuff like ORT_CUDA_BUILD=$<BOOL:${ORT_CUDA_BUILD}> does not need to be in GPUTracking/CMakeLists.txt, but it can go to GPUTracking/Base/cuda/CMakeLists.txt?
The main GPUTracking library should be agnostic of the backends.

#include <onnxruntime_cxx_api.h>
#endif

static constexpr size_t REQUIRE_MIN_MEMORY = 1024L * 1024 * 1024;
static constexpr size_t REQUIRE_MEMORY_RESERVED = 512L * 1024 * 1024;
static constexpr size_t REQUIRE_FREE_MEMORY_RESERVED_PER_SM = 40L * 1024 * 1024;
Expand Down Expand Up @@ -656,13 +661,50 @@ void GPUReconstructionCUDA::endGPUProfiling()
{
GPUChkErr(cudaProfilerStop());
}

#ifdef GPUCA_HAS_ONNX
// Attach one of this reconstruction's CUDA streams to an ONNX Runtime session, so that
// ORT inference is enqueued on our stream instead of an ORT-internal one.
// session_options: ORT session options to modify (ownership stays with the caller).
// stream: index into mInternals->Streams selecting the CUDA stream to hand to ORT.
// Returns 0; ORT failures surface as exceptions via Ort::ThrowOnError.
int32_t GPUReconstructionCUDA::SetONNXGPUStream(OrtSessionOptions* session_options, int32_t stream)
{
// Provider options must be created, filled, appended, then released — in this order.
OrtCUDAProviderOptionsV2* cuda_options = nullptr;
CreateCUDAProviderOptions(&cuda_options);

// Example of further tunables left for reference (not currently applied):
// std::vector<const char*> keys{"device_id", "gpu_mem_limit", "arena_extend_strategy", "cudnn_conv_algo_search", "do_copy_in_default_stream", "cudnn_conv_use_max_workspace", "cudnn_conv1d_pad_to_nc1d"};
// std::vector<const char*> values{"0", "2147483648", "kSameAsRequested", "DEFAULT", "1", "1", "1"};
// UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), keys.size());

// this implicitly sets "has_user_compute_stream"
// NOTE(review): the address of the stream object is passed; ORT expects a cudaStream_t
// here — confirm &mInternals->Streams[stream] matches what the provider dereferences.
UpdateCUDAProviderOptionsWithValue(cuda_options, "user_compute_stream", &mInternals->Streams[stream]);
Ort::ThrowOnError(SessionOptionsAppendExecutionProvider_CUDA_V2(session_options, cuda_options));

// Finally, don't forget to release the provider options
// (the session keeps its own copy after the append above).
ReleaseCUDAProviderOptions(cuda_options);

return 0;
}
#endif // GPUCA_HAS_ONNX

#else // HIP
void* GPUReconstructionHIP::getGPUPointer(void* ptr)
{
void* retVal = nullptr;
GPUChkErr(hipHostGetDevicePointer(&retVal, ptr, 0));
return retVal;
}

#ifdef GPUCA_HAS_ONNX
// HIP counterpart of SetONNXGPUStream: attach one of this reconstruction's HIP streams
// to an ONNX Runtime session via the ROCm execution provider.
// session_options: ORT session options to modify (ownership stays with the caller).
// stream: index into mInternals->Streams selecting the HIP stream to hand to ORT.
// Returns 0; ORT failures surface as exceptions via Ort::ThrowOnError.
// Fix: this definition lives in the #else (HIP) branch, so the enclosing class is
// GPUReconstructionHIP, not GPUReconstructionCUDA (cf. getGPUPointer above).
int32_t GPUReconstructionHIP::SetONNXGPUStream(OrtSessionOptions* session_options, int32_t stream)
{
// Create ROCm provider options
const auto& api = Ort::GetApi();
OrtROCMProviderOptions rocm_options{};
rocm_options.has_user_compute_stream = 1; // Indicate that we are passing a user stream
// NOTE(review): ORT expects a hipStream_t in user_compute_stream — confirm the
// address of the stream object is what the ROCm provider dereferences.
rocm_options.user_compute_stream = &mInternals->Streams[stream];

// Append the ROCm execution provider with the custom HIP stream
Ort::ThrowOnError(api.SessionOptionsAppendExecutionProvider_ROCM(session_options, &rocm_options));
return 0;
}
#endif // GPUCA_HAS_ONNX
#endif // __HIPCC__

namespace o2::gpu
Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels<GPUReconstructionC
size_t GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) override;
void ReleaseEvent(deviceEvent ev) override;
void RecordMarker(deviceEvent* ev, int32_t stream) override;
int32_t SetONNXGPUStream(OrtSessionOptions* session_options, int32_t stream) override;

void GetITSTraits(std::unique_ptr<o2::its::TrackerTraits>* trackerTraits, std::unique_ptr<o2::its::VertexerTraits>* vertexerTraits, std::unique_ptr<o2::its::TimeFrame>* timeFrame) override;

Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/Base/hip/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2")
${MODULE}
SOURCES ${SRCS}
PUBLIC_LINK_LIBRARIES ${TMP_BASELIB} O2::ITStrackingHIP
PRIVATE_LINK_LIBRARIES ONNXRuntime::ONNXRuntime
PRIVATE_INCLUDE_DIRECTORIES
${CMAKE_SOURCE_DIR}/Detectors/Base/src
${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src
Expand Down
Loading