-
Notifications
You must be signed in to change notification settings - Fork 494
GPU stream implementation for ONNX runtime #14117
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
84eac06
2191649
5be779c
b742c50
c0bc918
0c1cfb7
83c004f
d767ed1
ad4b22b
81c646b
566ddb7
a9c33b5
0ed7d25
9037ea6
64c19d5
8a5bb69
e657928
46fb1e1
70320c3
3174e39
9d9267f
007a4a1
4ef35fc
4faaa4a
2801c2e
1dcb1da
7da3793
381955a
19b5bd5
83d0257
fff6dc3
b437e38
710993a
77c1691
a985798
fb08f18
b1c88f0
32cab70
5f741fc
70907aa
e46cdfa
37955fa
4b0825a
497a9d4
aabddb7
cfdc15f
a67b634
938a1ed
7b07496
4d3f54d
af89c9a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,6 +16,7 @@ | |
| #include "GPUReconstructionCUDAIncludesHost.h" | ||
|
|
||
| #include <cuda_profiler_api.h> | ||
| #include "ML/OrtInterface.h" | ||
|
|
||
| #include "GPUReconstructionCUDA.h" | ||
| #include "GPUReconstructionCUDAInternals.h" | ||
|
|
@@ -35,6 +36,10 @@ | |
| #undef GPUCA_KRNL | ||
| #endif | ||
|
|
||
| #ifdef GPUCA_HAS_ONNX | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I need the definition of GPUCA_HAS_ONNX here, and the ORT variables further below in the same file.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, but the ORT variables are in GPUReconstructionCUDA..., correct? |
||
| #include <onnxruntime_cxx_api.h> | ||
| #endif | ||
|
|
||
| static constexpr size_t REQUIRE_MIN_MEMORY = 1024L * 1024 * 1024; | ||
| static constexpr size_t REQUIRE_MEMORY_RESERVED = 512L * 1024 * 1024; | ||
| static constexpr size_t REQUIRE_FREE_MEMORY_RESERVED_PER_SM = 40L * 1024 * 1024; | ||
|
|
@@ -656,13 +661,50 @@ void GPUReconstructionCUDA::endGPUProfiling() | |
| { | ||
| GPUChkErr(cudaProfilerStop()); | ||
| } | ||
|
|
||
| #ifdef GPUCA_HAS_ONNX | ||
| int32_t GPUReconstructionCUDA::SetONNXGPUStream(OrtSessionOptions* session_options, int32_t stream) | ||
| { | ||
| OrtCUDAProviderOptionsV2* cuda_options = nullptr; | ||
| CreateCUDAProviderOptions(&cuda_options); | ||
|
|
||
| // std::vector<const char*> keys{"device_id", "gpu_mem_limit", "arena_extend_strategy", "cudnn_conv_algo_search", "do_copy_in_default_stream", "cudnn_conv_use_max_workspace", "cudnn_conv1d_pad_to_nc1d"}; | ||
| // std::vector<const char*> values{"0", "2147483648", "kSameAsRequested", "DEFAULT", "1", "1", "1"}; | ||
| // UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), keys.size()); | ||
|
|
||
| // this implicitly sets "has_user_compute_stream" | ||
| UpdateCUDAProviderOptionsWithValue(cuda_options, "user_compute_stream", &mInternals->Streams[stream]); | ||
| Ort::ThrowOnError(SessionOptionsAppendExecutionProvider_CUDA_V2(session_options, cuda_options)); | ||
|
|
||
| // Finally, don't forget to release the provider options | ||
| ReleaseCUDAProviderOptions(cuda_options); | ||
|
|
||
| return 0; | ||
| } | ||
| #endif // GPUCA_HAS_ONNX | ||
|
|
||
| #else // HIP | ||
| void* GPUReconstructionHIP::getGPUPointer(void* ptr) | ||
| { | ||
| void* retVal = nullptr; | ||
| GPUChkErr(hipHostGetDevicePointer(&retVal, ptr, 0)); | ||
| return retVal; | ||
| } | ||
|
|
||
| #ifdef GPUCA_HAS_ONNX | ||
| int32_t GPUReconstructionCUDA::SetONNXGPUStream(OrtSessionOptions* session_options, int32_t stream) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This should say HIP instead of CUDA. |
||
| { | ||
| // Create ROCm provider options | ||
| const auto& api = Ort::GetApi(); | ||
| OrtROCMProviderOptions rocm_options{}; | ||
| rocm_options.has_user_compute_stream = 1; // Indicate that we are passing a user stream | ||
| rocm_options.user_compute_stream = &mInternals->Streams[stream]; | ||
|
|
||
| // Append the ROCm execution provider with the custom HIP stream | ||
| Ort::ThrowOnError(api.SessionOptionsAppendExecutionProvider_ROCM(session_options, &rocm_options)); | ||
| return 0; | ||
| } | ||
| #endif // GPUCA_HAS_ONNX | ||
| #endif // __HIPCC__ | ||
|
|
||
| namespace o2::gpu | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
Why do you need the Interface header here?