Skip to content

Commit 0be7ff2

Browse files
committed
add external semaphore support for CUDA
1 parent ca38327 commit 0be7ff2

17 files changed

Lines changed: 771 additions & 5 deletions

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
Version History
22
---------------
33

4+
### Changes in v2.5.0:
5+
6+
- Added API for external semaphores imported from graphics APIs (e.g. Vulkan,
7+
Direct3D 12)
8+
49
### Changes in v2.4.1:
510

611
- Added AMD RDNA 3.5 GFX1152 GPU support

api/api.cpp

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,10 @@ OIDN_API_NAMESPACE_BEGIN
141141
}
142142
}
143143

144+
// -----------------------------------------------------------------------------------------------
145+
// Physical Device
146+
// -----------------------------------------------------------------------------------------------
147+
144148
OIDN_API int oidnGetNumPhysicalDevices()
145149
{
146150
OIDN_TRY
@@ -193,6 +197,10 @@ OIDN_API_NAMESPACE_BEGIN
193197
return nullptr;
194198
}
195199

200+
// -----------------------------------------------------------------------------------------------
201+
// Device
202+
// -----------------------------------------------------------------------------------------------
203+
196204
OIDN_API bool oidnIsCPUDeviceSupported()
197205
{
198206
OIDN_TRY
@@ -554,6 +562,10 @@ OIDN_API_NAMESPACE_BEGIN
554562
OIDN_CATCH_DEVICE(device)
555563
}
556564

565+
// -----------------------------------------------------------------------------------------------
566+
// Buffer
567+
// -----------------------------------------------------------------------------------------------
568+
557569
OIDN_API OIDNBuffer oidnNewBuffer(OIDNDevice hDevice, size_t byteSize)
558570
{
559571
Device* device = reinterpret_cast<Device*>(hDevice);
@@ -735,6 +747,100 @@ OIDN_API_NAMESPACE_BEGIN
735747
return nullptr;
736748
}
737749

750+
// -----------------------------------------------------------------------------------------------
751+
// Semaphore
752+
// -----------------------------------------------------------------------------------------------
753+
754+
// Creates a semaphore on the device from an external semaphore identified by a file descriptor.
//   hDevice: device handle; it is validated with checkHandle and must be committed
//   fdType:  external semaphore type; must be among the types returned by
//            Device::getExternalSemaphoreTypes(), otherwise Error::InvalidArgument is thrown
//   fd:      file descriptor, forwarded unchanged to Device::newExternalSemaphore
// Returns the new semaphore as an opaque handle, or nullptr when control reaches the code after
// OIDN_CATCH_DEVICE (presumably after an exception was caught and reported on the device).
OIDN_API OIDNSemaphore oidnNewSharedSemaphoreFromFD(OIDNDevice hDevice,
                                                    OIDNExternalSemaphoreTypeFlag fdType,
                                                    int fd)
{
  Device* device = reinterpret_cast<Device*>(hDevice);
  OIDN_TRY
    checkHandle(hDevice);
    OIDN_LOCK_DEVICE(device);
    device->checkCommitted();

    // Reject semaphore types that the device does not report as supported
    const ExternalSemaphoreTypeFlag semaphoreType = static_cast<ExternalSemaphoreTypeFlag>(fdType);
    if (!(semaphoreType & device->getExternalSemaphoreTypes()))
      throw Exception(Error::InvalidArgument, "external semaphore type not supported by the device");

    Ref<Semaphore> semaphore = device->newExternalSemaphore(semaphoreType, fd);

    // Return the detached pointer as the opaque API handle
    return reinterpret_cast<OIDNSemaphore>(semaphore.detach());
  OIDN_CATCH_DEVICE(device)
  return nullptr;
}
771+
772+
// Creates a semaphore on the device from an external semaphore identified by a Win32 handle or
// by a name.
//   hDevice:    device handle; it is validated with checkHandle and must be committed
//   handleType: external semaphore type; must be among the types returned by
//               Device::getExternalSemaphoreTypes(), otherwise Error::InvalidArgument is thrown
//   handle:     handle of the external semaphore, or null if it is identified by name
//   name:       name of the external semaphore, or null if it is identified by handle
// Exactly one of handle and name must be non-null, otherwise Error::InvalidArgument is thrown.
// Returns the new semaphore as an opaque handle, or nullptr when control reaches the code after
// OIDN_CATCH_DEVICE (presumably after an exception was caught and reported on the device).
OIDN_API OIDNSemaphore oidnNewSharedSemaphoreFromWin32Handle(OIDNDevice hDevice,
                                                             OIDNExternalSemaphoreTypeFlag handleType,
                                                             void* handle, const void* name)
{
  Device* device = reinterpret_cast<Device*>(hDevice);
  OIDN_TRY
    checkHandle(hDevice);
    OIDN_LOCK_DEVICE(device);
    device->checkCommitted();

    // Reject semaphore types that the device does not report as supported
    const ExternalSemaphoreTypeFlag semaphoreType = static_cast<ExternalSemaphoreTypeFlag>(handleType);
    if (!(semaphoreType & device->getExternalSemaphoreTypes()))
      throw Exception(Error::InvalidArgument, "external semaphore type not supported by the device");

    // Both null and both non-null are equally invalid: the semaphore must be identified by
    // either the handle or the name, but not both
    if ((handle == nullptr) == (name == nullptr))
      throw Exception(Error::InvalidArgument, "exactly one of the external semaphore handle and name must be non-null");

    Ref<Semaphore> semaphore = device->newExternalSemaphore(semaphoreType, handle, name);

    // Return the detached pointer as the opaque API handle
    return reinterpret_cast<OIDNSemaphore>(semaphore.detach());
  OIDN_CATCH_DEVICE(device)
  return nullptr;
}
791+
792+
// Submits a signal operation for an array of semaphores on the device.
//   hDevice:       device handle; it is validated with checkHandle and must be committed
//   hSemaphores:   array of semaphore handles
//   values:        array of 64-bit values, forwarded unchanged together with the semaphores
//   numSemaphores: element count passed along with both arrays
// The arguments are forwarded as is to Device::submitSignalSemaphores; no validation of the
// arrays or of the count happens in this function.
OIDN_API void oidnSignalSemaphoresAsync(OIDNDevice hDevice,
                                        const OIDNSemaphore* hSemaphores,
                                        const uint64_t* values,
                                        int numSemaphores)
{
  Device* device = reinterpret_cast<Device*>(hDevice);
  OIDN_TRY
    checkHandle(hDevice);
    OIDN_LOCK_DEVICE(device);
    device->checkCommitted();

    // The opaque handles are reinterpreted as pointers to the internal semaphore objects
    Semaphore* const* semaphores = reinterpret_cast<Semaphore* const*>(hSemaphores);
    device->submitSignalSemaphores(semaphores, values, numSemaphores);
  OIDN_CATCH_DEVICE(device)
}
808+
809+
// Submits a wait operation for an array of semaphores on the device.
//   hDevice:       device handle; it is validated with checkHandle and must be committed
//   hSemaphores:   array of semaphore handles
//   values:        array of 64-bit values, forwarded unchanged together with the semaphores
//   timeoutsMs:    array of 32-bit timeouts (presumably in milliseconds, judging by the name),
//                  forwarded unchanged
//   numSemaphores: element count passed along with the arrays
// The arguments are forwarded as is to Device::submitWaitSemaphores; no validation of the
// arrays or of the count happens in this function.
OIDN_API void oidnWaitSemaphoresAsync(OIDNDevice hDevice,
                                      const OIDNSemaphore* hSemaphores,
                                      const uint64_t* values,
                                      const uint32_t* timeoutsMs,
                                      int numSemaphores)
{
  Device* device = reinterpret_cast<Device*>(hDevice);
  OIDN_TRY
    checkHandle(hDevice);
    OIDN_LOCK_DEVICE(device);
    device->checkCommitted();

    // The opaque handles are reinterpreted as pointers to the internal semaphore objects
    Semaphore* const* semaphores = reinterpret_cast<Semaphore* const*>(hSemaphores);
    device->submitWaitSemaphores(semaphores, values, timeoutsMs, numSemaphores);
  OIDN_CATCH_DEVICE(device)
}
827+
828+
// Retains the semaphore: the opaque handle is reinterpreted as the internal semaphore object
// and passed to retainObject.
OIDN_API void oidnRetainSemaphore(OIDNSemaphore hSemaphore)
{
  Semaphore* object = reinterpret_cast<Semaphore*>(hSemaphore);
  retainObject(object);
}
833+
834+
// Releases the semaphore: the opaque handle is reinterpreted as the internal semaphore object
// and passed to releaseObject.
OIDN_API void oidnReleaseSemaphore(OIDNSemaphore hSemaphore)
{
  Semaphore* object = reinterpret_cast<Semaphore*>(hSemaphore);
  releaseObject(object);
}
839+
840+
// -----------------------------------------------------------------------------------------------
841+
// Filter
842+
// -----------------------------------------------------------------------------------------------
843+
738844
OIDN_API OIDNFilter oidnNewFilter(OIDNDevice hDevice, const char* type)
739845
{
740846
Device* device = reinterpret_cast<Device*>(hDevice);

core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ set(OIDN_CORE_SOURCES
6161
rt_filter.cpp
6262
rtlightmap_filter.h
6363
rtlightmap_filter.cpp
64+
semaphore.h
6465
subdevice.h
6566
subdevice.cpp
6667
tensor.h

core/device.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,32 @@ OIDN_NAMESPACE_BEGIN
280280
return getEngine()->newExternalBuffer(handleType, handle, name, byteSize)->toUser();
281281
}
282282

283+
// Creates a semaphore from an external semaphore identified by a file descriptor.
// The request is delegated unchanged to the engine returned by getEngine().
Ref<Semaphore> Device::newExternalSemaphore(ExternalSemaphoreTypeFlag fdType, int fd)
{
  Ref<Semaphore> semaphore = getEngine()->newExternalSemaphore(fdType, fd);
  return semaphore;
}
287+
288+
// Creates a semaphore from an external semaphore identified by a Win32 handle or by a name.
// The request is delegated unchanged to the engine returned by getEngine().
Ref<Semaphore> Device::newExternalSemaphore(ExternalSemaphoreTypeFlag handleType,
                                            void* handle, const void* name)
{
  Ref<Semaphore> semaphore = getEngine()->newExternalSemaphore(handleType, handle, name);
  return semaphore;
}
293+
294+
// Submits a signal operation for the given semaphores and values.
// All arguments are passed on unchanged to the engine returned by getEngine().
void Device::submitSignalSemaphores(Semaphore* const* semaphores,
                                    const uint64_t* values,
                                    int numSemaphores)
{
  getEngine()->submitSignalSemaphores(
    semaphores,
    values,
    numSemaphores);
}
300+
301+
// Submits a wait operation for the given semaphores, values and timeouts.
// All arguments are passed on unchanged to the engine returned by getEngine().
void Device::submitWaitSemaphores(Semaphore* const* semaphores,
                                  const uint64_t* values,
                                  const uint32_t* timeoutsMs,
                                  int numSemaphores)
{
  getEngine()->submitWaitSemaphores(
    semaphores,
    values,
    timeoutsMs,
    numSemaphores);
}
308+
283309
Ref<Filter> Device::newFilter(const std::string& type)
284310
{
285311
if (isVerbose(2))

core/device.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ OIDN_NAMESPACE_BEGIN
1717
class Subdevice;
1818
class Engine;
1919
class Buffer;
20+
class Semaphore;
2021
class Filter;
2122

2223
class PhysicalDevice : public RefCount
@@ -110,6 +111,22 @@ OIDN_NAMESPACE_BEGIN
110111
ExternalMemoryTypeFlags getExternalMemoryTypes() const { return externalMemoryTypes; }
111112
void trimScratch();
112113

114+
// Semaphore
115+
Ref<Semaphore> newExternalSemaphore(ExternalSemaphoreTypeFlag fdType, int fd);
116+
Ref<Semaphore> newExternalSemaphore(ExternalSemaphoreTypeFlag handleType,
117+
void* handle, const void* name);
118+
119+
void submitSignalSemaphores(Semaphore* const* semaphores,
120+
const uint64_t* values,
121+
int numSemaphores);
122+
123+
void submitWaitSemaphores(Semaphore* const* semaphores,
124+
const uint64_t* values,
125+
const uint32_t* timeoutsMs,
126+
int numSemaphores);
127+
128+
ExternalSemaphoreTypeFlags getExternalSemaphoreTypes() const { return externalSemaphoreTypes; }
129+
113130
// Executes operations on the device, making sure to wait/flush and release temporary
114131
// allocations (e.g. from ObjC) at the end, even if an exception is thrown
115132
virtual void execute(std::function<void()>&& f, SyncMode sync = SyncMode::Blocking);
@@ -147,6 +164,8 @@ OIDN_NAMESPACE_BEGIN
147164
bool managedMemorySupported = false;
148165
ExternalMemoryTypeFlags externalMemoryTypes;
149166

167+
ExternalSemaphoreTypeFlags externalSemaphoreTypes;
168+
150169
// State
151170
bool dirty = true;
152171
bool committed = false;

core/engine.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,37 @@ OIDN_NAMESPACE_BEGIN
5959
"creating a shared buffer from a Win32 handle is not supported by the device");
6060
}
6161

62+
// Base implementation of creating a semaphore from a file descriptor: it never creates
// anything and always throws Error::InvalidOperation. The method is declared virtual in
// engine.h, so engines with support for this are expected to override it.
Ref<Semaphore> Engine::newExternalSemaphore(ExternalSemaphoreTypeFlag fdType,
                                            int fd)
{
  const char* const message =
    "creating a shared semaphore from a POSIX file descriptor is not supported by the device";
  throw Exception(Error::InvalidOperation, message);
}
68+
69+
// Base implementation of creating a semaphore from a Win32 handle or name: it never creates
// anything and always throws Error::InvalidOperation. The method is declared virtual in
// engine.h, so engines with support for this are expected to override it.
Ref<Semaphore> Engine::newExternalSemaphore(ExternalSemaphoreTypeFlag handleType,
                                            void* handle, const void* name)
{
  const char* const message =
    "creating a shared semaphore from a Win32 handle is not supported by the device";
  throw Exception(Error::InvalidOperation, message);
}
75+
76+
// Base implementation of signaling semaphores: it submits nothing and always throws
// Error::InvalidOperation. The method is declared virtual in engine.h, so engines with
// support for this are expected to override it.
void Engine::submitSignalSemaphores(Semaphore* const* semaphores,
                                    const uint64_t* values,
                                    int numSemaphores)
{
  const char* const message =
    "signaling semaphores is not supported by the device";
  throw Exception(Error::InvalidOperation, message);
}
83+
84+
// Base implementation of waiting on semaphores: it submits nothing and always throws
// Error::InvalidOperation. The method is declared virtual in engine.h, so engines with
// support for this are expected to override it.
void Engine::submitWaitSemaphores(Semaphore* const* semaphores,
                                  const uint64_t* values,
                                  const uint32_t* timeoutsMs,
                                  int numSemaphores)
{
  const char* const message =
    "waiting on semaphores is not supported by the device";
  throw Exception(Error::InvalidOperation, message);
}
92+
6293
bool Engine::isSupported(const TensorDesc& desc) const
6394
{
6495
// We store tensor byte offsets in 32-bit unsigned integers

core/engine.h

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "heap.h"
1010
#include "buffer.h"
1111
#include "image.h"
12+
#include "semaphore.h"
1213
#include "progress.h"
1314

1415
OIDN_NAMESPACE_BEGIN
@@ -58,12 +59,27 @@ OIDN_NAMESPACE_BEGIN
5859

5960
virtual Ref<Buffer> newExternalBuffer(ExternalMemoryTypeFlag handleType,
6061
void* handle, const void* name, size_t byteSize);
61-
6262
// Tensor
6363
virtual bool isSupported(const TensorDesc& desc) const;
6464
virtual Ref<Tensor> newTensor(const TensorDesc& desc, Storage storage = Storage::Device);
6565
virtual Ref<Tensor> newTensor(const Ref<Buffer>& buffer, const TensorDesc& desc, size_t byteOffset = 0);
6666

67+
// Semaphore
68+
virtual Ref<Semaphore> newExternalSemaphore(ExternalSemaphoreTypeFlag fdType,
69+
int fd);
70+
71+
virtual Ref<Semaphore> newExternalSemaphore(ExternalSemaphoreTypeFlag handleType,
72+
void* handle, const void* name);
73+
74+
virtual void submitSignalSemaphores(Semaphore* const* semaphores,
75+
const uint64_t* values,
76+
int numSemaphores);
77+
78+
virtual void submitWaitSemaphores(Semaphore* const* semaphores,
79+
const uint64_t* values,
80+
const uint32_t* timeoutsMs,
81+
int numSemaphores);
82+
6783
// Ops
6884
virtual bool isConvSupported(PostOp postOp);
6985
virtual Ref<Conv> newConv(const ConvDesc& desc) = 0;

core/semaphore.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Copyright 2026 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
#pragma once
5+
6+
#include "device.h"
7+
8+
OIDN_NAMESPACE_BEGIN
9+
10+
class Semaphore : public RefCount
11+
{
12+
public:
13+
explicit Semaphore(const Ref<Device>& device) : device(device) {}
14+
15+
Device* getDevice() const { return device.get(); }
16+
17+
protected:
18+
Ref<Device> device;
19+
};
20+
21+
OIDN_NAMESPACE_END

devices/cuda/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ set(OIDN_CUDA_SOURCES
3939
cuda_engine.cu
4040
cuda_external_buffer.h
4141
cuda_external_buffer.cpp
42+
cuda_external_semaphore.h
43+
cuda_external_semaphore.cpp
4244
cuda_module.cpp
4345
cutlass_conv.h
4446
cutlass_conv_sm75.cu

devices/cuda/cuda_device.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,8 +230,19 @@ OIDN_NAMESPACE_BEGIN
230230
ExternalMemoryTypeFlag::D3D11ResourceKMT |
231231
ExternalMemoryTypeFlag::D3D12Heap |
232232
ExternalMemoryTypeFlag::D3D12Resource;
233+
234+
externalSemaphoreTypes = ExternalSemaphoreTypeFlag::OpaqueWin32 |
235+
ExternalSemaphoreTypeFlag::OpaqueWin32KMT |
236+
ExternalSemaphoreTypeFlag::D3D11Fence |
237+
ExternalSemaphoreTypeFlag::D3D12Fence |
238+
ExternalSemaphoreTypeFlag::KeyedMutex |
239+
ExternalSemaphoreTypeFlag::KeyedMutexKMT |
240+
ExternalSemaphoreTypeFlag::TimelineSemaphoreWin32;
233241
#else
234242
externalMemoryTypes = ExternalMemoryTypeFlag::OpaqueFD;
243+
244+
externalSemaphoreTypes = ExternalSemaphoreTypeFlag::OpaqueFD |
245+
ExternalSemaphoreTypeFlag::TimelineSemaphoreFD;
235246
#endif
236247

237248
subdevices.emplace_back(new Subdevice(std::unique_ptr<Engine>(new CUDAEngine(this, stream))));

0 commit comments

Comments
 (0)