From 462821f61b404e9d925567f32dbcf8eb54a5af70 Mon Sep 17 00:00:00 2001 From: Daniel Juenger <2955913+sleeepyjack@users.noreply.github.com> Date: Mon, 24 Apr 2023 15:57:04 +0000 Subject: [PATCH 1/5] Add base class for extent --- include/cuco/detail/extent_base.cuh | 37 +++++++++++++++++++++++++++++ include/cuco/extent.cuh | 9 +++---- 2 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 include/cuco/detail/extent_base.cuh diff --git a/include/cuco/detail/extent_base.cuh b/include/cuco/detail/extent_base.cuh new file mode 100644 index 000000000..063fcc741 --- /dev/null +++ b/include/cuco/detail/extent_base.cuh @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cuco::experimental::detail { + +/** + * @brief Base class of public extent class. + * + * This class should not be used directly. + * + * @tparam SizeType Size type + */ +template +class extent_base { + static_assert(std::is_integral_v, "SizeType bust be integral."); + + public: + using value_type = SizeType; ///< Extent value type +}; +} // namespace cuco::experimental::detail \ No newline at end of file diff --git a/include/cuco/extent.cuh b/include/cuco/extent.cuh index b825188ed..131cb778d 100644 --- a/include/cuco/extent.cuh +++ b/include/cuco/extent.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include @@ -32,8 +33,8 @@ static constexpr std::size_t dynamic_extent = static_cast(-1); * @tparam N Extent */ template -struct extent { - using value_type = SizeType; ///< Extent value type +struct extent : private detail::extent_base { + using value_type = typename detail::extent_base::value_type; ///< Extent value type constexpr extent() = default; @@ -67,8 +68,8 @@ struct extent { * @tparam SizeType Size type */ template -struct extent { - using value_type = SizeType; ///< Extent value type +struct extent : private detail::extent_base { + using value_type = typename detail::extent_base::value_type; ///< Extent value type /** * @brief Constructs extent from a given `size`. From a9dbc748d2ec7b7d327345690141793dd9f6ac0e Mon Sep 17 00:00:00 2001 From: Daniel Juenger <2955913+sleeepyjack@users.noreply.github.com> Date: Tue, 25 Apr 2023 01:40:16 +0000 Subject: [PATCH 2/5] Introduce cuco::cuda_stream_ref --- .../hash_table/static_set/contains_bench.cu | 2 +- .../hash_table/static_set/insert_bench.cu | 4 +- .../static_set/retrieve_all_bench.cu | 2 +- .../hash_table/static_set/size_bench.cu | 2 +- include/cuco/cuda_stream_ref.hpp | 142 ++++++++++++++++++ include/cuco/detail/cuda_stream_ref.inl | 50 ++++++ include/cuco/detail/static_set/static_set.inl | 23 +-- include/cuco/detail/storage/aow_storage.cuh | 3 +- .../cuco/detail/storage/counter_storage.cuh | 7 +- include/cuco/static_set.cuh | 19 +-- 10 files changed, 225 insertions(+), 29 deletions(-) create mode 100644 include/cuco/cuda_stream_ref.hpp create mode 100644 include/cuco/detail/cuda_stream_ref.inl diff --git a/benchmarks/hash_table/static_set/contains_bench.cu b/benchmarks/hash_table/static_set/contains_bench.cu index b0c0f34f4..697b98574 100644 --- a/benchmarks/hash_table/static_set/contains_bench.cu +++ b/benchmarks/hash_table/static_set/contains_bench.cu @@ -54,7 +54,7 @@ void static_set_contains(nvbench::state& state, nvbench::type_list) state.add_element_count(num_keys); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { - set.contains(keys.begin(), keys.end(), result.begin(), launch.get_stream()); + set.contains(keys.begin(), keys.end(), result.begin(), {launch.get_stream()}); }); } diff --git a/benchmarks/hash_table/static_set/insert_bench.cu b/benchmarks/hash_table/static_set/insert_bench.cu index cb5dcf1f8..48bc37fa4 100644 --- a/benchmarks/hash_table/static_set/insert_bench.cu +++ b/benchmarks/hash_table/static_set/insert_bench.cu @@ -48,10 +48,10 @@ void static_set_insert(nvbench::state& state, nvbench::type_list) state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) { cuco::experimental::static_set set{ - size, cuco::empty_key{-1}, {}, {}, {}, launch.get_stream()}; + size, cuco::empty_key{-1}, {}, {}, {}, {launch.get_stream()}}; timer.start(); - set.insert(keys.begin(), keys.end(), launch.get_stream()); + set.insert(keys.begin(), keys.end(), {launch.get_stream()}); timer.stop(); }); } diff --git a/benchmarks/hash_table/static_set/retrieve_all_bench.cu b/benchmarks/hash_table/static_set/retrieve_all_bench.cu index fb52b251b..17ea66384 100644 --- a/benchmarks/hash_table/static_set/retrieve_all_bench.cu +++ b/benchmarks/hash_table/static_set/retrieve_all_bench.cu @@ -50,7 +50,7 @@ void static_set_retrieve_all(nvbench::state& state, nvbench::type_list) set.insert(keys.begin(), keys.end()); state.exec(nvbench::exec_tag::sync, - [&](nvbench::launch& launch) { auto const size = set.size(launch.get_stream()); }); + [&](nvbench::launch& launch) { auto const size = set.size({launch.get_stream()}); }); } NVBENCH_BENCH_TYPES(static_set_size, diff --git a/include/cuco/cuda_stream_ref.hpp b/include/cuco/cuda_stream_ref.hpp new file mode 100644 index 000000000..bf0a5dea9 --- /dev/null +++ b/include/cuco/cuda_stream_ref.hpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include + +namespace cuco { +namespace experimental { + +/** + * @brief Strongly-typed non-owning wrapper for CUDA streams with default constructor. + * + * This wrapper is simply a "view": it does not own the lifetime of the stream it wraps. + */ +class cuda_stream_ref { + public: + constexpr cuda_stream_ref() = default; ///< Default constructor + constexpr cuda_stream_ref(cuda_stream_ref const&) = default; ///< Copy constructor + constexpr cuda_stream_ref(cuda_stream_ref&&) = default; ///< Move constructor + + /** + * @brief Copy-assignment operator. + * + * @return Copy of this stream reference. + */ + constexpr cuda_stream_ref& operator=(cuda_stream_ref const&) = default; + + /** + * @brief Move-assignment operator. + * + * @return New location of this stream reference. + */ + constexpr cuda_stream_ref& operator=(cuda_stream_ref&&) = default; ///< Move-assignment operator + + ~cuda_stream_ref() = default; + + constexpr cuda_stream_ref(int) = delete; //< Prevent cast from literal 0 + constexpr cuda_stream_ref(std::nullptr_t) = delete; //< Prevent cast from nullptr + + /** + * @brief Implicit conversion from `cudaStream_t`. + * + * @param stream The CUDA stream to reference. + */ + constexpr cuda_stream_ref(cudaStream_t stream) noexcept : stream_{stream} {} + + /** + * @brief Get the wrapped stream. + * + * @return The wrapped stream. + */ + [[nodiscard]] constexpr cudaStream_t value() const noexcept { return stream_; } + + /** + * @brief Implicit conversion to `cudaStream_t`. + * + * @return The underlying `cudaStream_t`. + */ + constexpr operator cudaStream_t() const noexcept { return value(); } + + /** + * @brief Return true if the wrapped stream is the CUDA per-thread default stream. + * + * @return True if the wrapped stream is the per-thread default stream; else false. + */ + [[nodiscard]] inline bool is_per_thread_default() const noexcept; + + /** + * @brief Return true if the wrapped stream is explicitly the CUDA legacy default stream. + * + * @return True if the wrapped stream is the default stream; else false. + */ + [[nodiscard]] inline bool is_default() const noexcept; + + /** + * @brief Synchronize the viewed CUDA stream. + * + * Calls `cudaStreamSynchronize()`. + * + * @throw cuco::cuda_error if stream synchronization fails + */ + void synchronize() const; + + private: + cudaStream_t stream_{}; +}; + +/** + * @brief Static `cuda_stream_ref` of the default stream (stream 0), for convenience + */ +static constexpr cuda_stream_ref cuda_stream_default{}; + +/** + * @brief Static `cuda_stream_ref` of cudaStreamLegacy, for convenience + */ +static const cuda_stream_ref cuda_stream_legacy{cudaStreamLegacy}; + +/** + * @brief Static `cuda_stream_ref` of cudaStreamPerThread, for convenience + */ +static const cuda_stream_ref cuda_stream_per_thread{cudaStreamPerThread}; + +// /** +// * @brief Equality comparison operator for streams +// * +// * @param lhs The first stream view to compare +// * @param rhs The second stream view to compare +// * @return true if equal, false if unequal +// */ +// inline bool operator==(cuda_stream_ref lhs, cuda_stream_ref rhs) +// { +// return lhs.value() == rhs.value(); +// } + +// /** +// * @brief Inequality comparison operator for streams +// * +// * @param lhs The first stream view to compare +// * @param rhs The second stream view to compare +// * @return true if unequal, false if equal +// */ +// inline bool operator!=(cuda_stream_ref lhs, cuda_stream_ref rhs) { return not(lhs == rhs); } + +} // namespace experimental +} // namespace cuco + +#include \ No newline at end of file diff --git a/include/cuco/detail/cuda_stream_ref.inl b/include/cuco/detail/cuda_stream_ref.inl new file mode 100644 index 000000000..64aa078aa --- /dev/null +++ b/include/cuco/detail/cuda_stream_ref.inl @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +namespace cuco { +namespace experimental { + +[[nodiscard]] inline bool cuda_stream_ref::is_per_thread_default() const noexcept +{ +#ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM + return value() == cuda_stream_per_thread || value() == nullptr; +#else + return value() == cuda_stream_per_thread; +#endif +} + +[[nodiscard]] inline bool cuda_stream_ref::is_default() const noexcept +{ +#ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM + return value() == cuda_stream_legacy; +#else + return value() == cuda_stream_legacy || value() == nullptr; +#endif +} + +inline void cuda_stream_ref::synchronize() const +{ + CUCO_CUDA_TRY(cudaStreamSynchronize(this->stream_)); +} + +} // namespace experimental +} // namespace cuco \ No newline at end of file diff --git a/include/cuco/detail/static_set/static_set.inl b/include/cuco/detail/static_set/static_set.inl index 324387b0e..0198b91d4 100644 --- a/include/cuco/detail/static_set/static_set.inl +++ b/include/cuco/detail/static_set/static_set.inl @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -50,7 +51,7 @@ constexpr static_set static_set::size_type static_set::insert( - InputIt first, InputIt last, cudaStream_t stream) + InputIt first, InputIt last, cuda_stream_ref stream) { auto const num_keys = cuco::detail::distance(first, last); if (num_keys == 0) { return 0; } @@ -99,7 +100,7 @@ template template void static_set::insert_async( - InputIt first, InputIt last, cudaStream_t stream) + InputIt first, InputIt last, cuda_stream_ref stream) { auto const num_keys = cuco::detail::distance(first, last); if (num_keys == 0) { return; } @@ -124,7 +125,7 @@ template static_set::size_type static_set::insert_if( - InputIt first, InputIt last, StencilIt stencil, Predicate pred, cudaStream_t stream) + InputIt first, InputIt last, StencilIt stencil, Predicate pred, cuda_stream_ref stream) { auto const num_keys = cuco::detail::distance(first, last); if (num_keys == 0) { return 0; } @@ -152,7 +153,7 @@ template template void static_set::insert_if_async( - InputIt first, InputIt last, StencilIt stencil, Predicate pred, cudaStream_t stream) + InputIt first, InputIt last, StencilIt stencil, Predicate pred, cuda_stream_ref stream) { auto const num_keys = cuco::detail::distance(first, last); if (num_keys == 0) { return; } @@ -175,10 +176,10 @@ template template void static_set::contains( - InputIt first, InputIt last, OutputIt output_begin, cudaStream_t stream) const + InputIt first, InputIt last, OutputIt output_begin, cuda_stream_ref stream) const { contains_async(first, last, output_begin, stream); - CUCO_CUDA_TRY(cudaStreamSynchronize(stream)); + stream.synchronize(); } template template void static_set::contains_async( - InputIt first, InputIt last, OutputIt output_begin, cudaStream_t stream) const + InputIt first, InputIt last, OutputIt output_begin, cuda_stream_ref stream) const { auto const num_keys = cuco::detail::distance(first, last); if (num_keys == 0) { return; } @@ -219,7 +220,7 @@ template template OutputIt static_set::retrieve_all( - OutputIt output_begin, cudaStream_t stream) const + OutputIt output_begin, cuda_stream_ref stream) const { auto begin = thrust::make_transform_iterator(thrust::counting_iterator(0), detail::get_slot(storage_.ref())); @@ -248,7 +249,7 @@ OutputIt static_set::deallocate( temp_allocator, reinterpret_cast(d_num_out), sizeof(size_type)); temp_allocator.deallocate(d_temp_storage, temp_storage_bytes); @@ -265,7 +266,7 @@ template static_set::size_type static_set::size( - cudaStream_t stream) const + cuda_stream_ref stream) const { auto counter = detail::counter_storage{allocator_}; counter.reset(stream); diff --git a/include/cuco/detail/storage/aow_storage.cuh b/include/cuco/detail/storage/aow_storage.cuh index 316f7fbe5..0d35cf49d 100644 --- a/include/cuco/detail/storage/aow_storage.cuh +++ b/include/cuco/detail/storage/aow_storage.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -226,7 +227,7 @@ class aow_storage : public aow_storage_base { * @param key Key to which all keys in `slots` are initialized * @param stream Stream used for executing the kernel */ - void initialize(value_type key, cudaStream_t stream) noexcept + void initialize(value_type key, cuda_stream_ref stream) noexcept { auto constexpr stride = 4; auto const grid_size = (this->num_windows() + stride * detail::CUCO_DEFAULT_BLOCK_SIZE - 1) / diff --git a/include/cuco/detail/storage/counter_storage.cuh b/include/cuco/detail/storage/counter_storage.cuh index bf87357a3..021e530d9 100644 --- a/include/cuco/detail/storage/counter_storage.cuh +++ b/include/cuco/detail/storage/counter_storage.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -64,7 +65,7 @@ class counter_storage : public storage_basedata(), 0, sizeof(value_type), stream)); @@ -92,12 +93,12 @@ class counter_storage : public storage_basedata(), sizeof(size_type), cudaMemcpyDeviceToHost, stream)); - CUCO_CUDA_TRY(cudaStreamSynchronize(stream)); + stream.synchronize(); return h_count; } diff --git a/include/cuco/static_set.cuh b/include/cuco/static_set.cuh index 985626444..256fd1131 100644 --- a/include/cuco/static_set.cuh +++ b/include/cuco/static_set.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -165,7 +166,7 @@ class static_set { KeyEqual pred = {}, ProbingScheme const& probing_scheme = {}, Allocator const& alloc = {}, - cudaStream_t stream = nullptr); + cuda_stream_ref stream = {}); /** * @brief Inserts all keys in the range `[first, last)` and returns the number of successful @@ -185,7 +186,7 @@ class static_set { * @return Number of successfully inserted keys */ template - size_type insert(InputIt first, InputIt last, cudaStream_t stream = nullptr); + size_type insert(InputIt first, InputIt last, cuda_stream_ref stream = {}); /** * @brief Asynchonously inserts all keys in the range `[first, last)`. @@ -199,7 +200,7 @@ class static_set { * @param stream CUDA stream used for insert */ template - void insert_async(InputIt first, InputIt last, cudaStream_t stream = nullptr); + void insert_async(InputIt first, InputIt last, cuda_stream_ref stream = {}); /** * @brief Inserts keys in the range `[first, last)` if `pred` of the corresponding stencil returns @@ -227,7 +228,7 @@ class static_set { */ template size_type insert_if( - InputIt first, InputIt last, StencilIt stencil, Predicate pred, cudaStream_t stream = nullptr); + InputIt first, InputIt last, StencilIt stencil, Predicate pred, cuda_stream_ref stream = {}); /** * @brief Asynchonously inserts keys in the range `[first, last)` if `pred` of the corresponding @@ -251,7 +252,7 @@ class static_set { */ template void insert_if_async( - InputIt first, InputIt last, StencilIt stencil, Predicate pred, cudaStream_t stream = nullptr); + InputIt first, InputIt last, StencilIt stencil, Predicate pred, cuda_stream_ref stream = {}); /** * @brief Indicates whether the keys in the range `[first, last)` are contained in the set. @@ -271,7 +272,7 @@ class static_set { void contains(InputIt first, InputIt last, OutputIt output_begin, - cudaStream_t stream = nullptr) const; + cuda_stream_ref stream = {}) const; /** * @brief Asynchonously indicates whether the keys in the range `[first, last)` are contained in @@ -289,7 +290,7 @@ class static_set { void contains_async(InputIt first, InputIt last, OutputIt output_begin, - cudaStream_t stream = nullptr) const; + cuda_stream_ref stream = {}) const; /** * @brief Retrieves all keys contained in the set. @@ -309,7 +310,7 @@ class static_set { * @return Iterator indicating the end of the output */ template - [[nodiscard]] OutputIt retrieve_all(OutputIt output_begin, cudaStream_t stream = nullptr) const; + [[nodiscard]] OutputIt retrieve_all(OutputIt output_begin, cuda_stream_ref stream = {}) const; /** * @brief Gets the number of elements in the container. @@ -319,7 +320,7 @@ class static_set { * @param stream CUDA stream used to get the number of inserted elements * @return The number of elements in the container */ - [[nodiscard]] size_type size(cudaStream_t stream = nullptr) const; + [[nodiscard]] size_type size(cuda_stream_ref stream = {}) const; /** * @brief Gets the maximum number of elements the hash map can hold. From 91b5cb1a1b1f0e73455f721dfd9059ca402b2540 Mon Sep 17 00:00:00 2001 From: Daniel Juenger <2955913+sleeepyjack@users.noreply.github.com> Date: Tue, 25 Apr 2023 23:32:31 +0000 Subject: [PATCH 3/5] Re-order template parameters so thread scope can be set explicitly --- include/cuco/detail/static_set/static_set.inl | 56 +++++++++---------- include/cuco/static_set.cuh | 4 +- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/include/cuco/detail/static_set/static_set.inl b/include/cuco/detail/static_set/static_set.inl index 0198b91d4..d77bee31c 100644 --- a/include/cuco/detail/static_set/static_set.inl +++ b/include/cuco/detail/static_set/static_set.inl @@ -39,13 +39,13 @@ namespace cuco { namespace experimental { template -constexpr static_set::static_set( +constexpr static_set::static_set( Extent capacity, empty_key empty_key_sentinel, KeyEqual pred, @@ -62,15 +62,15 @@ constexpr static_set template -static_set::size_type -static_set::insert( +static_set::size_type +static_set::insert( InputIt first, InputIt last, cuda_stream_ref stream) { auto const num_keys = cuco::detail::distance(first, last); @@ -92,14 +92,14 @@ static_set::ins } template template -void static_set::insert_async( +void static_set::insert_async( InputIt first, InputIt last, cuda_stream_ref stream) { auto const num_keys = cuco::detail::distance(first, last); @@ -116,15 +116,15 @@ void static_set } template template -static_set::size_type -static_set::insert_if( +static_set::size_type +static_set::insert_if( InputIt first, InputIt last, StencilIt stencil, Predicate pred, cuda_stream_ref stream) { auto const num_keys = cuco::detail::distance(first, last); @@ -145,14 +145,14 @@ static_set::ins } template template -void static_set::insert_if_async( +void static_set::insert_if_async( InputIt first, InputIt last, StencilIt stencil, Predicate pred, cuda_stream_ref stream) { auto const num_keys = cuco::detail::distance(first, last); @@ -168,14 +168,14 @@ void static_set } template template -void static_set::contains( +void static_set::contains( InputIt first, InputIt last, OutputIt output_begin, cuda_stream_ref stream) const { contains_async(first, last, output_begin, stream); @@ -183,14 +183,14 @@ void static_set } template template -void static_set::contains_async( +void static_set::contains_async( InputIt first, InputIt last, OutputIt output_begin, cuda_stream_ref stream) const { auto const num_keys = cuco::detail::distance(first, last); @@ -212,14 +212,14 @@ void static_set } template template -OutputIt static_set::retrieve_all( +OutputIt static_set::retrieve_all( OutputIt output_begin, cuda_stream_ref stream) const { auto begin = thrust::make_transform_iterator(thrust::counting_iterator(0), @@ -258,14 +258,14 @@ OutputIt static_set -static_set::size_type -static_set::size( +static_set::size_type +static_set::size( cuda_stream_ref stream) const { auto counter = detail::counter_storage{allocator_}; @@ -285,42 +285,42 @@ static_set::siz } template constexpr auto -static_set::capacity() +static_set::capacity() const noexcept { return storage_.capacity(); } template -constexpr static_set::key_type -static_set::empty_key_sentinel() +constexpr static_set::key_type +static_set::empty_key_sentinel() const noexcept { return empty_key_sentinel_; } template template -auto static_set::ref( +auto static_set::ref( Operators...) const noexcept { static_assert(sizeof...(Operators), "No operators specified"); diff --git a/include/cuco/static_set.cuh b/include/cuco/static_set.cuh index 256fd1131..308ccc9c4 100644 --- a/include/cuco/static_set.cuh +++ b/include/cuco/static_set.cuh @@ -73,8 +73,8 @@ namespace experimental { * @throw If the probing scheme type is not inherited from `cuco::detail::probing_scheme_base` * * @tparam Key Type used for keys. Requires `cuco::is_bitwise_comparable_v` - * @tparam Extent Data structure size type * @tparam Scope The scope in which operations will be performed by individual threads. + * @tparam Extent Data structure size type * @tparam KeyEqual Binary callable type used to compare two keys for equality * @tparam ProbingScheme Probing scheme (see `include/cuco/probing_scheme.cuh` for choices) * @tparam Allocator Type of allocator used for device storage @@ -82,8 +82,8 @@ namespace experimental { */ template , cuda::thread_scope Scope = cuda::thread_scope_device, + class Extent = cuco::experimental::extent, class KeyEqual = thrust::equal_to, class ProbingScheme = experimental::double_hashing<4, // CG size cuco::murmurhash3_32, From 428547567d6a0cf4c9d72e2b05d97d3b70e68adf Mon Sep 17 00:00:00 2001 From: Daniel Juenger <2955913+sleeepyjack@users.noreply.github.com> Date: Tue, 25 Apr 2023 23:59:45 +0000 Subject: [PATCH 4/5] Add storage dummy to ctor arguments --- benchmarks/hash_table/static_set/insert_bench.cu | 2 +- include/cuco/detail/static_set/static_set.inl | 1 + include/cuco/static_set.cuh | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/benchmarks/hash_table/static_set/insert_bench.cu b/benchmarks/hash_table/static_set/insert_bench.cu index 48bc37fa4..7a9290444 100644 --- a/benchmarks/hash_table/static_set/insert_bench.cu +++ b/benchmarks/hash_table/static_set/insert_bench.cu @@ -48,7 +48,7 @@ void static_set_insert(nvbench::state& state, nvbench::type_list) state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) { cuco::experimental::static_set set{ - size, cuco::empty_key{-1}, {}, {}, {}, {launch.get_stream()}}; + size, cuco::empty_key{-1}, {}, {}, {}, {}, {launch.get_stream()}}; timer.start(); set.insert(keys.begin(), keys.end(), {launch.get_stream()}); diff --git a/include/cuco/detail/static_set/static_set.inl b/include/cuco/detail/static_set/static_set.inl index d77bee31c..fdee99ecb 100644 --- a/include/cuco/detail/static_set/static_set.inl +++ b/include/cuco/detail/static_set/static_set.inl @@ -51,6 +51,7 @@ constexpr static_set Date: Wed, 26 Apr 2023 00:15:08 +0000 Subject: [PATCH 5/5] Add factory function for static_set --- .../hash_table/static_set/insert_bench.cu | 6 +- include/cuco/detail/static_set/static_set.inl | 22 ++++ include/cuco/detail/traits.hpp | 118 ++++++++++++++++++ include/cuco/detail/utils.hpp | 40 ++++++ include/cuco/static_set.cuh | 4 + 5 files changed, 188 insertions(+), 2 deletions(-) create mode 100644 include/cuco/detail/traits.hpp diff --git a/benchmarks/hash_table/static_set/insert_bench.cu b/benchmarks/hash_table/static_set/insert_bench.cu index 7a9290444..c2443c107 100644 --- a/benchmarks/hash_table/static_set/insert_bench.cu +++ b/benchmarks/hash_table/static_set/insert_bench.cu @@ -47,8 +47,10 @@ void static_set_insert(nvbench::state& state, nvbench::type_list) state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) { - cuco::experimental::static_set set{ - size, cuco::empty_key{-1}, {}, {}, {}, {}, {launch.get_stream()}}; + auto set = cuco::experimental::make_static_set( + cuco::experimental::extent{size}, + cuco::empty_key{-1}, + cuco::experimental::cuda_stream_ref{launch.get_stream()}); timer.start(); set.insert(keys.begin(), keys.end(), {launch.get_stream()}); diff --git a/include/cuco/detail/static_set/static_set.inl b/include/cuco/detail/static_set/static_set.inl index fdee99ecb..b8fc8cb49 100644 --- a/include/cuco/detail/static_set/static_set.inl +++ b/include/cuco/detail/static_set/static_set.inl @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -328,5 +329,26 @@ auto static_set return ref_type{ cuco::empty_key(empty_key_sentinel_), predicate_, probing_scheme_, storage_.ref()}; } + +template +constexpr auto make_static_set(Args&&... args) +{ + // TODO don't repeat defaults + return static_set{ + detail::find_arg(std::forward(args)...), // required parameter + detail::find_arg>(std::forward(args)...), // required parameter + detail::find_arg::template is_equal_functor_t>( + std::forward(args)..., thrust::equal_to{}), + detail::find_arg(std::forward(args)..., + double_hashing<4, // CG size + cuco::murmurhash3_32, + cuco::murmurhash3_32>{}), + detail::find_arg(std::forward(args)..., + cuco::cuda_allocator{}), + detail::find_arg(std::forward(args)..., + cuco::experimental::aow_storage<1>{}), + detail::find_arg(std::forward(args)..., cuda_stream_ref{})}; +} + } // namespace experimental } // namespace cuco diff --git a/include/cuco/detail/traits.hpp b/include/cuco/detail/traits.hpp new file mode 100644 index 000000000..722b0f9c3 --- /dev/null +++ b/include/cuco/detail/traits.hpp @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +// #include // FIXME check inheritance +#include + +#include +#include + +namespace cuco::experimental::detail { + +// Trait to check if T is a valid probing scheme type +template +struct is_probing_scheme : std::false_type { +}; + +template +struct is_probing_scheme< + T, + std::enable_if_t< + std::is_base_of_v, T>>> + : std::true_type { +}; + +template +inline constexpr bool is_probing_scheme_v = is_probing_scheme::value; + +template +struct is_storage : std::false_type { +}; + +template +struct is_storage< + T, + std::enable_if_t, T>>> + : std::true_type { +}; + +/* FIXME +std::enable_if_t, + T>>> : std::true_type {}; +*/ + +template +inline constexpr bool is_storage_v = is_storage::value; + +// Trait to check if T is a `cuco::extent` +template +struct is_extent : std::false_type { +}; + +template +struct is_extent< + T, + std::enable_if_t< + std::is_base_of_v, T>>> + : std::true_type { +}; + +template +inline constexpr bool is_extent_v = is_extent::value; + +// Trait to check if T is allocator-like +template > +struct is_allocator : std::false_type { +}; + +template +struct is_allocator().allocate(std::size_t{})), + decltype(std::declval().deallocate( + std::declval(), std::size_t{}))>> + : std::true_type { +}; + +template +inline constexpr bool is_allocator_v = is_allocator::value; + +template +struct key_equal_traits { + template > + struct is_equal_functor : std::false_type { + }; + + template + struct is_equal_functor< + T, + std::void_t>>> + : std::true_type { + }; + + template + using is_equal_functor_t = typename is_equal_functor::type; + + template + static constexpr bool is_equal_functor_v = is_equal_functor::value; +}; + +} // namespace cuco::experimental::detail \ No newline at end of file diff --git a/include/cuco/detail/utils.hpp b/include/cuco/detail/utils.hpp index 513ccd559..50b9034cd 100644 --- a/include/cuco/detail/utils.hpp +++ b/include/cuco/detail/utils.hpp @@ -105,4 +105,44 @@ constexpr ForwardIt lower_bound(ForwardIt first, ForwardIt last, const T& value) } } // namespace detail + +namespace experimental::detail { + +// TODO docs +template