Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cub/cub/agent/agent_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ enum BlockHistogramMemoryPreference
BLEND
};

#if !_CCCL_COMPILER(NVRTC)
#if _CCCL_HOSTED()
inline ::std::ostream& operator<<(::std::ostream& os, BlockHistogramMemoryPreference mempref)
{
switch (mempref)
Expand All @@ -52,7 +52,7 @@ inline ::std::ostream& operator<<(::std::ostream& os, BlockHistogramMemoryPrefer
return os << "<unknown BlockHistogramMemoryPreference: " << static_cast<int>(mempref) << ">";
}
}
#endif // !_CCCL_COMPILER(NVRTC)
#endif // _CCCL_HOSTED()

//! Parameterizable tuning policy type for AgentHistogram
//!
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/agent/agent_radix_sort_onesweep.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ enum RadixSortStoreAlgorithm
RADIX_SORT_STORE_ALIGNED
};

#if !_CCCL_COMPILER(NVRTC) && !defined(_CCCL_DOXYGEN_INVOKED)
#if _CCCL_HOSTED() && !defined(_CCCL_DOXYGEN_INVOKED)
inline ::std::ostream& operator<<(::std::ostream& os, RadixSortStoreAlgorithm algo)
{
switch (algo)
Expand All @@ -68,7 +68,7 @@ inline ::std::ostream& operator<<(::std::ostream& os, RadixSortStoreAlgorithm al
return os << "<unknown RadixSortStoreAlgorithm: " << static_cast<int>(algo) << ">";
}
}
#endif // !_CCCL_COMPILER(NVRTC) && !_CCCL_DOXYGEN_INVOKED
#endif // _CCCL_HOSTED() && !_CCCL_DOXYGEN_INVOKED

template <int NominalBlockThreads4B,
int NominalItemsPerThread4B,
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/block/block_load.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,7 @@ enum BlockLoadAlgorithm
BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED,
};

#if !_CCCL_COMPILER(NVRTC) && !defined(_CCCL_DOXYGEN_INVOKED)
#if _CCCL_HOSTED() && !defined(_CCCL_DOXYGEN_INVOKED)
inline ::std::ostream& operator<<(::std::ostream& os, BlockLoadAlgorithm algo)
{
switch (algo)
Expand All @@ -744,7 +744,7 @@ inline ::std::ostream& operator<<(::std::ostream& os, BlockLoadAlgorithm algo)
return os << "<unknown BlockLoadAlgorithm: " << static_cast<int>(algo) << ">";
}
}
#endif // !_CCCL_COMPILER(NVRTC) && !_CCCL_DOXYGEN_INVOKED
#endif // _CCCL_HOSTED() && !_CCCL_DOXYGEN_INVOKED

//! @rst
//! The BlockLoad class provides :ref:`collective <collective-primitives>` data movement methods for loading a linear
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/block/block_radix_rank.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ enum RadixRankAlgorithm
RADIX_RANK_MATCH_EARLY_COUNTS_ATOMIC_OR
};

#if !_CCCL_COMPILER(NVRTC) && !defined(_CCCL_DOXYGEN_INVOKED)
#if _CCCL_HOSTED() && !defined(_CCCL_DOXYGEN_INVOKED)
inline ::std::ostream& operator<<(::std::ostream& os, RadixRankAlgorithm algo)
{
switch (algo)
Expand All @@ -90,7 +90,7 @@ inline ::std::ostream& operator<<(::std::ostream& os, RadixRankAlgorithm algo)
return os << "<unknown RadixRankAlgorithm: " << static_cast<int>(algo) << ">";
}
}
#endif // !_CCCL_COMPILER(NVRTC) && !_CCCL_DOXYGEN_INVOKED
#endif // _CCCL_HOSTED() && !_CCCL_DOXYGEN_INVOKED

/** Empty callback implementation */
template <int BINS_PER_THREAD>
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/block/block_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ enum BlockReduceAlgorithm
BLOCK_REDUCE_WARP_REDUCTIONS_NONDETERMINISTIC,
};

#if !_CCCL_COMPILER(NVRTC) && !defined(_CCCL_DOXYGEN_INVOKED)
#if _CCCL_HOSTED() && !defined(_CCCL_DOXYGEN_INVOKED)
inline ::std::ostream& operator<<(::std::ostream& os, const BlockReduceAlgorithm& alg)
{
switch (alg)
Expand All @@ -166,7 +166,7 @@ inline ::std::ostream& operator<<(::std::ostream& os, const BlockReduceAlgorithm
return os << "<unknown BlockReduceAlgorithm: " << static_cast<int>(alg) << ">";
}
}
#endif // !_CCCL_COMPILER(NVRTC) && !_CCCL_DOXYGEN_INVOKED
#endif // _CCCL_HOSTED() && !_CCCL_DOXYGEN_INVOKED

//! @rst
//! The BlockReduce class provides :ref:`collective <collective-primitives>` methods for computing a
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/block/block_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ enum BlockScanAlgorithm
BLOCK_SCAN_WARP_SCANS,
};

#if !_CCCL_COMPILER(NVRTC) && !defined(_CCCL_DOXYGEN_INVOKED)
#if _CCCL_HOSTED() && !defined(_CCCL_DOXYGEN_INVOKED)
inline ::std::ostream& operator<<(::std::ostream& os, BlockScanAlgorithm algo)
{
switch (algo)
Expand All @@ -115,7 +115,7 @@ inline ::std::ostream& operator<<(::std::ostream& os, BlockScanAlgorithm algo)
return os << "<unknown BlockScanAlgorithm: " << static_cast<int>(algo) << ">";
}
}
#endif // !_CCCL_COMPILER(NVRTC) && !_CCCL_DOXYGEN_INVOKED
#endif // _CCCL_HOSTED() && !_CCCL_DOXYGEN_INVOKED

//! @rst
//! The BlockScan class provides :ref:`collective <collective-primitives>` methods for computing a parallel prefix
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/block/block_store.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,7 @@ enum BlockStoreAlgorithm
BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED,
};

#if !_CCCL_COMPILER(NVRTC) && !defined(_CCCL_DOXYGEN_INVOKED)
#if _CCCL_HOSTED() && !defined(_CCCL_DOXYGEN_INVOKED)
inline ::std::ostream& operator<<(::std::ostream& os, BlockStoreAlgorithm algo)
{
switch (algo)
Expand All @@ -563,7 +563,7 @@ inline ::std::ostream& operator<<(::std::ostream& os, BlockStoreAlgorithm algo)
return os << "<unknown BlockStoreAlgorithm: " << static_cast<int>(algo) << ">";
}
}
#endif // !_CCCL_COMPILER(NVRTC) && !_CCCL_DOXYGEN_INVOKED
#endif // _CCCL_HOSTED() && !_CCCL_DOXYGEN_INVOKED

//! @rst
//! The BlockStore class provides :ref:`collective <collective-primitives>` data movement
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/config.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@
#include <cub/util_macro.cuh> // IWYU pragma: export
#include <cub/util_namespace.cuh> // IWYU pragma: export

#if !_CCCL_COMPILER(NVRTC)
#if _CCCL_HOSTED()
# include <cuda/__nvtx/nvtx.h>
#endif // !_CCCL_COMPILER(NVRTC)
#endif // _CCCL_HOSTED()
8 changes: 4 additions & 4 deletions cub/cub/detail/delay_constructor.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ enum class delay_constructor_kind
reduce_by_key
};

#if !_CCCL_COMPILER(NVRTC)
#if _CCCL_HOSTED()
inline ::std::ostream& operator<<(::std::ostream& os, delay_constructor_kind kind)
{
switch (kind)
Expand All @@ -61,7 +61,7 @@ inline ::std::ostream& operator<<(::std::ostream& os, delay_constructor_kind kin
return os << "<unknown delay_constructor_kind: " << static_cast<int>(kind) << ">";
}
}
#endif // !_CCCL_COMPILER(NVRTC)
#endif // _CCCL_HOSTED()

struct delay_constructor_policy
{
Expand All @@ -79,13 +79,13 @@ struct delay_constructor_policy
return !(lhs == rhs);
}

#if !_CCCL_COMPILER(NVRTC)
#if _CCCL_HOSTED()
//! Writes a human-readable description of a delay_constructor_policy to @p os.
//!
//! The output lists the policy's `kind`, `delay` and `l2_write_latency` members in that order,
//! formatted like a designated initializer. Returns @p os so that insertions can be chained.
friend ::std::ostream& operator<<(::std::ostream& os, const delay_constructor_policy& p)
{
os << "delay_constructor_policy { .kind = " << p.kind;
os << ", .delay = " << p.delay;
os << ", .l2_write_latency = " << p.l2_write_latency << " }";
return os;
}
#endif // !_CCCL_COMPILER(NVRTC)
#endif // _CCCL_HOSTED()
};

template <typename DelayConstructor>
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/dispatch_adjacent_difference.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -344,12 +344,12 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE auto dispatch(
}

const adjacent_difference_policy active_policy = policy_selector(arch_id);
#if !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#if _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)
NV_IF_TARGET(
NV_IS_HOST,
(::std::stringstream ss; ss << active_policy;
_CubLog("Dispatching DeviceAdjacentDifference to arch %d with tuning: %s\n", (int) arch_id, ss.str().c_str());))
#endif // !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#endif // _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)

const int tile_size = active_policy.block_threads * active_policy.items_per_thread;
const int num_tiles = static_cast<int>(::cuda::ceil_div(num_items, tile_size));
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/dispatch_batch_memcpy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -316,12 +316,12 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t dispatch(
}
const batch_memcpy_policy active_policy = policy_selector(arch_id);

#if !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#if _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)
NV_IF_TARGET(
NV_IS_HOST,
(::std::stringstream ss; ss << active_policy; _CubLog(
"Dispatching DeviceBatchMemcpy to arch %d with tuning: %s\n", static_cast<int>(arch_id), ss.str().c_str());))
#endif
#endif // _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)

enum : uint32_t
{
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/dispatch_find.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,11 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t dispatch(

const find_policy active_policy = policy_selector(arch_id);

#if !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#if _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)
NV_IF_TARGET(NV_IS_HOST,
(std::stringstream ss; ss << active_policy;
_CubLog("Dispatching DeviceFind to arch %d with tuning: %s\n", (int) arch_id, ss.str().c_str());))
#endif // !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#endif // _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)

const int tile_size = active_policy.block_threads * active_policy.items_per_thread;
const int num_tiles = static_cast<int>(::cuda::ceil_div(num_items, tile_size));
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/dispatch_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -200,11 +200,11 @@ CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN _CCCL_FORCEINLINE auto dispatch(

const histogram_policy active_policy = policy_selector(arch_id);

#if !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#if _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)
NV_IF_TARGET(NV_IS_HOST,
(std::stringstream ss; ss << active_policy;
_CubLog("Dispatching DeviceHistogram to arch %d with tuning: %s\n", (int) arch_id, ss.str().c_str());))
#endif
#endif // _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)

const auto init_kernel = kernel_source.template HistogramInitKernel<PolicySelector>();
auto sweep_kernel = [&] {
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/dispatch_merge.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -216,12 +216,12 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t dispatch(
}

return dispatch_arch(policy_selector, arch_id, [&](auto policy_getter) {
#if !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#if _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)
NV_IF_TARGET(
NV_IS_HOST,
(std::stringstream ss; ss << policy_getter();
_CubLog("Dispatching DeviceMerge to arch %d with tuning: %s\n", static_cast<int>(arch_id), ss.str().c_str());))
#endif // !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#endif // _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)

static_assert(::cuda::std::is_empty_v<decltype(policy_getter)>);
using AgentT = typename choose_merge_agent<
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/dispatch_merge_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -507,12 +507,12 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE auto dispatch(
constexpr merge_sort_policy active_policy = vsmem_adapted_agents::policy;
#endif // CUB_DEFINE_RUNTIME_POLICIES

#if !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#if _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)
NV_IF_TARGET(
NV_IS_HOST,
(std::stringstream ss; ss << active_policy;
_CubLog("Dispatching DeviceMergeSort to arch %d with tuning: %s\n", (int) arch_id, ss.str().c_str());))
#endif
#endif // _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)

const auto tile_size = active_policy.items_per_tile();
const auto num_tiles = ::cuda::ceil_div(num_items, tile_size);
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/dispatch_radix_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1208,11 +1208,11 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t dispatch(
return error;
}

#if !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#if _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)
// Host-only debug trace: stream the tuning policy selected for arch_id into a string and log it.
// The message names DeviceRadixSort (it previously said DeviceReduce, a copy-paste slip that made
// radix-sort dispatches indistinguishable from reduce dispatches in CUB_DEBUG_LOG output).
NV_IF_TARGET(NV_IS_HOST,
(std::stringstream ss; ss << policy_selector(arch_id);
_CubLog("Dispatching DeviceRadixSort to arch %d with tuning: %s\n", (int) arch_id, ss.str().c_str());))
#endif // !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#endif // _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)

return dispatch_arch(policy_selector, arch_id, [&](auto policy_getter) {
return DispatchRadixSort<Order, KeyT, ValueT, OffsetT, DecomposerT, fake_policy, KernelSource, KernelLauncherFactory>{
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/dispatch_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -780,11 +780,11 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE auto dispatch(
}

const reduce_policy active_policy = policy_selector(arch_id);
#if !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#if _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)
NV_IF_TARGET(NV_IS_HOST,
(std::stringstream ss; ss << active_policy;
_CubLog("Dispatching DeviceReduce to arch %d with tuning: %s\n", (int) arch_id, ss.str().c_str());))
#endif // !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#endif // _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)

// Check for small, single tile size
if (num_items
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/dispatch_reduce_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -709,12 +709,12 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t dispatch(
}

return detail::dispatch_arch(policy_selector, arch_id, [&](auto policy_getter) {
#if !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#if _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)
NV_IF_TARGET(
NV_IS_HOST,
(::std::stringstream ss; ss << policy_getter(); _CubLog(
"Dispatching DeviceReduceByKey to arch %d with tuning: %s\n", static_cast<int>(arch_id), ss.str().c_str());))
#endif
#endif // _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)

const auto [block_threads, items_per_thread, vsmem_per_block] = determine_threads_items_vsmem<
decltype(policy_getter),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,12 +184,12 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE auto dispatch_nondeterministic(
}

const reduce_policy active_policy = policy_selector(arch_id);
#if !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#if _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)
NV_IF_TARGET(
NV_IS_HOST,
(std::stringstream ss; ss << active_policy; _CubLog(
"Dispatching DeviceReduceNondeterministic to arch %d with tuning: %s\n", (int) arch_id, ss.str().c_str());))
#endif // !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#endif // _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)

// No temp storage needed but keep API consistent
if (d_temp_storage == nullptr)
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/dispatch_rle.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -679,12 +679,12 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t dispatch(
}

const non_trivial_runs::rle_non_trivial_runs_policy active_policy = policy_selector(arch_id);
#if !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#if _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)
NV_IF_TARGET(
NV_IS_HOST,
(::std::stringstream ss; ss << active_policy;
_CubLog("Dispatching DeviceRle to arch %d with tuning: %s\n", static_cast<int>(arch_id), ss.str().c_str());))
#endif // !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#endif // _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)

const int block_threads = active_policy.block_threads;
const int items_per_thread = active_policy.items_per_thread;
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/dispatch_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -961,11 +961,11 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE auto dispatch(
return error;
}

#if !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#if _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)
NV_IF_TARGET(NV_IS_HOST,
(std::stringstream ss; ss << policy_selector(arch_id);
_CubLog("Dispatching DeviceScan to arch %d with tuning: %s\n", (int) arch_id, ss.str().c_str());))
#endif // !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#endif // _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)

struct fake_policy
{
Expand Down
8 changes: 4 additions & 4 deletions cub/cub/device/dispatch/dispatch_scan_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -641,12 +641,12 @@ struct DispatchScanByKey
return error;
}

#if !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#if _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)
NV_IF_TARGET(
NV_IS_HOST,
(::std::stringstream ss; ss << policy_selector(arch_id); _CubLog(
"Dispatching DeviceScanByKey to arch %d with tuning: %s\n", static_cast<int>(arch_id), ss.str().c_str());))
#endif
#endif // _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)

const detail::scan_by_key::scan_by_key_policy active_policy = policy_selector(arch_id);

Expand Down Expand Up @@ -733,12 +733,12 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE auto dispatch(
return error;
}

#if !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#if _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)
NV_IF_TARGET(
NV_IS_HOST,
(::std::stringstream ss; ss << policy_selector(arch_id);
_CubLog("Dispatching DeviceScanByKey to arch %d with tuning: %s\n", static_cast<int>(arch_id), ss.str().c_str());))
#endif // !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#endif // _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)

struct fake_policy
{
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/dispatch_segmented_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -511,12 +511,12 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE auto dispatch(
}

const segmented_reduce_policy active_policy = policy_selector(arch_id);
#if !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#if _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)
NV_IF_TARGET(
NV_IS_HOST,
(::std::stringstream ss; ss << active_policy;
_CubLog("Dispatching DeviceSegmentedReduce to arch %d with tuning: %s\n", (int) arch_id, ss.str().c_str());))
#endif // !_CCCL_COMPILER(NVRTC) && defined(CUB_DEBUG_LOG)
#endif // _CCCL_HOSTED() && defined(CUB_DEBUG_LOG)

// Compute segments_per_block based on max_segment_size hint
int segments_per_block = 1;
Expand Down
Loading
Loading