22 #include <cuda_runtime_api.h> 
   33   auto thread_local cache = std::vector<int>{};
 
   34   if (cache.size() == 0) {
 
   35     auto device_count = 
int{};
 
   37     cache.resize(device_count);
 
   38     for (
auto dev = 0; dev < device_count; ++dev) {
 
   40         cudaDeviceGetAttribute(&(cache[dev]), cudaDevAttrMaxSharedMemoryPerBlockOptin, dev));
 
   48   auto thread_local cache = std::vector<int>{};
 
   49   if (cache.size() == 0) {
 
   50     auto device_count = 
int{};
 
   52     cache.resize(device_count);
 
   53     for (
auto dev = 0; dev < device_count; ++dev) {
 
   55         cudaDeviceGetAttribute(&(cache[dev]), cudaDevAttrMultiProcessorCount, dev));
 
   65     cudaDeviceGetAttribute(&result, cudaDevAttrMaxThreadsPerMultiProcessor, 
device_id.value()));
 
   71   auto thread_local cache = std::vector<int>{};
 
   72   if (cache.size() == 0) {
 
   73     auto device_count = 
int{};
 
   75     cache.resize(device_count);
 
   76     for (
auto dev = 0; dev < device_count; ++dev) {
 
   78         cudaDeviceGetAttribute(&(cache[dev]), cudaDevAttrMaxSharedMemoryPerMultiprocessor, dev));
 
   88     cudaDeviceGetAttribute(&result, cudaDevAttrMemoryClockRate, 
device_id.value()));
 
  100 auto constexpr 
static const MAX_READ_CHUNK        = 
index_type{128};
 
  101 auto constexpr 
static const MAX_BLOCKS            = 
index_type{65536};
 
  102 auto constexpr 
static const WARP_SIZE             = 
index_type{32};
 
  103 auto constexpr 
static const MAX_THREADS_PER_BLOCK = 
index_type{256};
 
  105 #if __CUDA_ARCH__ == 720 || __CUDA_ARCH__ == 750 || __CUDA_ARCH__ == 860 || \ 
  106   __CUDA_ARCH__ == 870 || __CUDA_ARCH__ == 890 || __CUDA_ARCH__ == 1200 || __CUDA_ARCH__ == 1210 
  107 auto constexpr 
static const MAX_THREADS_PER_SM = 
index_type{1024};
 
  109 auto constexpr 
static const MAX_THREADS_PER_SM = 
index_type{2048};
 
  112 auto constexpr 
static const MAX_THREADS_PER_SM = 
index_type{2048};
 
  115 auto constexpr 
static const MIN_BLOCKS_PER_SM = MAX_THREADS_PER_SM / MAX_THREADS_PER_BLOCK;
 
auto get_sm_count(raft_proto::device_id< raft_proto::device_type::gpu > device_id)
Definition: gpu_introspection.hpp:46
 
auto get_max_threads_per_sm(raft_proto::device_id< raft_proto::device_type::gpu > device_id)
Definition: gpu_introspection.hpp:61
 
auto get_mem_clock_rate(raft_proto::device_id< raft_proto::device_type::gpu > device_id)
Definition: gpu_introspection.hpp:84
 
auto get_max_shared_mem_per_sm(raft_proto::device_id< raft_proto::device_type::gpu > device_id)
Definition: gpu_introspection.hpp:69
 
auto get_core_clock_rate(raft_proto::device_id< raft_proto::device_type::gpu > device_id)
Definition: gpu_introspection.hpp:92
 
auto get_max_shared_mem_per_block(raft_proto::device_id< raft_proto::device_type::gpu > device_id)
Definition: gpu_introspection.hpp:30
 
uint32_t index_type
Definition: index_type.hpp:20
 
Definition: dbscan.hpp:29
 
void cuda_check(error_t const &err) noexcept(!GPU_ENABLED)
Definition: cuda_check.hpp:26
 
detail::device_id< D > device_id
Definition: device_id.hpp:29