9 #include <raft/core/handle.hpp>
10 #include <raft/linalg/add.cuh>
11 #include <raft/linalg/ternary_op.cuh>
12 #include <raft/util/cuda_utils.cuh>
13 #include <raft/util/cudart_utils.hpp>
18 #include <raft/linalg/detail/cublas_wrappers.hpp>
19 #include <raft/linalg/map_then_reduce.cuh>
20 #include <raft/linalg/norm.cuh>
21 #include <raft/linalg/unary_op.cuh>
23 #include <rmm/device_uvector.hpp>
46 void reset(T* data_,
int m_,
int n_)
55 inline static void gemm(
const raft::handle_t& handle,
69 ASSERT(A.
n == C.
m,
"GEMM invalid dims: m");
72 ASSERT(A.
m == C.
m,
"GEMM invalid dims: m");
76 ASSERT(B.
m == C.
n,
"GEMM invalid dims: n");
79 ASSERT(B.
n == C.
n,
"GEMM invalid dims: n");
81 ASSERT(kA == kB,
"GEMM invalid dims: k");
85 raft::linalg::detail::cublasgemm(handle.get_cublas_handle(),
86 transA ? CUBLAS_OP_T : CUBLAS_OP_N,
87 transB ? CUBLAS_OP_T : CUBLAS_OP_N,
104 gemm(handle, alpha, Acm, !transA, B, transB, beta, C, stream);
109 gemm(handle, alpha, A, transA, Bcm, !transB, beta, C, stream);
114 gemm(handle, alpha, B, !transB, A, !transA, beta, Ccm, stream);
119 inline void gemmb(
const raft::handle_t& handle,
126 cudaStream_t stream)
const override
147 B.
gemmb(handle, alpha, A, transA, transB, beta, *
this, stream);
153 ASSERT(
ord == x.
ord,
"SimpleDenseMat::ax: Storage orders must match");
155 auto scale = [a] __device__(
const T x) {
return a * x; };
156 raft::linalg::unaryOp(
data, x.
data,
len, scale, stream);
165 ASSERT(
ord == x.
ord,
"SimpleDenseMat::axpy: Storage orders must match");
166 ASSERT(
ord == y.
ord,
"SimpleDenseMat::axpy: Storage orders must match");
168 auto axpy = [a] __device__(
const T x,
const T y) {
return a * x + y; };
172 template <
typename Lambda>
175 ASSERT(
ord == other.
ord,
"SimpleDenseMat::assign_unary: Storage orders must match");
177 raft::linalg::unaryOp(
data, other.
data,
len, f, stream);
180 template <
typename Lambda>
186 ASSERT(
ord == other1.
ord,
"SimpleDenseMat::assign_binary: Storage orders must match");
187 ASSERT(
ord == other2.
ord,
"SimpleDenseMat::assign_binary: Storage orders must match");
189 raft::linalg::binaryOp(
data, other1.
data, other2.
data,
len, f, stream);
192 template <
typename Lambda>
199 ASSERT(
ord == other1.
ord,
"SimpleDenseMat::assign_ternary: Storage orders must match");
200 ASSERT(
ord == other2.
ord,
"SimpleDenseMat::assign_ternary: Storage orders must match");
201 ASSERT(
ord == other3.
ord,
"SimpleDenseMat::assign_ternary: Storage orders must match");
206 inline void fill(
const T val, cudaStream_t stream)
209 auto f = [val] __device__(
const T x) {
return val; };
215 ASSERT((
ord == other.
ord) && (this->m == other.
m) && (this->n == other.
n),
216 "SimpleDenseMat::copy: matrices not compatible");
219 cudaMemcpyAsync(
data, other.
data,
len *
sizeof(T), cudaMemcpyDeviceToDevice, stream));
222 void print(std::ostream& oss)
const override { oss << (*this) << std::endl; }
227 template <
typename T>
249 template <
typename T>
252 ASSERT(mat.
ord ==
COL_MAJOR,
"col_ref only available for column major mats");
253 T* tmp = &mat.
data[mat.
m * c];
254 mask_vec.
reset(tmp, mat.
m);
257 template <
typename T>
263 ASSERT(c_from >= 0 && c_from < mat.
n,
"col_slice: invalid from");
264 ASSERT(c_to >= 0 && c_to <= mat.
n,
"col_slice: invalid to");
266 ASSERT(mat.
ord ==
COL_MAJOR,
"col_ref only available for column major mats");
267 ASSERT(mask_mat.
ord ==
COL_MAJOR,
"col_ref only available for column major mask");
268 T* tmp = &mat.
data[mat.
m * c_from];
269 mask_mat.
reset(tmp, mat.
m, c_to - c_from);
276 template <
typename T>
279 auto f = [] __device__(
const T x,
const T y) {
return x * y; };
280 raft::linalg::mapThenSumReduce(tmp_dev, u.
len, f, stream, u.
data, v.
data);
282 raft::update_host(&tmp_host, tmp_dev, 1, stream);
288 template <
typename T>
291 return dot(u, u, tmp_dev, stream);
294 template <
typename T>
297 auto f = [] __device__(
const T x) {
return raft::abs<T>(x); };
298 auto r = [] __device__(
const T x,
const T y) {
return raft::max<T>(x, y); };
299 raft::linalg::mapThenReduce(tmp_dev, u.
len, T(0), f, r, stream, u.
data);
301 raft::update_host(&tmp_host, tmp_dev, 1, stream);
306 template <
typename T>
309 return raft::mySqrt<T>(
squaredNorm(u, tmp_dev, stream));
312 template <
typename T>
315 raft::linalg::rowNorm<raft::linalg::NormType::L1Norm, true>(
316 tmp_dev, u.
data, u.
len, 1, stream, raft::Nop<T>());
318 raft::update_host(&tmp_host, tmp_dev, 1, stream);
323 template <
typename T>
326 std::vector<T> out(v.
len);
327 raft::update_host(&out[0], v.
data, v.
len, 0);
330 for (; it < v.
len - 1;) {
331 os << out[it] <<
" ";
338 template <
typename T>
341 os <<
"ord=" << (mat.
ord ==
COL_MAJOR ?
"CM" :
"RM") <<
"\n";
342 std::vector<T> out(mat.
len);
343 raft::update_host(&out[0], mat.
data, mat.
len, rmm::cuda_stream_default);
346 for (
int r = 0; r < mat.
m; r++) {
348 for (
int c = 0; c < mat.
n - 1; c++) {
349 os << out[idx] <<
",";
352 os << out[idx] << std::endl;
355 for (
int c = 0; c < mat.
m; c++) {
357 for (
int r = 0; r < mat.
n - 1; r++) {
358 os << out[idx] <<
",";
361 os << out[idx] << std::endl;
368 template <
typename T>
384 template <
typename T>
Definition: dbscan.hpp:18
void col_slice(const SimpleDenseMat< T > &mat, SimpleDenseMat< T > &mask_mat, int c_from, int c_to)
Definition: dense.hpp:258
T nrm1(const SimpleVec< T > &u, T *tmp_dev, cudaStream_t stream)
Definition: dense.hpp:313
std::ostream & operator<<(std::ostream &os, const SimpleVec< T > &v)
Definition: dense.hpp:324
T nrmMax(const SimpleVec< T > &u, T *tmp_dev, cudaStream_t stream)
Definition: dense.hpp:295
T squaredNorm(const SimpleVec< T > &u, T *tmp_dev, cudaStream_t stream)
Definition: dense.hpp:289
T dot(const SimpleVec< T > &u, const SimpleVec< T > &v, T *tmp_dev, cudaStream_t stream)
Definition: dense.hpp:277
T nrm2(const SimpleVec< T > &u, T *tmp_dev, cudaStream_t stream)
Definition: dense.hpp:307
STORAGE_ORDER
Definition: dense.hpp:27
@ ROW_MAJOR
Definition: dense.hpp:27
@ COL_MAJOR
Definition: dense.hpp:27
void col_ref(const SimpleDenseMat< T > &mat, SimpleVec< T > &mask_vec, int c)
Definition: dense.hpp:250
void synchronize(cuda_stream stream)
Definition: cuda_stream.hpp:16
void fill(const T val, cudaStream_t stream)
Definition: dense.hpp:206
void assign_binary(const SimpleDenseMat< T > &other1, const SimpleDenseMat< T > &other2, Lambda &f, cudaStream_t stream)
Definition: dense.hpp:181
void assign_gemm(const raft::handle_t &handle, const T alpha, const SimpleDenseMat< T > &A, const bool transA, const SimpleMat< T > &B, const bool transB, const T beta, cudaStream_t stream)
Definition: dense.hpp:138
static void gemm(const raft::handle_t &handle, const T alpha, const SimpleDenseMat< T > &A, const bool transA, const SimpleDenseMat< T > &B, const bool transB, const T beta, SimpleDenseMat< T > &C, cudaStream_t stream)
Definition: dense.hpp:55
void ax(const T a, const SimpleDenseMat< T > &x, cudaStream_t stream)
Definition: dense.hpp:151
void gemmb(const raft::handle_t &handle, const T alpha, const SimpleDenseMat< T > &A, const bool transA, const bool transB, const T beta, SimpleDenseMat< T > &C, cudaStream_t stream) const override
Definition: dense.hpp:119
void assign_unary(const SimpleDenseMat< T > &other, Lambda f, cudaStream_t stream)
Definition: dense.hpp:173
SimpleDenseMat(T *data, int m, int n, STORAGE_ORDER order=COL_MAJOR)
Definition: dense.hpp:41
void axpy(const T a, const SimpleDenseMat< T > &x, const SimpleDenseMat< T > &y, cudaStream_t stream)
Definition: dense.hpp:160
SimpleDenseMat(STORAGE_ORDER order=COL_MAJOR)
Definition: dense.hpp:37
void assign_ternary(const SimpleDenseMat< T > &other1, const SimpleDenseMat< T > &other2, const SimpleDenseMat< T > &other3, Lambda &f, cudaStream_t stream)
Definition: dense.hpp:193
int len
Definition: dense.hpp:32
void operator=(const SimpleDenseMat< T > &other)=delete
void copy_async(const SimpleDenseMat< T > &other, cudaStream_t stream)
Definition: dense.hpp:213
T * data
Definition: dense.hpp:33
void print(std::ostream &oss) const override
Definition: dense.hpp:222
SimpleMat< T > Super
Definition: dense.hpp:31
void reset(T *data_, int m_, int n_)
Definition: dense.hpp:46
STORAGE_ORDER ord
Definition: dense.hpp:35
Definition: dense.hpp:385
int m
Definition: base.hpp:18
SimpleMatOwning(int m, int n, cudaStream_t stream, STORAGE_ORDER order=COL_MAJOR)
Definition: dense.hpp:395
Buffer buf
Definition: dense.hpp:388
int n
Definition: base.hpp:18
rmm::device_uvector< T > Buffer
Definition: dense.hpp:387
SimpleDenseMat< T > Super
Definition: dense.hpp:386
void operator=(const SimpleVec< T > &other)=delete
int m
Definition: base.hpp:18
int n
Definition: base.hpp:18
virtual void gemmb(const raft::handle_t &handle, const T alpha, const SimpleDenseMat< T > &A, const bool transA, const bool transB, const T beta, SimpleDenseMat< T > &C, cudaStream_t stream) const =0
Definition: dense.hpp:369
void operator=(const SimpleVec< T > &other)=delete
SimpleVecOwning(int n, cudaStream_t stream)
Definition: dense.hpp:376
SimpleVec< T > Super
Definition: dense.hpp:370
rmm::device_uvector< T > Buffer
Definition: dense.hpp:371
Buffer buf
Definition: dense.hpp:372
Definition: dense.hpp:228
SimpleDenseMat< T > Super
Definition: dense.hpp:229
void reset(T *new_data, int n)
Definition: dense.hpp:246
void assign_gemv(const raft::handle_t &handle, const T alpha, const SimpleDenseMat< T > &A, bool transA, const SimpleVec< T > &x, const T beta, cudaStream_t stream)
Definition: dense.hpp:233
SimpleVec(T *data, const int n)
Definition: dense.hpp:231
SimpleVec()
Definition: dense.hpp:244