20 #include <raft/core/handle.hpp>
21 #include <raft/linalg/add.cuh>
22 #include <raft/linalg/ternary_op.cuh>
23 #include <raft/util/cuda_utils.cuh>
24 #include <raft/util/cudart_utils.hpp>
29 #include <raft/linalg/detail/cublas_wrappers.hpp>
30 #include <raft/linalg/map_then_reduce.cuh>
31 #include <raft/linalg/norm.cuh>
32 #include <raft/linalg/unary_op.cuh>
34 #include <rmm/device_uvector.hpp>
57 void reset(T* data_,
int m_,
int n_)
66 inline static void gemm(
const raft::handle_t& handle,
80 ASSERT(A.
n == C.
m,
"GEMM invalid dims: m");
83 ASSERT(A.
m == C.
m,
"GEMM invalid dims: m");
87 ASSERT(B.
m == C.
n,
"GEMM invalid dims: n");
90 ASSERT(B.
n == C.
n,
"GEMM invalid dims: n");
92 ASSERT(kA == kB,
"GEMM invalid dims: k");
96 raft::linalg::detail::cublasgemm(handle.get_cublas_handle(),
97 transA ? CUBLAS_OP_T : CUBLAS_OP_N,
98 transB ? CUBLAS_OP_T : CUBLAS_OP_N,
115 gemm(handle, alpha, Acm, !transA, B, transB, beta, C, stream);
120 gemm(handle, alpha, A, transA, Bcm, !transB, beta, C, stream);
125 gemm(handle, alpha, B, !transB, A, !transA, beta, Ccm, stream);
130 inline void gemmb(
const raft::handle_t& handle,
137 cudaStream_t stream)
const override
158 B.
gemmb(handle, alpha, A, transA, transB, beta, *
this, stream);
164 ASSERT(
ord == x.
ord,
"SimpleDenseMat::ax: Storage orders must match");
166 auto scale = [a] __device__(
const T x) {
return a * x; };
167 raft::linalg::unaryOp(
data, x.
data,
len, scale, stream);
176 ASSERT(
ord == x.
ord,
"SimpleDenseMat::axpy: Storage orders must match");
177 ASSERT(
ord == y.
ord,
"SimpleDenseMat::axpy: Storage orders must match");
179 auto axpy = [a] __device__(
const T x,
const T y) {
return a * x + y; };
183 template <
typename Lambda>
186 ASSERT(
ord == other.
ord,
"SimpleDenseMat::assign_unary: Storage orders must match");
188 raft::linalg::unaryOp(
data, other.
data,
len, f, stream);
191 template <
typename Lambda>
197 ASSERT(
ord == other1.
ord,
"SimpleDenseMat::assign_binary: Storage orders must match");
198 ASSERT(
ord == other2.
ord,
"SimpleDenseMat::assign_binary: Storage orders must match");
200 raft::linalg::binaryOp(
data, other1.
data, other2.
data,
len, f, stream);
203 template <
typename Lambda>
210 ASSERT(
ord == other1.
ord,
"SimpleDenseMat::assign_ternary: Storage orders must match");
211 ASSERT(
ord == other2.
ord,
"SimpleDenseMat::assign_ternary: Storage orders must match");
212 ASSERT(
ord == other3.
ord,
"SimpleDenseMat::assign_ternary: Storage orders must match");
217 inline void fill(
const T val, cudaStream_t stream)
220 auto f = [val] __device__(
const T x) {
return val; };
226 ASSERT((
ord == other.
ord) && (this->m == other.
m) && (this->n == other.
n),
227 "SimpleDenseMat::copy: matrices not compatible");
230 cudaMemcpyAsync(
data, other.
data,
len *
sizeof(T), cudaMemcpyDeviceToDevice, stream));
233 void print(std::ostream& oss)
const override { oss << (*this) << std::endl; }
238 template <
typename T>
260 template <
typename T>
263 ASSERT(mat.
ord ==
COL_MAJOR,
"col_ref only available for column major mats");
264 T* tmp = &mat.
data[mat.
m * c];
265 mask_vec.
reset(tmp, mat.
m);
268 template <
typename T>
274 ASSERT(c_from >= 0 && c_from < mat.
n,
"col_slice: invalid from");
275 ASSERT(c_to >= 0 && c_to <= mat.
n,
"col_slice: invalid to");
277 ASSERT(mat.
ord ==
COL_MAJOR,
"col_ref only available for column major mats");
278 ASSERT(mask_mat.
ord ==
COL_MAJOR,
"col_ref only available for column major mask");
279 T* tmp = &mat.
data[mat.
m * c_from];
280 mask_mat.
reset(tmp, mat.
m, c_to - c_from);
287 template <
typename T>
290 auto f = [] __device__(
const T x,
const T y) {
return x * y; };
291 raft::linalg::mapThenSumReduce(tmp_dev, u.
len, f, stream, u.
data, v.
data);
293 raft::update_host(&tmp_host, tmp_dev, 1, stream);
299 template <
typename T>
302 return dot(u, u, tmp_dev, stream);
305 template <
typename T>
308 auto f = [] __device__(
const T x) {
return raft::abs<T>(x); };
309 auto r = [] __device__(
const T x,
const T y) {
return raft::max<T>(x, y); };
310 raft::linalg::mapThenReduce(tmp_dev, u.
len, T(0), f, r, stream, u.
data);
312 raft::update_host(&tmp_host, tmp_dev, 1, stream);
317 template <
typename T>
320 return raft::mySqrt<T>(
squaredNorm(u, tmp_dev, stream));
323 template <
typename T>
326 raft::linalg::rowNorm(
327 tmp_dev, u.
data, u.
len, 1, raft::linalg::L1Norm,
true, stream, raft::Nop<T>());
329 raft::update_host(&tmp_host, tmp_dev, 1, stream);
334 template <
typename T>
337 std::vector<T> out(v.
len);
338 raft::update_host(&out[0], v.
data, v.
len, 0);
341 for (; it < v.
len - 1;) {
342 os << out[it] <<
" ";
349 template <
typename T>
352 os <<
"ord=" << (mat.
ord ==
COL_MAJOR ?
"CM" :
"RM") <<
"\n";
353 std::vector<T> out(mat.
len);
354 raft::update_host(&out[0], mat.
data, mat.
len, rmm::cuda_stream_default);
357 for (
int r = 0; r < mat.
m; r++) {
359 for (
int c = 0; c < mat.
n - 1; c++) {
360 os << out[idx] <<
",";
363 os << out[idx] << std::endl;
366 for (
int c = 0; c < mat.
m; c++) {
368 for (
int r = 0; r < mat.
n - 1; r++) {
369 os << out[idx] <<
",";
372 os << out[idx] << std::endl;
379 template <
typename T>
395 template <
typename T>
Definition: dbscan.hpp:30
void col_slice(const SimpleDenseMat< T > &mat, SimpleDenseMat< T > &mask_mat, int c_from, int c_to)
Definition: dense.hpp:269
T nrm1(const SimpleVec< T > &u, T *tmp_dev, cudaStream_t stream)
Definition: dense.hpp:324
std::ostream & operator<<(std::ostream &os, const SimpleVec< T > &v)
Definition: dense.hpp:335
T nrmMax(const SimpleVec< T > &u, T *tmp_dev, cudaStream_t stream)
Definition: dense.hpp:306
T squaredNorm(const SimpleVec< T > &u, T *tmp_dev, cudaStream_t stream)
Definition: dense.hpp:300
T dot(const SimpleVec< T > &u, const SimpleVec< T > &v, T *tmp_dev, cudaStream_t stream)
Definition: dense.hpp:288
T nrm2(const SimpleVec< T > &u, T *tmp_dev, cudaStream_t stream)
Definition: dense.hpp:318
STORAGE_ORDER
Definition: dense.hpp:38
@ ROW_MAJOR
Definition: dense.hpp:38
@ COL_MAJOR
Definition: dense.hpp:38
void col_ref(const SimpleDenseMat< T > &mat, SimpleVec< T > &mask_vec, int c)
Definition: dense.hpp:261
void synchronize(cuda_stream stream)
Definition: cuda_stream.hpp:27
void fill(const T val, cudaStream_t stream)
Definition: dense.hpp:217
void assign_binary(const SimpleDenseMat< T > &other1, const SimpleDenseMat< T > &other2, Lambda &f, cudaStream_t stream)
Definition: dense.hpp:192
void assign_gemm(const raft::handle_t &handle, const T alpha, const SimpleDenseMat< T > &A, const bool transA, const SimpleMat< T > &B, const bool transB, const T beta, cudaStream_t stream)
Definition: dense.hpp:149
static void gemm(const raft::handle_t &handle, const T alpha, const SimpleDenseMat< T > &A, const bool transA, const SimpleDenseMat< T > &B, const bool transB, const T beta, SimpleDenseMat< T > &C, cudaStream_t stream)
Definition: dense.hpp:66
void ax(const T a, const SimpleDenseMat< T > &x, cudaStream_t stream)
Definition: dense.hpp:162
void gemmb(const raft::handle_t &handle, const T alpha, const SimpleDenseMat< T > &A, const bool transA, const bool transB, const T beta, SimpleDenseMat< T > &C, cudaStream_t stream) const override
Definition: dense.hpp:130
void assign_unary(const SimpleDenseMat< T > &other, Lambda f, cudaStream_t stream)
Definition: dense.hpp:184
SimpleDenseMat(T *data, int m, int n, STORAGE_ORDER order=COL_MAJOR)
Definition: dense.hpp:52
void axpy(const T a, const SimpleDenseMat< T > &x, const SimpleDenseMat< T > &y, cudaStream_t stream)
Definition: dense.hpp:171
SimpleDenseMat(STORAGE_ORDER order=COL_MAJOR)
Definition: dense.hpp:48
void assign_ternary(const SimpleDenseMat< T > &other1, const SimpleDenseMat< T > &other2, const SimpleDenseMat< T > &other3, Lambda &f, cudaStream_t stream)
Definition: dense.hpp:204
int len
Definition: dense.hpp:43
void operator=(const SimpleDenseMat< T > &other)=delete
void copy_async(const SimpleDenseMat< T > &other, cudaStream_t stream)
Definition: dense.hpp:224
T * data
Definition: dense.hpp:44
void print(std::ostream &oss) const override
Definition: dense.hpp:233
SimpleMat< T > Super
Definition: dense.hpp:42
void reset(T *data_, int m_, int n_)
Definition: dense.hpp:57
STORAGE_ORDER ord
Definition: dense.hpp:46
Definition: dense.hpp:396
int m
Definition: base.hpp:29
SimpleMatOwning(int m, int n, cudaStream_t stream, STORAGE_ORDER order=COL_MAJOR)
Definition: dense.hpp:406
Buffer buf
Definition: dense.hpp:399
int n
Definition: base.hpp:29
rmm::device_uvector< T > Buffer
Definition: dense.hpp:398
SimpleDenseMat< T > Super
Definition: dense.hpp:397
void operator=(const SimpleVec< T > &other)=delete
int m
Definition: base.hpp:29
int n
Definition: base.hpp:29
virtual void gemmb(const raft::handle_t &handle, const T alpha, const SimpleDenseMat< T > &A, const bool transA, const bool transB, const T beta, SimpleDenseMat< T > &C, cudaStream_t stream) const =0
Definition: dense.hpp:380
void operator=(const SimpleVec< T > &other)=delete
SimpleVecOwning(int n, cudaStream_t stream)
Definition: dense.hpp:387
SimpleVec< T > Super
Definition: dense.hpp:381
rmm::device_uvector< T > Buffer
Definition: dense.hpp:382
Buffer buf
Definition: dense.hpp:383
Definition: dense.hpp:239
SimpleDenseMat< T > Super
Definition: dense.hpp:240
void reset(T *new_data, int n)
Definition: dense.hpp:257
void assign_gemv(const raft::handle_t &handle, const T alpha, const SimpleDenseMat< T > &A, bool transA, const SimpleVec< T > &x, const T beta, cudaStream_t stream)
Definition: dense.hpp:244
SimpleVec(T *data, const int n)
Definition: dense.hpp:242
SimpleVec()
Definition: dense.hpp:255