cuML C++ API: include/cuml/manifold/tsne.h Source File

 /*

  * SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION.

  * SPDX-License-Identifier: Apache-2.0

  */


 #pragma once


 #include <cuml/common/distance_type.hpp>
 #include <cuml/common/logger.hpp>

 #include <cstdint>


 // Forward declaration of raft::handle_t. In this header the type only appears
 // as a `const raft::handle_t&` parameter of the TSNE_fit / TSNE_fit_sparse
 // declarations, so its complete definition is not needed here.
 namespace raft {
 class handle_t;
 }


 namespace ML {


 // Implementation variants selectable through TSNEParams::algorithm.
 // The explicit values match the implicit numbering of the enumerators.
 enum TSNE_ALGORITHM {
   EXACT      = 0,
   BARNES_HUT = 1,
   FFT        = 2,
 };


 // Embedding initializers selectable through TSNEParams::init.
 // The explicit values match the implicit numbering of the enumerators.
 enum TSNE_INIT {
   RANDOM = 0,
   PCA    = 1,
 };


 // Tunable settings for a t-SNE run. An instance is passed by reference to
 // TSNE_fit and TSNE_fit_sparse. Every member has an in-class default, and all
 // float members are initialised from float literals (`f` suffix) so that no
 // implicit double-to-float conversion takes place.
 struct TSNEParams {
   // Number of output dimensions of the embedding Y.
   int dim = 2;

   // Number of nearest neighbors used.
   int n_neighbors = 1023;

   // Barnes-Hut only. Value between 0 and 1 trading speed against accuracy.
   // NOTE(review): the original comment labelled 0 as "speed" and 1 as
   // "accuracy"; in the usual Barnes-Hut formulation a smaller opening angle is
   // slower but more accurate -- confirm which end is which.
   float theta = 0.5f;

   // Barnes-Hut only. A tiny jitter that promotes numerical stability.
   float epssq = 0.0025f;

   // How many nearest neighbors are used while constructing Pij.
   float perplexity = 50.0f;

   // Number of iterations used to construct Pij.
   int perplexity_max_iter = 100;

   // Small tolerance applied to Pij to keep its construction numerically stable.
   float perplexity_tol = 1e-5f;

   // Pressure applied to clusters to spread them out during the
   // exaggeration phase.
   float early_exaggeration = 12.0f;

   // FIT-SNE only. Pressure applied to clusters to spread them out after the
   // exaggeration phase.
   float late_exaggeration = 1.0f;

   // Number of iterations the early pressure runs for. When late exaggeration
   // is used, it applies to every iteration remaining after this many.
   int exaggeration_iter = 250;

   // Barnes-Hut and Exact only. Rounds up small gradient updates.
   float min_gain = 0.01f;

   // Learning rate during the exaggeration phase.
   float pre_learning_rate = 200.0f;

   // Learning rate after the exaggeration phase.
   float post_learning_rate = 500.0f;

   // Maximum number of iterations TSNE should run for.
   int max_iter = 1000;

   // Exact only (ignored otherwise). Smallest gradient norm at which TSNE
   // should terminate.
   float min_grad_norm = 1e-7f;

   // Momentum used during the exaggeration phase.
   float pre_momentum = 0.5f;

   // Momentum used after the exaggeration phase.
   float post_momentum = 0.8f;

   // -1 requests purely random initialization; any value >= 0 is used as the
   // random seed for reproducible output. Some run-to-run variance may remain
   // even with a fixed seed because of the parallel nature of the algorithm.
   long long random_state = -1;

   // Verbosity level for log messages emitted during execution.
   rapids_logger::level_enum verbosity = rapids_logger::level_enum::info;

   // Algorithm used to initialise the embedding.
   TSNE_INIT init = TSNE_INIT::RANDOM;

   // When true, distances from the knn graph are always squared before the
   // conditional probabilities are computed, even if the knn graph was passed
   // in explicitly. This better matches the behavior of Scikit-learn's T-SNE.
   bool square_distances = true;

   // Distance metric to use.
   ML::distance::DistanceType metric = ML::distance::DistanceType::L2SqrtExpanded;

   // Value of p for the Minkowski distance.
   float p = 2.0f;

   // Which implementation algorithm to use.
   TSNE_ALGORITHM algorithm = TSNE_ALGORITHM::FFT;
 };


 /**
  * @brief Dimensionality reduction via TSNE using Barnes-Hut, Fourier
  *        Interpolation, or naive methods.
  *
  * NOTE(review): only the declaration is visible in this header, so the
  * parameter roles below are inferred from names and types -- confirm them
  * against the implementation.
  *
  * @param handle       RAFT handle, taken by const reference.
  * @param X            Presumably the dense input data with `n` rows and `p`
  *                     columns.
  * @param Y            Presumably the output embedding.
  * @param n            Presumably the number of rows of X.
  * @param p            Presumably the number of columns of X.
  * @param knn_indices  Presumably nearest-neighbor indices. Note the 64-bit
  *                     element type; TSNE_fit_sparse takes `int*` instead.
  * @param knn_dists    Presumably nearest-neighbor distances.
  * @param params       TSNE settings, taken by non-const reference.
  * @param kl_div       Defaults to nullptr; presumably an optional output for
  *                     the KL divergence.
  * @param n_iter       Defaults to nullptr; presumably an optional output for
  *                     the number of iterations run.
  */
 void TSNE_fit(const raft::handle_t& handle,
               float* X,
               float* Y,
               int n,
               int p,
               std::int64_t* knn_indices,
               float* knn_dists,
               TSNEParams& params,
               float* kl_div = nullptr,
               int* n_iter   = nullptr);


 /**
  * @brief Dimensionality reduction via TSNE using either Barnes Hut O(NlogN)
  *        or brute force O(N^2).
  *
  * NOTE(review): only the declaration is visible in this header, so the
  * parameter roles below are inferred from names and types -- confirm them
  * against the implementation.
  *
  * @param handle       RAFT handle, taken by const reference.
  * @param indptr       Presumably the row-offset array of a CSR input matrix.
  * @param indices      Presumably the column-index array of a CSR input matrix.
  * @param data         Presumably the non-zero values of the input matrix.
  * @param Y            Presumably the output embedding.
  * @param nnz          Presumably the number of non-zero entries.
  * @param n            Presumably the number of rows of the input.
  * @param p            Presumably the number of columns of the input.
  * @param knn_indices  Presumably nearest-neighbor indices. Note the `int`
  *                     element type; TSNE_fit takes 64-bit indices instead.
  * @param knn_dists    Presumably nearest-neighbor distances.
  * @param params       TSNE settings, taken by non-const reference.
  * @param kl_div       Defaults to nullptr; presumably an optional output for
  *                     the KL divergence.
  * @param n_iter       Defaults to nullptr; presumably an optional output for
  *                     the number of iterations run.
  */
 void TSNE_fit_sparse(const raft::handle_t& handle,
                      int* indptr,
                      int* indices,
                      float* data,
                      float* Y,
                      int nnz,
                      int n,
                      int p,
                      int* knn_indices,
                      float* knn_dists,
                      TSNEParams& params,
                      float* kl_div = nullptr,
                      int* n_iter   = nullptr);


 }  // namespace ML

ML::params
Definition: params.hpp:23

distance_type.hpp

logger.hpp

ML::distance::DistanceType
DistanceType
Definition: distance_type.hpp:10

ML::distance::DistanceType::L2SqrtExpanded
@ L2SqrtExpanded

ML
Definition: dbscan.hpp:18

ML::TSNE_fit
void TSNE_fit(const raft::handle_t &handle, float *X, float *Y, int n, int p, int64_t *knn_indices, float *knn_dists, TSNEParams &params, float *kl_div=nullptr, int *n_iter=nullptr)
Dimensionality reduction via TSNE using Barnes-Hut, Fourier Interpolation, or naive methods....

ML::TSNE_ALGORITHM
TSNE_ALGORITHM
Definition: tsne.h:17

ML::BARNES_HUT
@ BARNES_HUT
Definition: tsne.h:17

ML::FFT
@ FFT
Definition: tsne.h:17

ML::EXACT
@ EXACT
Definition: tsne.h:17

ML::TSNE_INIT
TSNE_INIT
Definition: tsne.h:19

ML::RANDOM
@ RANDOM
Definition: tsne.h:19

ML::PCA
@ PCA
Definition: tsne.h:19

ML::TSNE_fit_sparse
void TSNE_fit_sparse(const raft::handle_t &handle, int *indptr, int *indices, float *data, float *Y, int nnz, int n, int p, int *knn_indices, float *knn_dists, TSNEParams &params, float *kl_div=nullptr, int *n_iter=nullptr)
Dimensionality reduction via TSNE using either Barnes Hut O(NlogN) or brute force O(N^2).

raft
Definition: dbscan.hpp:14

ML::TSNEParams
Definition: tsne.h:21

ML::TSNEParams::perplexity
float perplexity
Definition: tsne.h:36

ML::TSNEParams::pre_learning_rate
float pre_learning_rate
Definition: tsne.h:61

ML::TSNEParams::metric
ML::distance::DistanceType metric
Definition: tsne.h:97

ML::TSNEParams::perplexity_max_iter
int perplexity_max_iter
Definition: tsne.h:39

ML::TSNEParams::square_distances
bool square_distances
Definition: tsne.h:94

ML::TSNEParams::exaggeration_iter
int exaggeration_iter
Definition: tsne.h:55

ML::TSNEParams::min_grad_norm
float min_grad_norm
Definition: tsne.h:71

ML::TSNEParams::theta
float theta
Definition: tsne.h:30

ML::TSNEParams::late_exaggeration
float late_exaggeration
Definition: tsne.h:50

ML::TSNEParams::algorithm
TSNE_ALGORITHM algorithm
Definition: tsne.h:103

ML::TSNEParams::random_state
long long random_state
Definition: tsne.h:82

ML::TSNEParams::pre_momentum
float pre_momentum
Definition: tsne.h:74

ML::TSNEParams::n_neighbors
int n_neighbors
Definition: tsne.h:26

ML::TSNEParams::early_exaggeration
float early_exaggeration
Definition: tsne.h:46

ML::TSNEParams::post_momentum
float post_momentum
Definition: tsne.h:77

ML::TSNEParams::dim
int dim
Definition: tsne.h:23

ML::TSNEParams::min_gain
float min_gain
Definition: tsne.h:58

ML::TSNEParams::post_learning_rate
float post_learning_rate
Definition: tsne.h:64

ML::TSNEParams::epssq
float epssq
Definition: tsne.h:33

ML::TSNEParams::init
TSNE_INIT init
Definition: tsne.h:88

ML::TSNEParams::perplexity_tol
float perplexity_tol
Definition: tsne.h:42

ML::TSNEParams::max_iter
int max_iter
Definition: tsne.h:67

ML::TSNEParams::p
float p
Definition: tsne.h:100

ML::TSNEParams::verbosity
rapids_logger::level_enum verbosity
Definition: tsne.h:85