tsne.h
/*
 * Copyright (c) 2019-2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <cuml/common/logger.hpp>

#include <raft/distance/distance_types.hpp>

namespace raft {
class handle_t;
}

namespace ML {

enum TSNE_ALGORITHM { EXACT, BARNES_HUT, FFT };

enum TSNE_INIT { RANDOM, PCA };

struct TSNEParams {
  // Number of output dimensions for embeddings Y.
  int dim = 2;

  // Number of nearest neighbors used.
  int n_neighbors = 1023;

  // Float between 0 and 1. Tradeoff for speed (0) vs accuracy (1).
  // (Barnes-Hut only.)
  float theta = 0.5f;

  // A tiny jitter to promote numerical stability. (Barnes-Hut only.)
  float epssq = 0.0025;

  // How many nearest neighbors are used during construction of Pij.
  float perplexity = 50.0f;

  // Number of iterations used to construct Pij.
  int perplexity_max_iter = 100;

  // The small tolerance used for Pij to ensure numerical stability.
  float perplexity_tol = 1e-5;

  // How much pressure to apply to clusters to spread out
  // during the exaggeration phase.
  float early_exaggeration = 12.0f;

  // How much pressure to apply to clusters to
  // spread out after the exaggeration phase. (FIT-SNE only.)
  float late_exaggeration = 1.0f;

  // How many iterations you want the early pressure to run for.
  // If late exaggeration is used, it will be applied to all iterations
  // that remain after this number of iterations.
  int exaggeration_iter = 250;

  // Rounds up small gradient updates. (Barnes-Hut and Exact only.)
  float min_gain = 0.01f;

  // The learning rate during the exaggeration phase.
  float pre_learning_rate = 200.0f;

  // The learning rate after the exaggeration phase.
  float post_learning_rate = 500.0f;

  // The maximum number of iterations TSNE should run for.
  int max_iter = 1000;

  // The smallest gradient norm TSNE should terminate on.
  // (Exact only; ignored for others.)
  float min_grad_norm = 1e-7;

  // The momentum used during the exaggeration phase.
  float pre_momentum = 0.5;

  // The momentum used after the exaggeration phase.
  float post_momentum = 0.8;

  // Set this to -1 for pure random initializations or >= 0 for
  // reproducible outputs. This sets the random seed correctly, but there
  // may still be some variance due to the parallel nature of this algorithm.
  long long random_state = -1;

  // Verbosity level for logging messages during execution.
  int verbosity = CUML_LEVEL_INFO;

  // Embedding initializer algorithm.
  TSNE_INIT init = TSNE_INIT::RANDOM;

  // When this is set to true, the distances from the knn graph will
  // always be squared before computing conditional probabilities, even if
  // the knn graph is passed in explicitly. This is to better match the
  // behavior of Scikit-learn's T-SNE.
  bool square_distances = true;

  // Distance metric to use.
  raft::distance::DistanceType metric = raft::distance::DistanceType::L2SqrtExpanded;

  // Value of p for Minkowski distance.
  float p = 2.0;

  // Which implementation algorithm to use.
  TSNE_ALGORITHM algorithm = TSNE_ALGORITHM::FFT;
};

/**
 * @brief Dimensionality reduction via TSNE using Barnes-Hut, Fourier Interpolation,
 * or naive methods.
 */
void TSNE_fit(const raft::handle_t& handle,
              float* X,
              float* Y,
              int n,
              int p,
              int64_t* knn_indices,
              float* knn_dists,
              TSNEParams& params,
              float* kl_div = nullptr);

/**
 * @brief Dimensionality reduction via TSNE using either Barnes-Hut O(NlogN) or
 * brute force O(N^2).
 */
void TSNE_fit_sparse(const raft::handle_t& handle,
                     int* indptr,
                     int* indices,
                     float* data,
                     float* Y,
                     int nnz,
                     int n,
                     int p,
                     int* knn_indices,
                     float* knn_dists,
                     TSNEParams& params,
                     float* kl_div = nullptr);

}  // namespace ML
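
The listing above only declares the API. As a rough orientation, the sketch below shows how the dense entry point might be called; it is not taken from cuML's documentation. The include path <cuml/manifold/tsne.h>, the helper name fit_embedding, the device pointer d_X (assumed to hold a row-major n x p dataset already on the GPU), and the assumption that passing nullptr for knn_indices/knn_dists makes cuML build the k-NN graph internally are all illustrative.

// Minimal usage sketch, under the assumptions stated above.
#include <cuml/manifold/tsne.h>  // this header; install path assumed

#include <raft/core/handle.hpp>
#include <rmm/device_uvector.hpp>

#include <cstddef>

void fit_embedding(const raft::handle_t& handle, float* d_X, int n, int p)
{
  ML::TSNEParams params;        // defaults as declared above
  params.perplexity = 30.0f;    // illustrative setting
  params.algorithm  = ML::TSNE_ALGORITHM::BARNES_HUT;

  // Output embedding: n x params.dim floats in device memory.
  rmm::device_uvector<float> d_Y(static_cast<std::size_t>(n) * params.dim,
                                 handle.get_stream());

  float kl_div = 0.0f;
  ML::TSNE_fit(handle,
               d_X,
               d_Y.data(),
               n,
               p,
               nullptr,  // knn_indices: assumed to trigger internal k-NN computation
               nullptr,  // knn_dists
               params,
               &kl_div);
}

TSNE_fit_sparse follows the same pattern, replacing the dense X pointer with a CSR matrix described by indptr, indices, data, and nnz.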