PCA#

Principal Component Analysis (PCA) is a linear dimensionality reduction technique that projects data onto orthogonal directions of maximum variance.

#include <cuvs/preprocessing/pca.hpp>

namespace cuvs::preprocessing::pca

Params#

struct params#

Parameters for PCA decomposition. The fields mirror the scikit-learn PCA estimator; see https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html for reference.

Public Members

int n_components = 1#

Number of components to keep.

bool copy = true#

If false, the data passed to fit is overwritten, and running fit(X).transform(X) will not yield the expected results; use fit_transform(X) instead.

bool whiten = false#

When true (false by default), the component vectors are multiplied by the square root of n_samples and then divided by the singular values to ensure uncorrelated outputs with unit component-wise variances.

solver algorithm = solver::COV_EIG_DQ#

The solver algorithm to use.

float tol = 0.0f#

Tolerance for singular values computed by the Jacobi solver (this corresponds to the tolerance scikit-learn applies when svd_solver == 'arpack').

int n_iterations = 15#

Number of iterations for the power method used by the Jacobi solver.

PCA#

void fit(
raft::resources const &handle,
const params &config,
raft::device_matrix_view<float, int64_t, raft::col_major> input,
raft::device_matrix_view<float, int64_t, raft::col_major> components,
raft::device_vector_view<float, int64_t> explained_var,
raft::device_vector_view<float, int64_t> explained_var_ratio,
raft::device_vector_view<float, int64_t> singular_vals,
raft::device_vector_view<float, int64_t> mu,
raft::device_scalar_view<float, int64_t> noise_vars,
bool flip_signs_based_on_U = false
)#

Perform PCA fit operation.

Computes the principal components, explained variances, singular values, and column means from the input data.

#include <raft/core/device_mdarray.hpp>
#include <raft/core/resources.hpp>
#include <cuvs/preprocessing/pca.hpp>

raft::resources handle;

cuvs::preprocessing::pca::params params;
params.n_components = 2;

// n_rows and n_cols are the dimensions of the dataset.
// fit() takes col-major matrix views indexed with int64_t, so every buffer is
// allocated with a matching index type (and layout, for matrices).
auto input = raft::make_device_matrix<float, int64_t, raft::col_major>(handle, n_rows, n_cols);
// ... fill input ...

auto components = raft::make_device_matrix<float, int64_t, raft::col_major>(
    handle, params.n_components, n_cols);
auto explained_var       = raft::make_device_vector<float, int64_t>(handle, params.n_components);
auto explained_var_ratio = raft::make_device_vector<float, int64_t>(handle, params.n_components);
auto singular_vals       = raft::make_device_vector<float, int64_t>(handle, params.n_components);
auto mu                  = raft::make_device_vector<float, int64_t>(handle, n_cols);
// make_device_scalar requires an initial value; fit() overwrites it.
auto noise_vars          = raft::make_device_scalar<float, int64_t>(handle, 0.0f);

cuvs::preprocessing::pca::fit(handle, params,
    input.view(), components.view(), explained_var.view(),
    explained_var_ratio.view(), singular_vals.view(), mu.view(), noise_vars.view());
Parameters:
  • handle[in] raft resource handle

  • config[in] PCA parameters

  • input[inout] input data [n_rows x n_cols] (col-major). Modified temporarily.

  • components[out] principal components [n_components x n_cols] (col-major)

  • explained_var[out] explained variances [n_components]

  • explained_var_ratio[out] explained variance ratios [n_components]

  • singular_vals[out] singular values [n_components]

  • mu[out] column means [n_cols]

  • noise_vars[out] noise variance (scalar)

  • flip_signs_based_on_U[in] whether to determine signs by U (true) or V.T (false)

void fit_transform(
raft::resources const &handle,
const params &config,
raft::device_matrix_view<float, int64_t, raft::col_major> input,
raft::device_matrix_view<float, int64_t, raft::col_major> trans_input,
raft::device_matrix_view<float, int64_t, raft::col_major> components,
raft::device_vector_view<float, int64_t> explained_var,
raft::device_vector_view<float, int64_t> explained_var_ratio,
raft::device_vector_view<float, int64_t> singular_vals,
raft::device_vector_view<float, int64_t> mu,
raft::device_scalar_view<float, int64_t> noise_vars,
bool flip_signs_based_on_U = false
)#

Perform PCA fit and transform operations.

Computes the principal components and transforms the input data into the eigenspace in a single operation.

Parameters:
  • handle[in] raft resource handle

  • config[in] PCA parameters

  • input[inout] input data [n_rows x n_cols] (col-major). Modified temporarily.

  • trans_input[out] transformed data [n_rows x n_components] (col-major)

  • components[out] principal components [n_components x n_cols] (col-major)

  • explained_var[out] explained variances [n_components]

  • explained_var_ratio[out] explained variance ratios [n_components]

  • singular_vals[out] singular values [n_components]

  • mu[out] column means [n_cols]

  • noise_vars[out] noise variance (scalar)

  • flip_signs_based_on_U[in] whether to determine signs by U (true) or V.T (false)

void transform(
raft::resources const &handle,
const params &config,
raft::device_matrix_view<float, int64_t, raft::col_major> input,
raft::device_matrix_view<float, int64_t, raft::col_major> components,
raft::device_vector_view<float, int64_t> singular_vals,
raft::device_vector_view<float, int64_t> mu,
raft::device_matrix_view<float, int64_t, raft::col_major> trans_input
)#

Perform PCA transform operation.

Transforms the input data into the eigenspace using previously computed principal components.

Parameters:
  • handle[in] raft resource handle

  • config[in] PCA parameters

  • input[inout] data to transform [n_rows x n_cols] (col-major). Modified temporarily (mean-centered then restored).

  • components[in] principal components [n_components x n_cols] (col-major)

  • singular_vals[in] singular values [n_components]

  • mu[in] column means [n_cols]

  • trans_input[out] transformed data [n_rows x n_components] (col-major)

void inverse_transform(
raft::resources const &handle,
const params &config,
raft::device_matrix_view<float, int64_t, raft::col_major> trans_input,
raft::device_matrix_view<float, int64_t, raft::col_major> components,
raft::device_vector_view<float, int64_t> singular_vals,
raft::device_vector_view<float, int64_t> mu,
raft::device_matrix_view<float, int64_t, raft::col_major> output
)#

Perform PCA inverse transform operation.

Transforms data from the eigenspace back to the original space.

Parameters:
  • handle[in] raft resource handle

  • config[in] PCA parameters

  • trans_input[in] transformed data [n_rows x n_components] (col-major)

  • components[in] principal components [n_components x n_cols] (col-major)

  • singular_vals[in] singular values [n_components]

  • mu[in] column means [n_cols]

  • output[out] reconstructed data [n_rows x n_cols] (col-major)