Source code for suod.models.jl_projection

# -*- coding: utf-8 -*-
"""Johnson–Lindenstrauss process. Part of the code is adapted from
https://github.com/PTAug/jlt-python
"""
# Author: Yue Zhao <zhaoy@cmu.edu>
# License: MIT

import numpy as np
from sklearn.utils import check_array
import math


def jl_fit_transform(X, objective_dim, method="basic"):
    """Fit and transform the input data by Johnson–Lindenstrauss process.
    See :cite:`johnson1984extensions` for details.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    objective_dim : int
        The expected output dimension.

    method : string, optional (default = 'basic')
        The JL projection method (matched case-insensitively):

        - "basic": each component of the transformation matrix is taken at
          random in N(0,1).
        - "discrete": each component of the transformation matrix is taken
          at random in {-1,1}.
        - "circulant": a vector of length n_features is taken at random in
          N(0,1) and expanded with ``scipy.linalg.circulant``; the first
          ``objective_dim`` rows of that square matrix are kept. Since the
          circulant matrix only has n_features rows, at most n_features
          output dimensions are produced.
        - "toeplitz": the first row and column of the transformation matrix
          are taken at random in N(0,1), and each diagonal has a constant
          value taken from these vectors (``scipy.linalg.toeplitz``).

        Every variant is scaled by ``1 / sqrt(objective_dim)``.

    Returns
    -------
    X_transformed : numpy array of shape (n_samples, objective_dim)
        The dataset after the JL projection.

    jl_transformer : numpy array of shape (n_features, objective_dim)
        The projection matrix, already transposed so that
        ``np.dot(X, jl_transformer)`` yields ``X_transformed``.

    Raises
    ------
    NotImplementedError
        If ``method`` is not one of the supported projection types.
    """
    method = method.lower()
    n_features = np.shape(X)[1]
    scale = 1 / math.sqrt(objective_dim)

    if method == "basic":
        jl_transformer = scale * np.random.normal(
            0, 1, size=(objective_dim, n_features))

    elif method == "discrete":
        jl_transformer = scale * np.random.choice(
            [-1, 1], size=(objective_dim, n_features))

    elif method == "circulant":
        from scipy.linalg import circulant

        # A 1-D vector is passed on purpose: multi-dimensional input to
        # circulant() is interpreted as a batch by recent SciPy releases.
        first_row = np.random.normal(0, 1, size=n_features)
        jl_transformer = (scale * circulant(first_row))[:objective_dim]

    elif method == "toeplitz":
        from scipy.linalg import toeplitz

        # 1-D vectors for the same reason as above; the draw order
        # (row first, then column) is kept so seeded runs are unchanged.
        first_row = np.random.normal(0, 1, size=n_features)
        first_column = np.random.normal(0, 1, size=objective_dim)
        jl_transformer = scale * toeplitz(first_column, first_row)

    else:
        raise NotImplementedError('Wrong transformation type')

    # Stored as (n_features, objective_dim) so it right-multiplies X.
    jl_transformer = jl_transformer.T

    return np.dot(X, jl_transformer), jl_transformer
def jl_transform(X, jl_transformer):
    """Use the fitted transformer to conduct JL projection.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    jl_transformer : numpy array of shape (n_features, reduced_dimensions)
        Fitted transformer instance (projection matrix).

    Returns
    -------
    X_transformed : numpy array of shape (n_samples, reduced_dimensions)
        Transformed matrix. ``X`` itself (after validation) is returned when
        ``jl_transformer`` is the all-ones square placeholder.

    Raises
    ------
    ValueError
        If the number of features in ``X`` does not match the number of
        rows in ``jl_transformer``.
    """
    X = check_array(X)
    jl_transformer = check_array(jl_transformer)

    n_features = X.shape[1]

    # no need for transformation: an all-ones (n_features, n_features)
    # matrix is treated as a "no projection" marker. The shape is compared
    # first so no n_features x n_features reference matrix is allocated.
    if (jl_transformer.shape == (n_features, n_features)
            and np.all(jl_transformer == 1)):
        return X

    if n_features != jl_transformer.shape[0]:
        raise ValueError("X and jl_transformer have different dimensions.")

    return np.dot(X, jl_transformer)