Source code for divik.feature_selection._stat_selector_mixin

from abc import ABCMeta

import numpy as np
from sklearn.base import BaseEstimator
from sklearn.feature_selection.base import SelectorMixin


class StatSelectorMixin(SelectorMixin, metaclass=ABCMeta):
    """Feature-selection mixin driven by a per-feature statistic.

    Selection itself (`transform` / `inverse_transform`) is inherited from
    `SelectorMixin` and works as long as the concrete estimator stores a
    boolean support mask in `selected_` during `fit`.

    On top of that, helpers `_to_characteristics` and `_to_raw` are
    supplied. They read `stat` from the instance and, when present, the
    optional `use_log` and `preserve_high` attributes.
    """

    def _to_characteristics(self, X):
        """Compute the normalized per-feature characteristic of the data.

        Parameters
        ----------
        X : data reduced along axis 0 with `np.mean` or `np.var`.

        Returns
        -------
        The column-wise statistic, log-transformed when `use_log` is set
        and truthy, and negated when `preserve_high` is set and falsy.

        Raises
        ------
        ValueError
            If `stat` is neither ``'mean'`` nor ``'var'``, or if log
            filtering is requested and any statistic is negative.
        """
        stat = self.stat
        if stat == 'mean':
            reducer = np.mean
        elif stat == 'var':
            reducer = np.var
        else:
            raise ValueError('stat must be one of {"mean", "var"}')
        characteristics = reducer(X, axis=0)

        # Missing optional attributes behave as "no log" / "keep high".
        if getattr(self, 'use_log', False):
            # NOTE(review): only strictly negative values are rejected; a
            # zero statistic reaches np.log - confirm this is intended.
            if np.any(characteristics < 0):
                raise ValueError(
                    "Feature characteristic cannot be negative "
                    "with log filtering"
                )
            characteristics = np.log(characteristics)

        if not getattr(self, 'preserve_high', True):
            # Flip the sign so that low raw values become the high ones.
            characteristics = -characteristics

        return characteristics

    def _to_raw(self, threshold):
        """Map a threshold back to the raw feature characteristic space.

        Undoes the steps of `_to_characteristics` in reverse order: the
        sign flip first (when `preserve_high` is set and falsy), then the
        logarithm via `np.exp` (when `use_log` is set and truthy).
        """
        raw = threshold
        if not getattr(self, 'preserve_high', True):
            raw = -raw
        if getattr(self, 'use_log', False):
            raw = np.exp(raw)
        return raw

    def _get_support_mask(self):
        """Return the boolean mask of retained features.

        Returns
        -------
        support : boolean array of shape [# input features]
            True exactly at the positions of features selected for
            retention; this is the `selected_` attribute.
        """
        return self.selected_
class NoSelector(BaseEstimator, StatSelectorMixin):
    """Dummy selector to use when no selection is supposed to be made."""

    def fit(self, X, y=None):
        """Pass data forward by marking every feature as selected.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Sample vectors to pass. Only the number of columns is read.

        y : any
            Ignored. This parameter exists only for compatibility with
            sklearn.pipeline.Pipeline.

        Returns
        -------
        self
        """
        # np.shape uses X.shape when available and otherwise converts X,
        # so plain nested sequences are accepted as well as arrays.
        n_features = np.shape(X)[1]
        self.selected_ = np.ones((n_features,), dtype=bool)
        return self