Source code for divik.feature_selection._exims._sklearn

import numpy as np
from sklearn.base import BaseEstimator

from divik.core import configurable
from divik.feature_selection._stat_selector_mixin import SelectorMixin

from ._exims import exims
from ._selection import select_features

@configurable
class EximsSelector(BaseEstimator, SelectorMixin):
    """Select features based on their spatial distribution

    Preserves features that yield biologically plausible structures.

    Attributes
    ----------
    structness_ :
        Value returned by ``exims`` for the data and coordinates passed
        to ``fit``.

    threshold_ :
        ``threshold`` field of the result of ``select_features`` applied
        to ``structness_``.

    selected_ :
        ``selection`` field of the result of ``select_features`` applied
        to ``structness_``; returned as the support mask.

    References
    ----------
    Wijetunge, Chalini D., et al. "EXIMS: an improved data analysis
    pipeline based on a new peak picking method for EXploring Imaging Mass
    Spectrometry data." Bioinformatics 31.19 (2015): 3198-3206.
    """

    def __init__(self):
        # Zero-argument super() does not depend on the module-level name
        # ``EximsSelector``, which the class decorator may rebind.
        super().__init__()

    def _get_support_mask(self):
        """
        Get the boolean mask indicating which features are selected

        Returns
        -------
        support : boolean array of shape [# input features]
            An element is True iff its corresponding feature is selected for
            retention.
        """
        return self.selected_

    def fit(self, X, y=None, xy=None):
        """Learn data-driven feature thresholds from X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Sample vectors from which to compute feature characteristic.

        y : any
            Ignored. This parameter exists only for compatibility with
            sklearn.pipeline.Pipeline.

        xy : array-like, shape (n_samples, 2)
            Spatial coordinates of the samples. Expects integers, indices
            over an image.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``xy`` is not provided.
        """
        if xy is None:
            raise ValueError("xy coordinates are required")
        # Coerce to an ndarray so that any array-like (list, tuple, data
        # frame) can be transposed and unpacked into coordinate columns.
        coordinates = np.asarray(xy)
        self.structness_ = exims(X, *coordinates.T)
        features_selection = select_features(self.structness_)
        self.threshold_ = features_selection.threshold
        self.selected_ = features_selection.selection
        return self