# -*- coding: utf-8 -*-
"""Generic feature selection mixin"""
# Authors: G. Varoquaux, A. Gramfort, L. Buitinck, J. Nothman
# License: BSD 3 clause
from abc import ABCMeta, abstractmethod
from warnings import warn
from operator import attrgetter
import numpy as np
from scipy.sparse import issparse, csc_matrix
from ..base import TransformerMixin
from ..utils import (
check_array,
safe_mask,
safe_sqr,
)
from ..utils._tags import _safe_tags
class SelectorMixin(TransformerMixin, metaclass=ABCMeta):
    """
    Transformer mixin for feature selection driven by a support mask.

    Concrete selectors only have to implement `_get_support_mask`; this
    mixin is the basis for a feature selector's `transform` and
    `inverse_transform` functionality and exposes the mask itself through
    `get_support`.
    """

    def get_support(self, indices=False):
        """
        Return the selected features as a mask or as integer indices.

        Parameters
        ----------
        indices : bool, default=False
            When truthy, return the integer positions of the selected
            features instead of a boolean mask.

        Returns
        -------
        support : array
            If `indices` is False, the boolean mask produced by
            `_get_support_mask` (shape [# input features]), where True
            marks a retained feature. If `indices` is True, an integer
            array (shape [# output features]) holding the positions of
            the retained features in the input feature vector.
        """
        support_mask = self._get_support_mask()
        if indices:
            # np.where returns one index array per axis; the mask's first
            # axis is the feature axis.
            return np.where(support_mask)[0]
        return support_mask

    @abstractmethod
    def _get_support_mask(self):
        """
        Return the boolean mask of selected features.

        Must be implemented by subclasses.

        Returns
        -------
        support : boolean array of shape [# input features]
            True where the corresponding feature is selected for
            retention.
        """
def _get_feature_importances(estimator, getter, transform_func=None,
norm_order=1):
"""
Retrieve and aggregate (ndim > 1) the feature importances
from an estimator. Also optionally applies transformation.
Parameters
----------
estimator : estimator
A scikit-learn estimator from which we want to get the feature
importances.
getter : "auto", str or callable
An attribute or a callable to get the feature importance. If `"auto"`,
`estimator` is expected to expose `coef_` or `feature_importances`.
transform_func : {"norm", "square"}, default=None
The transform to apply to the feature importances. By default (`None`)
no transformation is applied.
norm_order : int, default=1
The norm order to apply when `transform_func="norm"`. Only applied
when `importances.ndim > 1`.
Returns
-------
importances : ndarray of shape (n_features,)
The features importances, optionally transformed.
"""
if isinstance(getter, str):
if getter == 'auto':
if hasattr(estimator, 'coef_'):
getter = attrgetter('coef_')
elif hasattr(estimator, 'feature_importances_'):
getter = attrgetter('feature_importances_')
else:
raise ValueError(
f"when `importance_getter=='auto'`, the underlying "
f"estimator {estimator.__class__.__name__} should have "
f"`coef_` or `feature_importances_` attribute. Either "
f"pass a fitted estimator to feature selector or call fit "
f"before calling transform."
)
else:
getter = attrgetter(getter)
elif not callable(getter):
raise ValueError(
'`importance_getter` has to be a string or `callable`'
)
importances = getter(estimator)
if transform_func is None:
return importances
elif transform_func == "norm":
if importances.ndim == 1:
importances = np.abs(importances)
else:
importances = np.linalg.norm(importances, axis=0,
ord=norm_order)
elif transform_func == "square":
if importances.ndim == 1:
importances = safe_sqr(importances)
else:
importances = safe_sqr(importances).sum(axis=0)
else:
raise ValueError("Valid values for `transform_func` are " +
"None, 'norm' and 'square'. Those two " +
"transformation are only supported now")
return importances