Module fri.main

View Source
from sklearn.base import BaseEstimator

from sklearn.exceptions import NotFittedError

import warnings

try:

    # TODO: remove catch and try except on sklearn > 0.22 when regression is merged: https://github.com/scikit-learn/scikit-learn/pull/16132

    with warnings.catch_warnings():

        warnings.simplefilter("ignore")

        from sklearn.feature_selection.base import SelectorMixin

except ModuleNotFoundError:

    from sklearn.feature_selection import SelectorMixin

from sklearn.utils import check_random_state

from sklearn.utils.validation import check_is_fitted

from fri.compute import RelevanceBoundsIntervals

from fri.model.base_type import ProblemType

from fri.parameter_searcher import find_best_model

# Maps integer relevance class codes to human-readable labels
# (see FRIBase.relevance_classes_ / relevance_classes_string_).
RELEVANCE_MAPPING = {0: "Irrelevant", 1: "Weak relevant", 2: "Strong relevant"}

class NotFeasibleForParameters(Exception):
    """Raised when the optimization problem is infeasible for the
    current parameter set."""

class FRIBase(BaseEstimator, SelectorMixin):
    """Base estimator computing feature relevance interval bounds.

    Fits a baseline model via hyperparameter search, computes normalized
    relevance intervals per feature, and classifies each feature as
    irrelevant, weakly relevant or strongly relevant.  Implements the
    scikit-learn ``SelectorMixin`` interface (``transform``, ``get_support``).
    """

    def __init__(
        self,
        problem_type: ProblemType,
        random_state=None,
        n_jobs=1,
        verbose=0,
        n_param_search=30,
        n_probe_features=40,
        normalize=True,
        **kwargs,
    ):
        """
        Parameters
        ----------
        problem_type : abc.ABCMeta
            `ProblemType` subclass describing the learning problem
            (instantiated in :meth:`fit` with the extra ``kwargs``).
        random_state : numpy.random.RandomState, int or None
            Seed or random state for reproducible runs.
        n_jobs : int
            Number of parallel jobs for hyperparameter search and bound
            computation.
        verbose : int
            Verbosity level.
        n_param_search : int
            Number of hyperparameter samples evaluated in the model search.
        n_probe_features : int
            Number of random probe features used when classifying relevance.
        normalize : bool
            Whether relevance bounds are normalized.
        kwargs :
            Additional keyword arguments forwarded to the `problem_type`
            object.

        Attributes
        ----------
        interval_ : array-like
            Feature relevance intervals.
        optim_model_ : `InitModel`
            Baseline model fitted on data.
        relevance_classes_ : list(int)
            Classes of relevance encoded as int: 0 irrelevant, 1 weakly
            relevant, 2 strongly relevant.
        relevance_classes_string_ : list(str)
            Classes of relevance encoded as string.
        allrel_prediction_ : list(int)
            Relevance prediction encoded as boolean: 0 irrelevant, 1 relevant.
        """
        self.problem_type = problem_type
        self.n_probe_features = n_probe_features
        self.n_param_search = n_param_search
        # NOTE(review): sklearn convention is to store ``random_state``
        # unmodified in __init__ and validate it in fit; kept validated here
        # because downstream code expects a RandomState instance on this
        # attribute — confirm before changing.
        self.random_state = check_random_state(random_state)
        self.n_jobs = n_jobs
        self.verbose = verbose
        self.normalize = normalize
        self.other_args = kwargs
        # Mirror extra model arguments as attributes so they are visible to
        # sklearn's get_params machinery; they are also forwarded to the
        # problem object via ``other_args`` in fit.
        for k, v in kwargs.items():
            setattr(self, k, v)

        # Fitted attributes; populated in fit().
        self.interval_ = None
        self.optim_model_ = None
        self.relevance_classes_ = None
        self.relevance_classes_string_ = None
        self.allrel_prediction_ = None

    def fit(self, X, y, lupi_features=0, **kwargs):
        """
        Method to fit model on data.

        Parameters
        ----------
        X : numpy.ndarray
        y : numpy.ndarray
        lupi_features : int
            Amount of features which are considered privileged information in `X`.
            The data is expected to be structured in a way that all lupi features are at the end of the set.
            For example `lupi_features=1` would denote the last column of `X` to be privileged.
        kwargs : dict
            Dictionary of additional keyword arguments depending on the `model`.

        Returns
        -------
        `FRIBase`
        """
        # Instantiate the problem description with the extra constructor args.
        self.problem_object_ = self.problem_type(**self.other_args)

        self.lupi_features_ = lupi_features
        self.n_samples_ = X.shape[0]
        # n_features_ counts only the non-privileged features.
        self.n_features_ = X.shape[1] - lupi_features

        # Hyperparameter search for the baseline model (best_score unused here
        # beyond the search itself).
        self.optim_model_, best_score = self._fit_baseline(
            X, y, lupi_features, **kwargs
        )

        data = self.problem_object_.preprocessing((X, y), lupi_features=lupi_features)
        self._relevance_bounds_computer = RelevanceBoundsIntervals(
            data,
            self.problem_object_,
            self.optim_model_,
            self.random_state,
            self.n_probe_features,
            self.n_jobs,
            self.verbose,
            normalize=self.normalize,
        )

        # LUPI problems need the specialized interval computation that treats
        # privileged features separately.
        if lupi_features == 0:
            (
                self.interval_,
                feature_classes,
            ) = self._relevance_bounds_computer.get_normalized_intervals()
        else:
            (
                self.interval_,
                feature_classes,
            ) = self._relevance_bounds_computer.get_normalized_lupi_intervals(
                lupi_features=lupi_features
            )

        # Derive relevance classes and boolean selection masks.
        self._get_relevance_mask(feature_classes)

        # Return the classifier
        return self

    def _fit_baseline(self, X, y, lupi_features=0, **kwargs):
        """Run hyperparameter search and return (fitted model, best score)."""
        # Preprocessing
        data = self.problem_object_.preprocessing((X, y), lupi_features=lupi_features)

        # Get predefined template for our init. model
        init_model_template = self.problem_object_.get_initmodel_template

        # Get hyperparameters which are predefined to our model template and can be selected by user choice
        hyperparameters = self.problem_object_.get_all_parameters()

        search_samples = self.n_param_search

        # Find an optimal, fitted model using hyperparameter search
        optimal_model, best_score = find_best_model(
            init_model_template,
            hyperparameters,
            data,
            self.random_state,
            search_samples,
            self.n_jobs,
            self.verbose,
            lupi_features=lupi_features,
            **kwargs,
        )
        return optimal_model, best_score

    def _get_relevance_mask(self, prediction):
        """Derive relevancy classes and boolean selection masks.

        Parameters
        ----------
        prediction : array-like of int
            Relevance class per feature: 0 irrelevant, 1 weakly relevant,
            2 strongly relevant.

        Returns
        -------
        boolean array
            Relevancy prediction for each feature (True iff class > 0).
        """
        self.relevance_classes_ = prediction
        self.relevance_classes_string_ = [RELEVANCE_MAPPING[p] for p in prediction]
        # Any feature that is at least weakly relevant counts as selected.
        self.allrel_prediction_ = prediction > 0

        # Split masks into normal and privileged (LUPI) feature parts; the
        # privileged features are always at the end of the feature set.
        self.allrel_prediction_nonpriv_ = self.allrel_prediction_[: self.n_features_]
        self.allrel_prediction_priv_ = self.allrel_prediction_[self.n_features_ :]
        self.relevance_classes_nonpriv_ = self.relevance_classes_[: self.n_features_]
        self.relevance_classes_priv_ = self.relevance_classes_[self.n_features_ :]

        return self.allrel_prediction_

    def _n_selected_features(self):
        """Return the number of selected (relevant) features.

        Raises
        ------
        NotFittedError
            If the model has not been fitted yet.
        """
        check_is_fitted(self, "allrel_prediction_")
        return sum(self.allrel_prediction_)

    def _get_support_mask(self):
        """Method for SelectorMixin.

        Returns
        -------
        boolean array
            True for each selected feature.
        """
        return self.allrel_prediction_

    def score(self, X, y):
        """
        Using fitted model predict points for `X` and compare to truth `y`.

        Parameters
        ----------
        X : numpy.ndarray
        y : numpy.ndarray

        Returns
        -------
        Model specific score (0 is worst, 1 is best)

        Raises
        ------
        NotFittedError
            If no baseline model has been fitted yet.
        """
        # Explicit None check instead of truthiness: a fitted model object
        # could define __bool__/__len__ and evaluate falsy.
        if self.optim_model_ is None:
            raise NotFittedError()
        return self.optim_model_.score(X, y)

    def constrained_intervals(self, preset: dict):
        """
        Method to return relevance intervals which are constrained using preset ranges or values.

        Parameters
        ----------
        preset : dict like, {i:float} or {i:[float,float]}
            Keys denote feature index, values represent a fixed single value (float) or a range of allowed values (lower and upper bound).

            Example: To set  feature 0 to a fixed value use

            >>> preset = {0: 0.1}

            or to use the minimum relevance bound

            >>> preset[1] = self.interval_[1, 0]

        Returns
        -------
        array like
            Relevance bounds with user constraints
        """
        # Do we have intervals?
        check_is_fitted(self, "interval_")

        return self._relevance_bounds_computer.compute_multi_preset_relevance_bounds(
            preset=preset, lupi_features=self.lupi_features_
        )

    def print_interval_with_class(self):
        """
        Pretty print the relevance intervals and determined feature relevance class.

        Returns
        -------
        str
            Formatted listing of per-feature bounds and relevance classes,
            or a notice when the model is not fitted.
        """
        if self.interval_ is None:
            # Bugfix: previously execution continued past this point and
            # crashed on the missing fitted attributes (n_features_, etc.).
            return "Model is not fitted."

        output = "############## Relevance bounds ##############\n"
        output += "feature: [LB -- UB], relevance class\n"
        for i in range(self.n_features_ + self.lupi_features_):
            if i == self.n_features_:
                # Privileged (LUPI) features start here.
                output += "########## LUPI Relevance bounds\n"
            output += (
                f"{i:7}: [{self.interval_[i, 0]:1.1f} -- {self.interval_[i, 1]:1.1f}],"
            )
            output += f" {self.relevance_classes_string_[i]}\n"
        return output

Variables

RELEVANCE_MAPPING

Classes

FRIBase

class FRIBase(
    problem_type: fri.model.base_type.ProblemType,
    random_state=None,
    n_jobs=1,
    verbose=0,
    n_param_search=30,
    n_probe_features=40,
    normalize=True,
    **kwargs
)

Base class for all estimators in scikit-learn

Notes

All estimators should specify all the parameters that can be set at the class level in their __init__ as explicit keyword arguments (no *args or **kwargs).

View Source
class FRIBase(BaseEstimator, SelectorMixin):

    def __init__(

        self,

        problem_type: ProblemType,

        random_state=None,

        n_jobs=1,

        verbose=0,

        n_param_search=30,

        n_probe_features=40,

        normalize=True,

        **kwargs,

    ):

        """

        Parameters

        ----------

        problem_type : abc.ABCMeta

        random_state : Union[mtrand.RandomState, int, None]

        n_jobs : int

        verbose : int

        n_param_search : int

        n_probe_features : int

        normalize : bool

        kwargs :

        Attributes

        ----------

        interval_ : array-like

         Feature relevance Intervals

        optim_model_ : `InitModel`

        Baseline model fitted on data

        relevance_classes_ : list(int)

        Classes of relevance encoded as int: 0 irrelevant, 1 weakly relevant, 2 strongly relevant

        relevance_classes_string_ : list(str)

        Classes of relevance encoded as string

        allrel_prediction_ : list(int)

        Relevance prediction encoded as boolean: 0 irrelevant, 1 relevant

        """

        self.problem_type = problem_type

        self.n_probe_features = n_probe_features

        self.n_param_search = n_param_search

        self.random_state = check_random_state(random_state)

        self.n_jobs = n_jobs

        self.verbose = verbose

        self.normalize = normalize

        self.other_args = kwargs

        for k, v in kwargs.items():

            setattr(self, k, v)

        self.interval_ = None

        self.optim_model_ = None

        self.relevance_classes_ = None

        self.relevance_classes_string_ = None

        self.allrel_prediction_ = None

    def fit(self, X, y, lupi_features=0, **kwargs):

        """

        Method to fit model on data.

        Parameters

        ----------

        X : numpy.ndarray

        y : numpy.ndarray

        lupi_features : int

            Amount of features which are considered privileged information in `X`.

            The data is expected to be structured in a way that all lupi features are at the end of the set.

            For example `lupi_features=1` would denote the last column of `X` to be privileged.

        kwargs : dict

            Dictionary of additional keyword arguments depending on the `model`.

        Returns

        -------

        `FRIBase`

        """

        self.problem_object_ = self.problem_type(**self.other_args)

        self.lupi_features_ = lupi_features

        self.n_samples_ = X.shape[0]

        self.n_features_ = X.shape[1] - lupi_features

        self.optim_model_, best_score = self._fit_baseline(

            X, y, lupi_features, **kwargs

        )

        data = self.problem_object_.preprocessing((X, y), lupi_features=lupi_features)

        self._relevance_bounds_computer = RelevanceBoundsIntervals(

            data,

            self.problem_object_,

            self.optim_model_,

            self.random_state,

            self.n_probe_features,

            self.n_jobs,

            self.verbose,

            normalize=self.normalize,

        )

        if lupi_features == 0:

            (

                self.interval_,

                feature_classes,

            ) = self._relevance_bounds_computer.get_normalized_intervals()

        else:

            (

                self.interval_,

                feature_classes,

            ) = self._relevance_bounds_computer.get_normalized_lupi_intervals(

                lupi_features=lupi_features

            )

        self._get_relevance_mask(feature_classes)

        # Return the classifier

        return self

    def _fit_baseline(self, X, y, lupi_features=0, **kwargs):

        # Preprocessing

        data = self.problem_object_.preprocessing((X, y), lupi_features=lupi_features)

        # Get predefined template for our init. model

        init_model_template = self.problem_object_.get_initmodel_template

        # Get hyperparameters which are predefined to our model template and can be selected by user choice

        hyperparameters = self.problem_object_.get_all_parameters()

        # search_samples = len(hyperparameters) * self.n_param_search # TODO: remove this

        search_samples = self.n_param_search

        # Find an optimal, fitted model using hyperparameter search

        optimal_model, best_score = find_best_model(

            init_model_template,

            hyperparameters,

            data,

            self.random_state,

            search_samples,

            self.n_jobs,

            self.verbose,

            lupi_features=lupi_features,

            **kwargs,

        )

        return optimal_model, best_score

    def _get_relevance_mask(self, prediction):

        """Determines relevancy using feature relevance interval values

        Parameters

        ----------

        fpr : float, optional

            false positive rate allowed under H_0

        Returns

        -------

        boolean array

            Relevancy prediction for each feature

        """

        self.relevance_classes_ = prediction

        self.relevance_classes_string_ = [RELEVANCE_MAPPING[p] for p in prediction]

        self.allrel_prediction_ = prediction > 0

        self.allrel_prediction_nonpriv_ = self.allrel_prediction_[: self.n_features_]

        self.allrel_prediction_priv_ = self.allrel_prediction_[self.n_features_ :]

        self.relevance_classes_nonpriv_ = self.relevance_classes_[: self.n_features_]

        self.relevance_classes_priv_ = self.relevance_classes_[self.n_features_ :]

        return self.allrel_prediction_

    def _n_selected_features(self):

        """

        Returns the number of selected features.

        -------

        """

        check_is_fitted(self, "allrel_prediction_")

        return sum(self.allrel_prediction_)

    def _get_support_mask(self):

        """Method for SelectorMixin

        Returns

        -------

        boolean array

        """

        return self.allrel_prediction_

    def score(self, X, y):

        """

        Using fitted model predict points for `X` and compare to truth `y`.

        Parameters

        ----------

        X : numpy.ndarray

        y : numpy.ndarray

        Returns

        -------

        Model specific score (0 is worst, 1 is best)

        """

        if self.optim_model_:

            return self.optim_model_.score(X, y)

        else:

            raise NotFittedError()

    def constrained_intervals(self, preset: dict):

        """

        Method to return relevance intervals which are constrained using preset ranges or values.

        Parameters

        ----------

        preset : dict like, {i:float} or {i:[float,float]}

            Keys denote feature index, values represent a fixed single value (float) or a range of allowed values (lower and upper bound).

            Example: To set  feature 0 to a fixed value use

            >>> preset = {0: 0.1}

            or to use the minimum relevance bound

            >>> preset[1] = self.interval_[1, 0]

        Returns

        -------

        array like

            Relevance bounds with user constraints

        """

        # Do we have intervals?

        check_is_fitted(self, "interval_")

        return self._relevance_bounds_computer.compute_multi_preset_relevance_bounds(

            preset=preset, lupi_features=self.lupi_features_

        )

    def print_interval_with_class(self):

        """

        Pretty print the relevance intervals and determined feature relevance class

        """

        output = ""

        if self.interval_ is None:

            output += "Model is not fitted."

        output += "############## Relevance bounds ##############\n"

        output += "feature: [LB -- UB], relevance class\n"

        for i in range(self.n_features_ + self.lupi_features_):

            if i == self.n_features_:

                output += "########## LUPI Relevance bounds\n"

            output += (

                f"{i:7}: [{self.interval_[i, 0]:1.1f} -- {self.interval_[i, 1]:1.1f}],"

            )

            output += f" {self.relevance_classes_string_[i]}\n"

        return output

Ancestors (in MRO)

  • sklearn.base.BaseEstimator
  • sklearn.feature_selection._base.SelectorMixin
  • sklearn.base.TransformerMixin

Descendants

  • fri.FRI

Methods

constrained_intervals
def constrained_intervals(
    self,
    preset: dict
)

Method to return relevance intervals which are constrained using preset ranges or values.

Parameters

preset : dict like, {i:float} or {i:[float,float]} Keys denote feature index, values represent a fixed single value (float) or a range of allowed values (lower and upper bound).

Example: To set  feature 0 to a fixed value use

>>> preset = {0: 0.1}

or to use the minimum relevance bound

>>> preset[1] = self.interval_[1, 0]

Returns

array like Relevance bounds with user constraints

View Source
    def constrained_intervals(self, preset: dict):

        """

        Method to return relevance intervals which are constrained using preset ranges or values.

        Parameters

        ----------

        preset : dict like, {i:float} or {i:[float,float]}

            Keys denote feature index, values represent a fixed single value (float) or a range of allowed values (lower and upper bound).

            Example: To set  feature 0 to a fixed value use

            >>> preset = {0: 0.1}

            or to use the minimum relevance bound

            >>> preset[1] = self.interval_[1, 0]

        Returns

        -------

        array like

            Relevance bounds with user constraints

        """

        # Do we have intervals?

        check_is_fitted(self, "interval_")

        return self._relevance_bounds_computer.compute_multi_preset_relevance_bounds(

            preset=preset, lupi_features=self.lupi_features_

        )
fit
def fit(
    self,
    X,
    y,
    lupi_features=0,
    **kwargs
)

Method to fit model on data.

Parameters

X : numpy.ndarray y : numpy.ndarray lupi_features : int Amount of features which are considered privileged information in X. The data is expected to be structured in a way that all lupi features are at the end of the set. For example lupi_features=1 would denote the last column of X to be privileged. kwargs : dict Dictionary of additional keyword arguments depending on the model.

Returns

FRIBase

View Source
    def fit(self, X, y, lupi_features=0, **kwargs):

        """

        Method to fit model on data.

        Parameters

        ----------

        X : numpy.ndarray

        y : numpy.ndarray

        lupi_features : int

            Amount of features which are considered privileged information in `X`.

            The data is expected to be structured in a way that all lupi features are at the end of the set.

            For example `lupi_features=1` would denote the last column of `X` to be privileged.

        kwargs : dict

            Dictionary of additional keyword arguments depending on the `model`.

        Returns

        -------

        `FRIBase`

        """

        self.problem_object_ = self.problem_type(**self.other_args)

        self.lupi_features_ = lupi_features

        self.n_samples_ = X.shape[0]

        self.n_features_ = X.shape[1] - lupi_features

        self.optim_model_, best_score = self._fit_baseline(

            X, y, lupi_features, **kwargs

        )

        data = self.problem_object_.preprocessing((X, y), lupi_features=lupi_features)

        self._relevance_bounds_computer = RelevanceBoundsIntervals(

            data,

            self.problem_object_,

            self.optim_model_,

            self.random_state,

            self.n_probe_features,

            self.n_jobs,

            self.verbose,

            normalize=self.normalize,

        )

        if lupi_features == 0:

            (

                self.interval_,

                feature_classes,

            ) = self._relevance_bounds_computer.get_normalized_intervals()

        else:

            (

                self.interval_,

                feature_classes,

            ) = self._relevance_bounds_computer.get_normalized_lupi_intervals(

                lupi_features=lupi_features

            )

        self._get_relevance_mask(feature_classes)

        # Return the classifier

        return self
fit_transform
def fit_transform(
    self,
    X,
    y=None,
    **fit_params
)

Fit to data, then transform it.

Fits transformer to X and y with optional parameters fit_params and returns a transformed version of X.

Parameters

X : numpy array of shape [n_samples, n_features] Training set.

y : numpy array of shape [n_samples] Target values.

**fit_params : dict Additional fit parameters.

Returns

X_new : numpy array of shape [n_samples, n_features_new] Transformed array.

View Source
    def fit_transform(self, X, y=None, **fit_params):

        """

        Fit to data, then transform it.

        Fits transformer to X and y with optional parameters fit_params

        and returns a transformed version of X.

        Parameters

        ----------

        X : numpy array of shape [n_samples, n_features]

            Training set.

        y : numpy array of shape [n_samples]

            Target values.

        **fit_params : dict

            Additional fit parameters.

        Returns

        -------

        X_new : numpy array of shape [n_samples, n_features_new]

            Transformed array.

        """

        # non-optimized default implementation; override when a better

        # method is possible for a given clustering algorithm

        if y is None:

            # fit method of arity 1 (unsupervised transformation)

            return self.fit(X, **fit_params).transform(X)

        else:

            # fit method of arity 2 (supervised transformation)

            return self.fit(X, y, **fit_params).transform(X)
get_params
def get_params(
    self,
    deep=True
)

Get parameters for this estimator.

Parameters

deep : bool, default=True If True, will return the parameters for this estimator and contained subobjects that are estimators.

Returns

params : mapping of string to any Parameter names mapped to their values.

View Source
    def get_params(self, deep=True):

        """

        Get parameters for this estimator.

        Parameters

        ----------

        deep : bool, default=True

            If True, will return the parameters for this estimator and

            contained subobjects that are estimators.

        Returns

        -------

        params : mapping of string to any

            Parameter names mapped to their values.

        """

        out = dict()

        for key in self._get_param_names():

            try:

                value = getattr(self, key)

            except AttributeError:

                warnings.warn('From version 0.24, get_params will raise an '

                              'AttributeError if a parameter cannot be '

                              'retrieved as an instance attribute. Previously '

                              'it would return None.',

                              FutureWarning)

                value = None

            if deep and hasattr(value, 'get_params'):

                deep_items = value.get_params().items()

                out.update((key + '__' + k, val) for k, val in deep_items)

            out[key] = value

        return out
get_support
def get_support(
    self,
    indices=False
)

Get a mask, or integer index, of the features selected

Parameters

indices : boolean (default False) If True, the return value will be an array of integers, rather than a boolean mask.

Returns

support : array An index that selects the retained features from a feature vector. If indices is False, this is a boolean array of shape [# input features], in which an element is True iff its corresponding feature is selected for retention. If indices is True, this is an integer array of shape [# output features] whose values are indices into the input feature vector.

View Source
    def get_support(self, indices=False):

        """

        Get a mask, or integer index, of the features selected

        Parameters

        ----------

        indices : boolean (default False)

            If True, the return value will be an array of integers, rather

            than a boolean mask.

        Returns

        -------

        support : array

            An index that selects the retained features from a feature vector.

            If `indices` is False, this is a boolean array of shape

            [# input features], in which an element is True iff its

            corresponding feature is selected for retention. If `indices` is

            True, this is an integer array of shape [# output features] whose

            values are indices into the input feature vector.

        """

        mask = self._get_support_mask()

        return mask if not indices else np.where(mask)[0]
inverse_transform
def inverse_transform(
    self,
    X
)

Reverse the transformation operation

Parameters

X : array of shape [n_samples, n_selected_features] The input samples.

Returns

X_r : array of shape [n_samples, n_original_features] X with columns of zeros inserted where features would have been removed by :meth:transform.

View Source
    def inverse_transform(self, X):

        """

        Reverse the transformation operation

        Parameters

        ----------

        X : array of shape [n_samples, n_selected_features]

            The input samples.

        Returns

        -------

        X_r : array of shape [n_samples, n_original_features]

            `X` with columns of zeros inserted where features would have

            been removed by :meth:`transform`.

        """

        if issparse(X):

            X = X.tocsc()

            # insert additional entries in indptr:

            # e.g. if transform changed indptr from [0 2 6 7] to [0 2 3]

            # col_nonzeros here will be [2 0 1] so indptr becomes [0 2 2 3]

            it = self.inverse_transform(np.diff(X.indptr).reshape(1, -1))

            col_nonzeros = it.ravel()

            indptr = np.concatenate([[0], np.cumsum(col_nonzeros)])

            Xt = csc_matrix((X.data, X.indices, indptr),

                            shape=(X.shape[0], len(indptr) - 1), dtype=X.dtype)

            return Xt

        support = self.get_support()

        X = check_array(X, dtype=None)

        if support.sum() != X.shape[1]:

            raise ValueError("X has a different shape than during fitting.")

        if X.ndim == 1:

            X = X[None, :]

        Xt = np.zeros((X.shape[0], support.size), dtype=X.dtype)

        Xt[:, support] = X

        return Xt
def print_interval_with_class(
    self
)

Pretty print the relevance intervals and determined feature relevance class

View Source
    def print_interval_with_class(self):

        """

        Pretty print the relevance intervals and determined feature relevance class

        """

        output = ""

        if self.interval_ is None:

            output += "Model is not fitted."

        output += "############## Relevance bounds ##############\n"

        output += "feature: [LB -- UB], relevance class\n"

        for i in range(self.n_features_ + self.lupi_features_):

            if i == self.n_features_:

                output += "########## LUPI Relevance bounds\n"

            output += (

                f"{i:7}: [{self.interval_[i, 0]:1.1f} -- {self.interval_[i, 1]:1.1f}],"

            )

            output += f" {self.relevance_classes_string_[i]}\n"

        return output
score
def score(
    self,
    X,
    y
)

Using fitted model predict points for X and compare to truth y.

Parameters

X : numpy.ndarray y : numpy.ndarray

Returns

Model specific score (0 is worst, 1 is best)

View Source
    def score(self, X, y):

        """

        Using fitted model predict points for `X` and compare to truth `y`.

        Parameters

        ----------

        X : numpy.ndarray

        y : numpy.ndarray

        Returns

        -------

        Model specific score (0 is worst, 1 is best)

        """

        if self.optim_model_:

            return self.optim_model_.score(X, y)

        else:

            raise NotFittedError()
set_params
def set_params(
    self,
    **params
)

Set the parameters of this estimator.

The method works on simple estimators as well as on nested objects (such as pipelines). The latter have parameters of the form <component>__<parameter> so that it's possible to update each component of a nested object.

Parameters

**params : dict Estimator parameters.

Returns

self : object Estimator instance.

View Source
    def set_params(self, **params):

        """

        Set the parameters of this estimator.

        The method works on simple estimators as well as on nested objects

        (such as pipelines). The latter have parameters of the form

        ``<component>__<parameter>`` so that it's possible to update each

        component of a nested object.

        Parameters

        ----------

        **params : dict

            Estimator parameters.

        Returns

        -------

        self : object

            Estimator instance.

        """

        if not params:

            # Simple optimization to gain speed (inspect is slow)

            return self

        valid_params = self.get_params(deep=True)

        nested_params = defaultdict(dict)  # grouped by prefix

        for key, value in params.items():

            key, delim, sub_key = key.partition('__')

            if key not in valid_params:

                raise ValueError('Invalid parameter %s for estimator %s. '

                                 'Check the list of available parameters '

                                 'with `estimator.get_params().keys()`.' %

                                 (key, self))

            if delim:

                nested_params[key][sub_key] = value

            else:

                setattr(self, key, value)

                valid_params[key] = value

        for key, sub_params in nested_params.items():

            valid_params[key].set_params(**sub_params)

        return self
transform
def transform(
    self,
    X
)

Reduce X to the selected features.

Parameters

X : array of shape [n_samples, n_features] The input samples.

Returns

X_r : array of shape [n_samples, n_selected_features] The input samples with only the selected features.

View Source
    def transform(self, X):

        """Reduce X to the selected features.

        Parameters

        ----------

        X : array of shape [n_samples, n_features]

            The input samples.

        Returns

        -------

        X_r : array of shape [n_samples, n_selected_features]

            The input samples with only the selected features.

        """

        tags = self._get_tags()

        X = check_array(X, dtype=None, accept_sparse='csr',

                        force_all_finite=not tags.get('allow_nan', True))

        mask = self.get_support()

        if not mask.any():

            warn("No features were selected: either the data is"

                 " too noisy or the selection test too strict.",

                 UserWarning)

            return np.empty(0).reshape((X.shape[0], 0))

        if len(mask) != X.shape[1]:

            raise ValueError("X has a different shape than during fitting.")

        return X[:, safe_mask(X, mask)]

NotFeasibleForParameters

class NotFeasibleForParameters(
    /,
    *args,
    **kwargs
)

Problem was infeasible with the current parameter set.

View Source
class NotFeasibleForParameters(Exception):

    """ Problem was infeasible with the current parameter set.

    """

Ancestors (in MRO)

  • builtins.Exception
  • builtins.BaseException

Class variables

args

Methods

with_traceback
def with_traceback(
    ...
)

Exception.with_traceback(tb) -- set self.traceback to tb and return self.