Module fri
View Source
import logging
from importlib_metadata import version
try:
__version__ = version(__name__)
except:
pass
logging.basicConfig(level=logging.INFO)
from enum import Enum
import fri.model
import arfs_gen
class ProblemName(Enum):
"""
Enum which contains usable models for which feature relevance intervals can be computed in :func:`~FRI`.
Each enum value contains the model class and the corresponding data generation method from the external library `arfs_gen`.
"""
CLASSIFICATION = [fri.model.Classification, arfs_gen.ProblemName.CLASSIFICATION]
REGRESSION = [fri.model.Regression, arfs_gen.ProblemName.REGRESSION]
ORDINALREGRESSION = [
fri.model.OrdinalRegression,
arfs_gen.ProblemName.ORDINALREGRESSION,
]
LUPI_CLASSIFICATION = [
fri.model.LUPI_Classification,
arfs_gen.ProblemName.LUPI_CLASSIFICATION,
]
LUPI_REGRESSION = [fri.model.LUPI_Regression, arfs_gen.ProblemName.LUPI_REGRESSION]
LUPI_ORDREGRESSION = [
fri.model.LUPI_OrdinalRegression,
arfs_gen.ProblemName.LUPI_ORDREGRESSION,
]
NORMAL_MODELS = [
ProblemName.CLASSIFICATION,
ProblemName.REGRESSION,
ProblemName.ORDINALREGRESSION,
]
LUPI_MODELS = [
ProblemName.LUPI_CLASSIFICATION,
ProblemName.LUPI_REGRESSION,
ProblemName.LUPI_ORDREGRESSION,
]
from arfs_gen import genRegressionData, genClassificationData, genOrdinalRegressionData
def quick_generate(problemtype, **kwargs):
"Overwrite arfs_gen method to handle different format of problemtype in fri"
return arfs_gen.quick_generate(problemtype.value[1], **kwargs)
def genLupiData(problemname, **kwargs):
"Overwrite arfs_gen method to handle different format of problemtype in fri"
return arfs_gen.genLupiData(problemname.value[1], **kwargs)
from fri.main import FRIBase
from fri.plot import plot_intervals
class FRI(FRIBase):
def __init__(
self,
problemName: object,
random_state: object = None,
n_jobs: int = 1,
verbose: int = 0,
n_param_search: int = 10,
n_probe_features: int = 20,
w_l1_slack: float = 0.001,
loss_slack: float = 0.001,
normalize: bool = True,
**kwargs,
):
"""
Main class to use `FRI` in a programmatic fashion following the scikit-learn paradigm.
Parameters
----------
problemName: `ProblemName` or str
Type of Problem as enum value or explicit string (e.g. "classification")
random_state: object or int
Random state object or int
n_jobs: int or None
Number of threads or -1 for automatic.
verbose: int
Verbosity if > 0
n_param_search: int
Number of parameter samples in random search for hyperparameters.
n_probe_features: int
Number of probes to generate to improve feature selection.
w_l1_slack: float
Allow deviation from optimal L1 norm.
loss_slack: float
Allow deviation of loss.
normalize: boolean
Normalize relevance bounds to the range [0, 1] depending on the L1 norm.
"""
self.problemName = problemName
if isinstance(problemName, ProblemName):
problemtype = problemName.value
else:
if problemName == "classification" or problemName == "class":
problemtype = ProblemName.CLASSIFICATION
elif problemName == "regression" or problemName == "reg":
problemtype = ProblemName.REGRESSION
elif problemName == "ordinalregression" or problemName == "ordreg":
problemtype = ProblemName.ORDINALREGRESSION
elif problemName == "lupi_classification" or problemName == "lupi_class":
problemtype = ProblemName.LUPI_CLASSIFICATION
if problemtype is None:
names = [enum.name.lower() for enum in ProblemName]
print(
f"Parameter 'problemName' was not recognized or unset. Try one of {names}."
)
else:
problem_class = problemtype[0]
super().__init__(
problem_class,
random_state=random_state,
n_jobs=n_jobs,
verbose=verbose,
n_param_search=n_param_search,
n_probe_features=n_probe_features,
w_l1_slack=w_l1_slack,
loss_slack=loss_slack,
normalize=normalize,
**kwargs,
)
__all__ = [
"genRegressionData",
"genClassificationData",
"genOrdinalRegressionData",
"quick_generate",
"plot_intervals",
"ProblemName",
"FRI",
"LUPI_MODELS",
"NORMAL_MODELS",
"genLupiData",
]
Sub-modules
Variables
LUPI_MODELS
NORMAL_MODELS
Functions
genClassificationData
def genClassificationData(
n_samples: int = 100,
n_features: int = 2,
n_redundant: int = 0,
n_strel: int = 1,
n_repeated: int = 0,
noise: float = 0.1,
flip_y: float = 0,
random_state: object = None,
partition=None,
linear=True
)
Generate synthetic classification data
Parameters
n_samples : int, optional Number of samples
n_features : int, optional Number of features
n_redundant : int, optional Number of features which are part of redundant subsets (weakly relevant)
n_strel : int, optional Number of features which are mandatory for the underlying model (strongly relevant)
n_repeated : int, optional Number of features which are clones of existing ones.
noise : float Added gaussian noise to data. Parameter scales Std of normal distribution.
flip_y : float, optional Ratio of samples randomly switched to wrong class.
random_state : object, optional Randomstate object used for generation.
Returns
X : array of shape [n_samples, n_features] The generated samples.
y : array of shape [n_samples] The output classes.
Raises
ValueError Wrong parameters for specified amount of features/samples.
Examples
>>> X, y = genClassificationData(n_samples=200)
Generating dataset with d=2,n=200,strongly=1,weakly=0, partition of weakly=None
>>> X.shape
(200, 2)
>>> y.shape
(200,)
View Source
def genClassificationData(
n_samples: int = 100,
n_features: int = 2,
n_redundant: int = 0,
n_strel: int = 1,
n_repeated: int = 0,
noise: float = 0.1,
flip_y: float = 0,
random_state: object = None,
partition=None,
linear=True,
):
"""Generate synthetic classification data
Parameters
----------
n_samples : int, optional
Number of samples
n_features : int, optional
Number of features
n_redundant : int, optional
Number of features which are part of redundant subsets (weakly relevant)
n_strel : int, optional
Number of features which are mandatory for the underlying model (strongly relevant)
n_repeated : int, optional
Number of features which are clones of existing ones.
noise : float
Added gaussian noise to data. Parameter scales Std of normal distribution.
flip_y : float, optional
Ratio of samples randomly switched to wrong class.
random_state : object, optional
Randomstate object used for generation.
Returns
-------
X : array of shape [n_samples, n_features]
The generated samples.
y : array of shape [n_samples]
The output classes.
Raises
------
ValueError
Wrong parameters for specified amount of features/samples.
Examples
---------
>>> X,y = genClassificationData(n_samples=200)
Generating dataset with d=2,n=200,strongly=1,weakly=0, partition of weakly=None
>>> X.shape
(200, 2)
>>> y.shape
(200,)
"""
_checkParam(**locals())
random_state = check_random_state(random_state)
X = np.zeros((n_samples, n_features))
# Find partitions which define the weakly relevant subsets
if partition is None and n_redundant > 0:
if not linear and n_strel == 0 and n_redundant <= 4:
raise ValueError(
"Generating non-linear data requires multiple informative features. "
"Increase `n_redundant` to 5 or more, or add at least 1 `n_strel` feature."
)
elif not linear and n_strel == 0:
assert n_redundant > 4
# We create 2 partitions to have at least 2 informative features for the non-linear problem
partition = [2, n_redundant - 2]
part_size = 2
else:
partition = [n_redundant]
part_size = 1
elif partition is not None:
part_size = len(partition)
else:
part_size = 0
n_informative = n_strel + part_size
if linear:
X_informative, Y = generate_binary_classification_problem(
n_samples, n_informative, random_state
)
else:
if n_informative < 2:
raise ValueError(
"Generating non-linear data requires more than 1 strongly relevant feature. "
"Specifying 'n_redundant' implicitly requests only 1 strongly relevant feature. "
"Try increasing 'n_strel' to >=1."
)
# Create classif. set with 2 clusters per class
X_informative, Y = make_classification(
n_samples=n_samples,
n_features=n_informative,
n_informative=n_informative,
n_redundant=0,
n_repeated=0,
n_classes=2,
n_clusters_per_class=2,
flip_y=0.00,
class_sep=0.5,
hypercube=True,
shift=0.0,
scale=1.0,
shuffle=False,
random_state=random_state,
)
# We extend X with several types of other features (linear combinations, repeats, random features)
X = _fillVariableSpace(
X_informative,
random_state,
n_features=n_features,
n_redundant=n_redundant,
n_strel=n_strel,
n_repeated=n_repeated,
partition=partition,
)
# Add target noise
if flip_y > 0:
n_flip = int(flip_y * n_samples)
Y[random_state.choice(n_samples, n_flip)] *= -1
# Add gaussian noise to data
X = X + random_state.normal(size=(n_samples, n_features), scale=noise / X.std())
return X, Y
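Example
A minimal usage sketch with illustrative values. The non-linear variant (`linear=False`) needs at least two informative features, as enforced in the source above.
from fri import genClassificationData

# Linear problem: 2 strongly relevant, 2 weakly relevant (one redundant subset)
# and 2 irrelevant filler features.
X, y = genClassificationData(
    n_samples=200, n_features=6, n_strel=2, n_redundant=2, random_state=0
)

# Non-linear problem: n_strel plus the number of redundant partitions must be >= 2.
X_nl, y_nl = genClassificationData(
    n_samples=200, n_features=6, n_strel=2, n_redundant=2, linear=False, random_state=0
)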
genLupiData
def genLupiData(
problemname,
**kwargs
)
Wrapper around the arfs_gen function to handle fri's ProblemName format
View Source
def genLupiData(problemname, **kwargs):
"Overwrite arfs_gen method to handle different format of problemtype in fri"
return arfs_gen.genLupiData(problemname.value[1], **kwargs)
genOrdinalRegressionData
def genOrdinalRegressionData(
n_samples: int = 100,
n_features: int = 2,
n_redundant: int = 0,
n_strel: int = 1,
n_repeated: int = 0,
noise: float = 0.0,
random_state: object = None,
partition=None,
n_target_bins: int = 3
)
Generate ordinal regression data
Parameters
n_samples : int, optional Number of samples
n_features : int, optional Number of features
n_redundant : int, optional Number of features which are part of redundant subsets (weakly relevant)
n_strel : int, optional Number of features which are mandatory for the underlying model (strongly relevant)
n_repeated : int, optional Number of features which are clones of existing ones.
noise : float, optional Noise of the created samples around ground truth.
random_state : object, optional Randomstate object used for generation.
n_target_bins : int, optional Number of bins into which the regression target is split to form the ordinal classes.
Returns
X : array of shape [n_samples, n_features] The generated samples.
y : array of shape [n_samples] The output values (target).
Raises
ValueError Wrong parameters for specified amount of features/samples.
View Source
def genOrdinalRegressionData(
n_samples: int = 100,
n_features: int = 2,
n_redundant: int = 0,
n_strel: int = 1,
n_repeated: int = 0,
noise: float = 0.0,
random_state: object = None,
partition=None,
n_target_bins: int = 3,
):
"""
Generate ordinal regression data
Parameters
----------
n_samples : int, optional
Number of samples
n_features : int, optional
Number of features
n_redundant : int, optional
Number of features which are part of redundant subsets (weakly relevant)
n_strel : int, optional
Number of features which are mandatory for the underlying model (strongly relevant)
n_repeated : int, optional
Number of features which are clones of existing ones.
noise : float, optional
Noise of the created samples around ground truth.
random_state : object, optional
Randomstate object used for generation.
n_target_bins : int, optional
Number of bins into which the regression target is split to form the ordinal classes
Returns
-------
X : array of shape [n_samples, n_features]
The generated samples.
y : array of shape [n_samples]
The output values (target).
Raises
------
ValueError
Wrong parameters for specified amount of features/samples.
"""
_checkParam(**locals())
random_state = check_random_state(random_state)
if not n_target_bins > 1:
raise ValueError("At least 2 target bins needed")
# Use normal regression data as starting point
X_regression, Y_regression = genRegressionData(
n_samples=int(n_samples),
n_features=int(n_features),
n_redundant=int(n_redundant),
n_strel=int(n_strel),
n_repeated=int(n_repeated),
noise=0,
random_state=random_state,
partition=partition,
)
bin_size = int(np.floor(n_samples / n_target_bins))
rest = int(n_samples - (bin_size * n_target_bins))
# Sort the target values and rearrange the data accordingly
sort_indices = np.argsort(Y_regression)
X = X_regression[sort_indices]
Y = Y_regression[sort_indices]
# Assign ordinal classes as target values
for i in range(n_target_bins):
Y[bin_size * i : bin_size * (i + 1)] = i
# Put the non-divisible rest into the last bin
if rest > 0:
Y[-rest:] = n_target_bins - 1
X, Y = shuffle(X, Y, random_state=random_state)
# Add gaussian noise to data
X = X + random_state.normal(size=(n_samples, n_features), scale=noise)
return X, Y
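Example
A short usage sketch with illustrative values; the regression target is binned into the ordinal classes 0 .. n_target_bins-1.
import numpy as np
from fri import genOrdinalRegressionData

X, y = genOrdinalRegressionData(
    n_samples=300, n_features=6, n_strel=2, n_redundant=2, n_target_bins=3, random_state=0
)
print(X.shape)       # (300, 6)
print(np.unique(y))  # the three ordinal classes 0, 1, 2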
genRegressionData
def genRegressionData(
n_samples: int = 100,
n_features: int = 2,
n_redundant: int = 0,
n_strel: int = 1,
n_repeated: int = 0,
noise: float = 0.0,
random_state: object = None,
partition=None
) -> object
Generate synthetic regression data
Parameters
n_samples : int, optional Number of samples
n_features : int, optional Number of features
n_redundant : int, optional Number of features which are part of redundant subsets (weakly relevant)
n_strel : int, optional Number of features which are mandatory for the underlying model (strongly relevant)
n_repeated : int, optional Number of features which are clones of existing ones.
noise : float, optional Noise of the created samples around ground truth.
random_state : object, optional Randomstate object used for generation.
Returns
X : array of shape [n_samples, n_features] The generated samples.
y : array of shape [n_samples] The output values (target).
Raises
ValueError Wrong parameters for specified amount of features/samples.
View Source
def genRegressionData(
n_samples: int = 100,
n_features: int = 2,
n_redundant: int = 0,
n_strel: int = 1,
n_repeated: int = 0,
noise: float = 0.0,
random_state: object = None,
partition=None,
) -> object:
"""Generate synthetic regression data
Parameters
----------
n_samples : int, optional
Number of samples
n_features : int, optional
Number of features
n_redundant : int, optional
Number of features which are part of redundant subsets (weakly relevant)
n_strel : int, optional
Number of features which are mandatory for the underlying model (strongly relevant)
n_repeated : int, optional
Number of features which are clones of existing ones.
noise : float, optional
Noise of the created samples around ground truth.
random_state : object, optional
Randomstate object used for generation.
Returns
-------
X : array of shape [n_samples, n_features]
The generated samples.
y : array of shape [n_samples]
The output values (target).
Raises
------
ValueError
Wrong parameters for specified amount of features/samples.
"""
_checkParam(**locals())
random_state = check_random_state(random_state)
# Find partitions which define the weakly relevant subsets
if partition is None and n_redundant > 0:
partition = [n_redundant]
part_size = 1
elif partition is not None:
part_size = len(partition)
else:
part_size = 0
n_informative = n_strel + part_size
X = random_state.randn(n_samples, n_informative)
ground_truth = np.zeros((n_informative, 1))
ground_truth[:n_informative, :] = 0.3
bias = 0
y = np.dot(X, ground_truth) + bias
# Add noise
if noise > 0.0:
y += random_state.normal(scale=noise, size=y.shape)
X = _fillVariableSpace(
X,
random_state,
n_features=n_features,
n_redundant=n_redundant,
n_strel=n_strel,
n_repeated=n_repeated,
partition=partition,
)
y = np.squeeze(y)
return X, y
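Example
A minimal usage sketch with illustrative values.
from fri import genRegressionData

# 2 strongly relevant, 2 weakly relevant and 1 irrelevant feature,
# with gaussian noise added to the target.
X, y = genRegressionData(
    n_samples=200, n_features=5, n_strel=2, n_redundant=2, noise=0.1, random_state=1
)
print(X.shape, y.shape)  # (200, 5) (200,)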
plot_intervals
def plot_intervals(
model,
ticklabels=None
)
Plot the relevance intervals.
Parameters
model : FRI model Needs to be fitted before.
ticklabels : list of str, optional Strings for tick labels on the x-axis (features).
View Source
def plot_intervals(model, ticklabels=None):
"""Plot the relevance intervals.
Parameters
----------
model : FRI model
Needs to be fitted before.
ticklabels : list of str, optional
Strings for tick labels on the x-axis (features)
"""
if model.interval_ is not None:
plotIntervals(
model.interval_, ticklabels=ticklabels, classes=model.relevance_classes_
)
else:
print("Intervals not computed. Try running fit() function first.")
quick_generate
def quick_generate(
problemtype,
**kwargs
)
Wrapper around the arfs_gen function to handle fri's ProblemName format
View Source
def quick_generate(problemtype, **kwargs):
"Overwrite arfs_gen method to handle different format of problemtype in fri"
return arfs_gen.quick_generate(problemtype.value[1], **kwargs)
Classes
FRI
class FRI(
problemName: object,
random_state: object = None,
n_jobs: int = 1,
verbose: int = 0,
n_param_search: int = 10,
n_probe_features: int = 20,
w_l1_slack: float = 0.001,
loss_slack: float = 0.001,
normalize: bool = True,
**kwargs
)
Main class to use `FRI` in a programmatic fashion following the scikit-learn paradigm.
Notes
All estimators should specify all the parameters that can be set at the class level in their __init__ as explicit keyword arguments (no *args or **kwargs).
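Example
A minimal end-to-end sketch of the documented workflow; parameter values are illustrative.
from fri import FRI, ProblemName, genClassificationData

# Synthetic data with known relevance structure.
X, y = genClassificationData(
    n_samples=200, n_features=6, n_strel=2, n_redundant=2, random_state=0
)

model = FRI(ProblemName.CLASSIFICATION, random_state=0)
model.fit(X, y)

print(model.interval_)                    # relevance bounds per feature
print(model.print_interval_with_class())  # pretty-printed bounds and relevance classes
print(model.get_support(indices=True))    # indices of the selected (relevant) features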
View Source
class FRI(FRIBase):
def __init__(
self,
problemName: object,
random_state: object = None,
n_jobs: int = 1,
verbose: int = 0,
n_param_search: int = 10,
n_probe_features: int = 20,
w_l1_slack: float = 0.001,
loss_slack: float = 0.001,
normalize: bool = True,
**kwargs,
):
"""
Main class to use `FRI` in a programmatic fashion following the scikit-learn paradigm.
Parameters
----------
problemName: `ProblemName` or str
Type of Problem as enum value or explicit string (e.g. "classification")
random_state: object or int
Random state object or int
n_jobs: int or None
Number of threads or -1 for automatic.
verbose: int
Verbosity if > 0
n_param_search: int
Number of parameter samples in random search for hyperparameters.
n_probe_features: int
Number of probes to generate to improve feature selection.
w_l1_slack: float
Allow deviation from optimal L1 norm.
loss_slack: float
Allow deviation of loss.
normalize: boolean
Normalize relevance bounds to the range [0, 1] depending on the L1 norm.
"""
self.problemName = problemName
if isinstance(problemName, ProblemName):
problemtype = problemName.value
else:
if problemName == "classification" or problemName == "class":
problemtype = ProblemName.CLASSIFICATION
elif problemName == "regression" or problemName == "reg":
problemtype = ProblemName.REGRESSION
elif problemName == "ordinalregression" or problemName == "ordreg":
problemtype = ProblemName.ORDINALREGRESSION
elif problemName == "lupi_classification" or problemName == "lupi_class":
problemtype = ProblemName.LUPI_CLASSIFICATION
if problemtype is None:
names = [enum.name.lower() for enum in ProblemName]
print(
f"Parameter 'problemName' was not recognized or unset. Try one of {names}."
)
else:
problem_class = problemtype[0]
super().__init__(
problem_class,
random_state=random_state,
n_jobs=n_jobs,
verbose=verbose,
n_param_search=n_param_search,
n_probe_features=n_probe_features,
w_l1_slack=w_l1_slack,
loss_slack=loss_slack,
normalize=normalize,
**kwargs,
)
Ancestors (in MRO)
- fri.main.FRIBase
- sklearn.base.BaseEstimator
- sklearn.feature_selection._base.SelectorMixin
- sklearn.base.TransformerMixin
Methods
constrained_intervals
def constrained_intervals(
self,
preset: dict
)
Method to return relevance intervals which are constrained using preset ranges or values.
Parameters
preset : dict like, {i:float} or {i:[float,float]} Keys denote feature index, values represent a fixed single value (float) or a range of allowed values (lower and upper bound).
Example: To set feature 0 to a fixed value use
>>> preset = {0: 0.1}
or to use the minimum relevance bound
>>> preset[1] = self.interval_[1, 0]
Returns
array like Relevance bounds with user constraints
View Source
def constrained_intervals(self, preset: dict):
"""
Method to return relevance intervals which are constrained using preset ranges or values.
Parameters
----------
preset : dict like, {i:float} or {i:[float,float]}
Keys denote feature index, values represent a fixed single value (float) or a range of allowed values (lower and upper bound).
Example: To set feature 0 to a fixed value use
>>> preset = {0: 0.1}
or to use the minimum relevance bound
>>> preset[1] = self.interval_[1, 0]
Returns
-------
array like
Relevance bounds with user constraints
"""
# Do we have intervals?
check_is_fitted(self, "interval_")
return self._relevance_bounds_computer.compute_multi_preset_relevance_bounds(
preset=preset, lupi_features=self.lupi_features_
)
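Example
A usage sketch following the preset examples in the docstring; `model` is assumed to be a fitted FRI instance.
# Fix feature 0 to the value 0.1 and pin feature 1 to its minimum relevance bound,
# then recompute the intervals of the remaining features under these constraints.
preset = {0: 0.1, 1: model.interval_[1, 0]}
constrained = model.constrained_intervals(preset)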
fit
def fit(
self,
X,
y,
lupi_features=0,
**kwargs
)
Method to fit model on data.
Parameters
X : numpy.ndarray
y : numpy.ndarray
lupi_features : int Amount of features which are considered privileged information in `X`. The data is expected to be structured in a way that all lupi features are at the end of the set. For example `lupi_features=1` would denote the last column of `X` to be privileged.
kwargs : dict Dictionary of additional keyword arguments depending on the `model`.
Returns
FRIBase
View Source
def fit(self, X, y, lupi_features=0, **kwargs):
"""
Method to fit model on data.
Parameters
----------
X : numpy.ndarray
y : numpy.ndarray
lupi_features : int
Amount of features which are considered privileged information in `X`.
The data is expected to be structured in a way that all lupi features are at the end of the set.
For example `lupi_features=1` would denote the last column of `X` to be privileged.
kwargs : dict
Dictionary of additional keyword arguments depending on the `model`.
Returns
-------
`FRIBase`
"""
self.problem_object_ = self.problem_type(**self.other_args)
self.lupi_features_ = lupi_features
self.n_samples_ = X.shape[0]
self.n_features_ = X.shape[1] - lupi_features
self.optim_model_, best_score = self._fit_baseline(
X, y, lupi_features, **kwargs
)
data = self.problem_object_.preprocessing((X, y), lupi_features=lupi_features)
self._relevance_bounds_computer = RelevanceBoundsIntervals(
data,
self.problem_object_,
self.optim_model_,
self.random_state,
self.n_probe_features,
self.n_jobs,
self.verbose,
normalize=self.normalize,
)
if lupi_features == 0:
(
self.interval_,
feature_classes,
) = self._relevance_bounds_computer.get_normalized_intervals()
else:
(
self.interval_,
feature_classes,
) = self._relevance_bounds_computer.get_normalized_lupi_intervals(
lupi_features=lupi_features
)
self._get_relevance_mask(feature_classes)
# Return the classifier
return self
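Example
A sketch of fitting with privileged information; `X_train` and `y_train` are placeholder names, and the privileged column is assumed to be the last column of `X_train`.
from fri import FRI, ProblemName

model = FRI(ProblemName.LUPI_CLASSIFICATION, random_state=0)
# lupi_features=1 marks the last column of X_train as privileged information.
model.fit(X_train, y_train, lupi_features=1)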
fit_transform
def fit_transform(
self,
X,
y=None,
**fit_params
)
Fit to data, then transform it.
Fits transformer to `X` and `y` with optional parameters `fit_params` and returns a transformed version of `X`.
Parameters
X : array-like of shape (n_samples, n_features) Input samples.
y : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None Target values (None for unsupervised transformations).
**fit_params : dict Additional fit parameters.
Returns
X_new : ndarray array of shape (n_samples, n_features_new) Transformed array.
View Source
def fit_transform(self, X, y=None, **fit_params):
"""
Fit to data, then transform it.
Fits transformer to `X` and `y` with optional parameters `fit_params`
and returns a transformed version of `X`.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Input samples.
y : array-like of shape (n_samples,) or (n_samples, n_outputs), \
default=None
Target values (None for unsupervised transformations).
**fit_params : dict
Additional fit parameters.
Returns
-------
X_new : ndarray array of shape (n_samples, n_features_new)
Transformed array.
"""
# non-optimized default implementation; override when a better
# method is possible for a given clustering algorithm
if y is None:
# fit method of arity 1 (unsupervised transformation)
return self.fit(X, **fit_params).transform(X)
else:
# fit method of arity 2 (supervised transformation)
return self.fit(X, y, **fit_params).transform(X)
get_grouping
def get_grouping(
self,
**kwargs
)
View Source
def get_grouping(self, **kwargs):
check_is_fitted(self, "allrel_prediction_")
groups, link = self._relevance_bounds_computer.grouping(
self.interval_, **kwargs
)
return groups, link
get_params
def get_params(
self,
deep=True
)
Get parameters for this estimator.
Parameters
deep : bool, default=True If True, will return the parameters for this estimator and contained subobjects that are estimators.
Returns
params : dict Parameter names mapped to their values.
View Source
def get_params(self, deep=True):
"""
Get parameters for this estimator.
Parameters
----------
deep : bool, default=True
If True, will return the parameters for this estimator and
contained subobjects that are estimators.
Returns
-------
params : dict
Parameter names mapped to their values.
"""
out = dict()
for key in self._get_param_names():
value = getattr(self, key)
if deep and hasattr(value, 'get_params'):
deep_items = value.get_params().items()
out.update((key + '__' + k, val) for k, val in deep_items)
out[key] = value
return out
get_support
def get_support(
self,
indices=False
)
Get a mask, or integer index, of the features selected
Parameters
indices : bool, default=False If True, the return value will be an array of integers, rather than a boolean mask.
Returns
support : array
An index that selects the retained features from a feature vector. If `indices` is False, this is a boolean array of shape [# input features], in which an element is True iff its corresponding feature is selected for retention. If `indices` is True, this is an integer array of shape [# output features] whose values are indices into the input feature vector.
View Source
def get_support(self, indices=False):
"""
Get a mask, or integer index, of the features selected
Parameters
----------
indices : bool, default=False
If True, the return value will be an array of integers, rather
than a boolean mask.
Returns
-------
support : array
An index that selects the retained features from a feature vector.
If `indices` is False, this is a boolean array of shape
[# input features], in which an element is True iff its
corresponding feature is selected for retention. If `indices` is
True, this is an integer array of shape [# output features] whose
values are indices into the input feature vector.
"""
mask = self._get_support_mask()
return mask if not indices else np.where(mask)[0]
inverse_transform
def inverse_transform(
self,
X
)
Reverse the transformation operation
Parameters
X : array of shape [n_samples, n_selected_features] The input samples.
Returns
X_r : array of shape [n_samples, n_original_features] `X` with columns of zeros inserted where features would have been removed by :meth:`transform`.
View Source
def inverse_transform(self, X):
"""
Reverse the transformation operation
Parameters
----------
X : array of shape [n_samples, n_selected_features]
The input samples.
Returns
-------
X_r : array of shape [n_samples, n_original_features]
`X` with columns of zeros inserted where features would have
been removed by :meth:`transform`.
"""
if issparse(X):
X = X.tocsc()
# insert additional entries in indptr:
# e.g. if transform changed indptr from [0 2 6 7] to [0 2 3]
# col_nonzeros here will be [2 0 1] so indptr becomes [0 2 2 3]
it = self.inverse_transform(np.diff(X.indptr).reshape(1, -1))
col_nonzeros = it.ravel()
indptr = np.concatenate([[0], np.cumsum(col_nonzeros)])
Xt = csc_matrix((X.data, X.indices, indptr),
shape=(X.shape[0], len(indptr) - 1), dtype=X.dtype)
return Xt
support = self.get_support()
X = check_array(X, dtype=None)
if support.sum() != X.shape[1]:
raise ValueError("X has a different shape than during fitting.")
if X.ndim == 1:
X = X[None, :]
Xt = np.zeros((X.shape[0], support.size), dtype=X.dtype)
Xt[:, support] = X
return Xt
print_interval_with_class
def print_interval_with_class(
self
)
Pretty print the relevance intervals and determined feature relevance class
View Source
def print_interval_with_class(self):
"""
Pretty print the relevance intervals and determined feature relevance class
"""
output = ""
if self.interval_ is None:
output += "Model is not fitted."
output += "############## Relevance bounds ##############\n"
output += "feature: [LB -- UB], relevance class\n"
for i in range(self.n_features_ + self.lupi_features_):
if i == self.n_features_:
output += "########## LUPI Relevance bounds\n"
output += (
f"{i:7}: [{self.interval_[i, 0]:1.1f} -- {self.interval_[i, 1]:1.1f}],"
)
output += f" {self.relevance_classes_string_[i]}\n"
return output
score
def score(
self,
X,
y
)
Using fitted model predict points for `X` and compare to truth `y`.
Parameters
X : numpy.ndarray y : numpy.ndarray
Returns
Model specific score (0 is worst, 1 is best)
View Source
def score(self, X, y):
"""
Using fitted model predict points for `X` and compare to truth `y`.
Parameters
----------
X : numpy.ndarray
y : numpy.ndarray
Returns
-------
Model specific score (0 is worst, 1 is best)
"""
if self.optim_model_:
return self.optim_model_.score(X, y)
else:
raise NotFittedError()
set_params
def set_params(
self,
**params
)
Set the parameters of this estimator.
The method works on simple estimators as well as on nested objects (such as :class:`~sklearn.pipeline.Pipeline`). The latter have parameters of the form `<component>__<parameter>` so that it's possible to update each component of a nested object.
Parameters
**params : dict Estimator parameters.
Returns
self : estimator instance Estimator instance.
View Source
def set_params(self, **params):
"""
Set the parameters of this estimator.
The method works on simple estimators as well as on nested objects
(such as :class:`~sklearn.pipeline.Pipeline`). The latter have
parameters of the form ``<component>__<parameter>`` so that it's
possible to update each component of a nested object.
Parameters
----------
**params : dict
Estimator parameters.
Returns
-------
self : estimator instance
Estimator instance.
"""
if not params:
# Simple optimization to gain speed (inspect is slow)
return self
valid_params = self.get_params(deep=True)
nested_params = defaultdict(dict) # grouped by prefix
for key, value in params.items():
key, delim, sub_key = key.partition('__')
if key not in valid_params:
raise ValueError('Invalid parameter %s for estimator %s. '
'Check the list of available parameters '
'with `estimator.get_params().keys()`.' %
(key, self))
if delim:
nested_params[key][sub_key] = value
else:
setattr(self, key, value)
valid_params[key] = value
for key, sub_params in nested_params.items():
valid_params[key].set_params(**sub_params)
return self
transform
def transform(
self,
X
)
Reduce X to the selected features.
Parameters
X : array of shape [n_samples, n_features] The input samples.
Returns
X_r : array of shape [n_samples, n_selected_features] The input samples with only the selected features.
View Source
def transform(self, X):
"""Reduce X to the selected features.
Parameters
----------
X : array of shape [n_samples, n_features]
The input samples.
Returns
-------
X_r : array of shape [n_samples, n_selected_features]
The input samples with only the selected features.
"""
# note: we use _safe_tags instead of _get_tags because this is a
# public Mixin.
X = check_array(
X,
dtype=None,
accept_sparse="csr",
force_all_finite=not _safe_tags(self, key="allow_nan"),
)
mask = self.get_support()
if not mask.any():
warn("No features were selected: either the data is"
" too noisy or the selection test too strict.",
UserWarning)
return np.empty(0).reshape((X.shape[0], 0))
if len(mask) != X.shape[1]:
raise ValueError("X has a different shape than during fitting.")
return X[:, safe_mask(X, mask)]
ProblemName
class ProblemName(
/,
*args,
**kwargs
)
Enum which contains usable models for which feature relevance intervals can be computed in :func:`~FRI`.
Each enum value contains the model class and the corresponding data generation method from the external library `arfs_gen`.
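Example
Per the description above, each member bundles the model class and the generator identifier; unpacking is straightforward.
from fri import ProblemName

# Each enum value pairs the fri model class with the matching arfs_gen generator.
model_class, generator_name = ProblemName.CLASSIFICATION.value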
View Source
class ProblemName(Enum):
"""
Enum which contains usable models for which feature relevance intervals can be computed in :func:`~FRI`.
Each enum value contains the model class and the corresponding data generation method from the external library `arfs_gen`.
"""
CLASSIFICATION = [fri.model.Classification, arfs_gen.ProblemName.CLASSIFICATION]
REGRESSION = [fri.model.Regression, arfs_gen.ProblemName.REGRESSION]
ORDINALREGRESSION = [
fri.model.OrdinalRegression,
arfs_gen.ProblemName.ORDINALREGRESSION,
]
LUPI_CLASSIFICATION = [
fri.model.LUPI_Classification,
arfs_gen.ProblemName.LUPI_CLASSIFICATION,
]
LUPI_REGRESSION = [fri.model.LUPI_Regression, arfs_gen.ProblemName.LUPI_REGRESSION]
LUPI_ORDREGRESSION = [
fri.model.LUPI_OrdinalRegression,
arfs_gen.ProblemName.LUPI_ORDREGRESSION,
]
Ancestors (in MRO)
- enum.Enum
Class variables
CLASSIFICATION
LUPI_CLASSIFICATION
LUPI_ORDREGRESSION
LUPI_REGRESSION
ORDINALREGRESSION
REGRESSION
name
value