Module fri
View Source
import logging
from importlib_metadata import version
try:
__version__ = version(__name__)
except:
pass
logging.basicConfig(level=logging.INFO)
from enum import Enum
import fri.model
import arfs_gen
class ProblemName(Enum):
"""
Enum which contains usable models for which feature relevance intervals can be computed in :func:`~FRI`.
Each enum value contains the model class and the corresponding data generation method from the external library `arfs_gen`.
"""
CLASSIFICATION = [fri.model.Classification, arfs_gen.ProblemName.CLASSIFICATION]
REGRESSION = [fri.model.Regression, arfs_gen.ProblemName.REGRESSION]
ORDINALREGRESSION = [
fri.model.OrdinalRegression,
arfs_gen.ProblemName.ORDINALREGRESSION,
]
LUPI_CLASSIFICATION = [
fri.model.LUPI_Classification,
arfs_gen.ProblemName.LUPI_CLASSIFICATION,
]
LUPI_REGRESSION = [fri.model.LUPI_Regression, arfs_gen.ProblemName.LUPI_REGRESSION]
LUPI_ORDREGRESSION = [
fri.model.LUPI_OrdinalRegression,
arfs_gen.ProblemName.LUPI_ORDREGRESSION,
]
NORMAL_MODELS = [
ProblemName.CLASSIFICATION,
ProblemName.REGRESSION,
ProblemName.ORDINALREGRESSION,
]
LUPI_MODELS = [
ProblemName.LUPI_CLASSIFICATION,
ProblemName.LUPI_REGRESSION,
ProblemName.LUPI_ORDREGRESSION,
]
from arfs_gen import genRegressionData, genClassificationData, genOrdinalRegressionData
def quick_generate(problemtype, **kwargs):
"Overwrite arfs_gen method to handle different format of problemtype in fri"
return arfs_gen.quick_generate(problemtype.value[1], **kwargs)
def genLupiData(problemname, **kwargs):
"Overwrite arfs_gen method to handle different format of problemtype in fri"
return arfs_gen.genLupiData(problemname.value[1], **kwargs)
from fri.main import FRIBase
from fri.plot import plot_intervals
class FRI(FRIBase):
def __init__(
self,
problemName: object,
random_state: object = None,
n_jobs: int = 1,
verbose: int = 0,
n_param_search: int = 10,
n_probe_features: int = 20,
w_l1_slack: float = 0.001,
loss_slack: float = 0.001,
normalize: bool = True,
**kwargs,
):
"""
Main class to use `FRI` in a programmatic fashion following the scikit-learn paradigm.
Parameters
----------
problemName: `ProblemName` or str
Type of Problem as enum value or explicit string (e.g. "classification")
random_state: object or int
Random state object or int
n_jobs: int or None
Number of threads or -1 for automatic.
verbose: int
Verbosity if > 0
n_param_search: int
Number of parameter samples in random search for hyperparameters.
n_probe_features: int
Number of probes to generate to improve feature selection.
w_l1_slack: float
Allow deviation from optimal L1 norm.
loss_slack: float
Allow deviation of loss.
normalize: boolean
Normalize relevance bounds to the range [0, 1] depending on the L1 norm.
"""
self.problemName = problemName
if isinstance(problemName, ProblemName):
problemtype = problemName.value
else:
if problemName == "classification" or problemName == "class":
problemtype = ProblemName.CLASSIFICATION
elif problemName == "regression" or problemName == "reg":
problemtype = ProblemName.REGRESSION
elif problemName == "ordinalregression" or problemName == "ordreg":
problemtype = ProblemName.ORDINALREGRESSION
elif problemName == "lupi_classification" or problemName == "lupi_class":
problemtype = ProblemName.LUPI_CLASSIFICATION
if problemtype is None:
names = [enum.name.lower() for enum in ProblemName]
print(
f"Parameter 'problemName' was not recognized or unset. Try one of {names}."
)
else:
problem_class = problemtype[0]
super().__init__(
problem_class,
random_state=random_state,
n_jobs=n_jobs,
verbose=verbose,
n_param_search=n_param_search,
n_probe_features=n_probe_features,
w_l1_slack=w_l1_slack,
loss_slack=loss_slack,
normalize=normalize,
**kwargs,
)
__all__ = [
"genRegressionData",
"genClassificationData",
"genOrdinalRegressionData",
"quick_generate",
"plot_intervals",
"ProblemName",
"FRI",
"LUPI_MODELS",
"NORMAL_MODELS",
"genLupiData",
]
Sub-modules
Variables
LUPI_MODELS
NORMAL_MODELS
Functions
genClassificationData
def genClassificationData(
n_samples: int = 100,
n_features: int = 2,
n_redundant: int = 0,
n_strel: int = 1,
n_repeated: int = 0,
noise: float = 0.1,
flip_y: float = 0,
random_state: object = None,
partition=None,
linear=True
)
Generate synthetic classification data
Parameters
n_samples : int, optional Number of samples
n_features : int, optional Number of features
n_redundant : int, optional Number of features which are part of redundant subsets (weakly relevant)
n_strel : int, optional Number of features which are mandatory for the underlying model (strongly relevant)
n_repeated : int, optional Number of features which are clones of existing ones.
noise : float Added gaussian noise to data. Parameter scales Std of normal distribution.
flip_y : float, optional Ratio of samples randomly switched to wrong class.
random_state : object, optional Randomstate object used for generation.
Returns
X : array of shape [n_samples, n_features] The generated samples.
y : array of shape [n_samples] The output classes.
Raises
ValueError Wrong parameters for specified amount of features/samples.
Examples
>>> X, y = genClassificationData(n_samples=200)
Generating dataset with d=2,n=200,strongly=1,weakly=0, partition of weakly=None
>>> X.shape
(200, 2)
>>> y.shape
(200,)
View Source
def genClassificationData(
n_samples: int = 100,
n_features: int = 2,
n_redundant: int = 0,
n_strel: int = 1,
n_repeated: int = 0,
noise: float = 0.1,
flip_y: float = 0,
random_state: object = None,
partition=None,
linear=True,
):
"""Generate synthetic classification data
Parameters
----------
n_samples : int, optional
Number of samples
n_features : int, optional
Number of features
n_redundant : int, optional
Number of features which are part of redundant subsets (weakly relevant)
n_strel : int, optional
Number of features which are mandatory for the underlying model (strongly relevant)
n_repeated : int, optional
Number of features which are clones of existing ones.
noise : float
Added gaussian noise to data. Parameter scales Std of normal distribution.
flip_y : float, optional
Ratio of samples randomly switched to wrong class.
random_state : object, optional
Randomstate object used for generation.
Returns
-------
X : array of shape [n_samples, n_features]
The generated samples.
y : array of shape [n_samples]
The output classes.
Raises
------
ValueError
Wrong parameters for specified amount of features/samples.
Examples
---------
>>> X,y = genClassificationData(n_samples=200)
Generating dataset with d=2,n=200,strongly=1,weakly=0, partition of weakly=None
>>> X.shape
(200, 2)
>>> y.shape
(200,)
"""
_checkParam(**locals())
random_state = check_random_state(random_state)
X = np.zeros((n_samples, n_features))
# Find partitions which define the weakly relevant subsets
if partition is None and n_redundant > 0:
if not linear and n_strel == 0 and n_redundant <= 4:
raise ValueError(
"Generating non-linear data requires multiple informative features. "
"Increase `n_redundant` to 5 or more, or add at least 1 `n_strel` feature."
)
elif not linear and n_strel == 0:
assert n_redundant > 4
# We create 2 partitions to have at least 2 informative features for the non-linear problem
partition = [2, n_redundant - 2]
part_size = 2
else:
partition = [n_redundant]
part_size = 1
elif partition is not None:
part_size = len(partition)
else:
part_size = 0
n_informative = n_strel + part_size
if linear:
X_informative, Y = generate_binary_classification_problem(
n_samples, n_informative, random_state
)
else:
if n_informative < 2:
raise ValueError(
"Generating non-linear data requires more than 1 strongly relevant feature. "
"Specifying 'n_redundant' implicitly requests only 1 strongly relevant feature. "
"Try increasing 'n_strel' to >=1."
)
# Create classif. set with 2 clusters per class
X_informative, Y = make_classification(
n_samples=n_samples,
n_features=n_informative,
n_informative=n_informative,
n_redundant=0,
n_repeated=0,
n_classes=2,
n_clusters_per_class=2,
flip_y=0.00,
class_sep=0.5,
hypercube=True,
shift=0.0,
scale=1.0,
shuffle=False,
random_state=random_state,
)
# We extend X with several types of other features (linear combinations, repeats, random features)
X = _fillVariableSpace(
X_informative,
random_state,
n_features=n_features,
n_redundant=n_redundant,
n_strel=n_strel,
n_repeated=n_repeated,
partition=partition,
)
# Add target noise
if flip_y > 0:
n_flip = int(flip_y * n_samples)
Y[random_state.choice(n_samples, n_flip)] *= -1
# Add gaussian noise to data
X = X + random_state.normal(size=(n_samples, n_features), scale=noise / X.std())
return X, Y
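Example
A minimal usage sketch with illustrative values. The non-linear variant (`linear=False`) needs at least two informative features, as enforced in the source above.
from fri import genClassificationData

# Linear problem: 2 strongly relevant, 2 weakly relevant (one redundant subset)
# and 2 irrelevant filler features.
X, y = genClassificationData(
    n_samples=200, n_features=6, n_strel=2, n_redundant=2, random_state=0
)

# Non-linear problem: n_strel plus the number of redundant partitions must be >= 2.
X_nl, y_nl = genClassificationData(
    n_samples=200, n_features=6, n_strel=2, n_redundant=2, linear=False, random_state=0
)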
genLupiData
def genLupiData(
problemname,
**kwargs
)
Wrapper around the arfs_gen function to handle fri's ProblemName format
View Source
def genLupiData(problemname, **kwargs):
"Overwrite arfs_gen method to handle different format of problemtype in fri"
return arfs_gen.genLupiData(problemname.value[1], **kwargs)
genOrdinalRegressionData
def genOrdinalRegressionData(
n_samples: int = 100,
n_features: int = 2,
n_redundant: int = 0,
n_strel: int = 1,
n_repeated: int = 0,
noise: float = 0.0,
random_state: object = None,
partition=None,
n_target_bins: int = 3
)
Generate ordinal regression data
Parameters
n_samples : int, optional Number of samples
n_features : int, optional Number of features
n_redundant : int, optional Number of features which are part of redundant subsets (weakly relevant)
n_strel : int, optional Number of features which are mandatory for the underlying model (strongly relevant)
n_repeated : int, optional Number of features which are clones of existing ones.
noise : float, optional Noise of the created samples around ground truth.
random_state : object, optional Randomstate object used for generation.
n_target_bins : int, optional Number of bins into which the regression target is split to form the ordinal classes.
Returns
X : array of shape [n_samples, n_features] The generated samples.
y : array of shape [n_samples] The output values (target).
Raises
ValueError Wrong parameters for specified amount of features/samples.
View Source
def genOrdinalRegressionData(
n_samples: int = 100,
n_features: int = 2,
n_redundant: int = 0,
n_strel: int = 1,
n_repeated: int = 0,
noise: float = 0.0,
random_state: object = None,
partition=None,
n_target_bins: int = 3,
):
"""
Generate ordinal regression data
Parameters
----------
n_samples : int, optional
Number of samples
n_features : int, optional
Number of features
n_redundant : int, optional
Number of features which are part of redundant subsets (weakly relevant)
n_strel : int, optional
Number of features which are mandatory for the underlying model (strongly relevant)
n_repeated : int, optional
Number of features which are clones of existing ones.
noise : float, optional
Noise of the created samples around ground truth.
random_state : object, optional
Randomstate object used for generation.
n_target_bins : int, optional
Number of bins into which the regression target is split to form the ordinal classes
Returns
-------
X : array of shape [n_samples, n_features]
The generated samples.
y : array of shape [n_samples]
The output values (target).
Raises
------
ValueError
Wrong parameters for specified amount of features/samples.
"""
_checkParam(**locals())
random_state = check_random_state(random_state)
if not n_target_bins > 1:
raise ValueError("At least 2 target bins needed")
# Use normal regression data as starting point
X_regression, Y_regression = genRegressionData(
n_samples=int(n_samples),
n_features=int(n_features),
n_redundant=int(n_redundant),
n_strel=int(n_strel),
n_repeated=int(n_repeated),
noise=0,
random_state=random_state,
partition=partition,
)
bin_size = int(np.floor(n_samples / n_target_bins))
rest = int(n_samples - (bin_size * n_target_bins))
# Sort the target values and rearrange the data accordingly
sort_indices = np.argsort(Y_regression)
X = X_regression[sort_indices]
Y = Y_regression[sort_indices]
# Assign ordinal classes as target values
for i in range(n_target_bins):
Y[bin_size * i : bin_size * (i + 1)] = i
# Put the non-divisible rest into the last bin
if rest > 0:
Y[-rest:] = n_target_bins - 1
X, Y = shuffle(X, Y, random_state=random_state)
# Add gaussian noise to data
X = X + random_state.normal(size=(n_samples, n_features), scale=noise)
return X, Y
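Example
A short usage sketch with illustrative values; the regression target is binned into the ordinal classes 0 .. n_target_bins-1.
import numpy as np
from fri import genOrdinalRegressionData

X, y = genOrdinalRegressionData(
    n_samples=300, n_features=6, n_strel=2, n_redundant=2, n_target_bins=3, random_state=0
)
print(X.shape)       # (300, 6)
print(np.unique(y))  # the three ordinal classes 0, 1, 2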
genRegressionData
def genRegressionData(
n_samples: int = 100,
n_features: int = 2,
n_redundant: int = 0,
n_strel: int = 1,
n_repeated: int = 0,
noise: float = 0.0,
random_state: object = None,
partition=None
) -> object
Generate synthetic regression data
Parameters
n_samples : int, optional Number of samples
n_features : int, optional Number of features
n_redundant : int, optional Number of features which are part of redundant subsets (weakly relevant)
n_strel : int, optional Number of features which are mandatory for the underlying model (strongly relevant)
n_repeated : int, optional Number of features which are clones of existing ones.
noise : float, optional Noise of the created samples around ground truth.
random_state : object, optional Randomstate object used for generation.
Returns
X : array of shape [n_samples, n_features] The generated samples.
y : array of shape [n_samples] The output values (target).
Raises
ValueError Wrong parameters for specified amount of features/samples.
View Source
def genRegressionData(
n_samples: int = 100,
n_features: int = 2,
n_redundant: int = 0,
n_strel: int = 1,
n_repeated: int = 0,
noise: float = 0.0,
random_state: object = None,
partition=None,
) -> object:
"""Generate synthetic regression data
Parameters
----------
n_samples : int, optional
Number of samples
n_features : int, optional
Number of features
n_redundant : int, optional
Number of features which are part of redundant subsets (weakly relevant)
n_strel : int, optional
Number of features which are mandatory for the underlying model (strongly relevant)
n_repeated : int, optional
Number of features which are clones of existing ones.
noise : float, optional
Noise of the created samples around ground truth.
random_state : object, optional
Randomstate object used for generation.
Returns
-------
X : array of shape [n_samples, n_features]
The generated samples.
y : array of shape [n_samples]
The output values (target).
Raises
------
ValueError
Wrong parameters for specified amount of features/samples.
"""
_checkParam(**locals())
random_state = check_random_state(random_state)
# Find partitions which define the weakly relevant subsets
if partition is None and n_redundant > 0:
partition = [n_redundant]
part_size = 1
elif partition is not None:
part_size = len(partition)
else:
part_size = 0
n_informative = n_strel + part_size
X = random_state.randn(n_samples, n_informative)
ground_truth = np.zeros((n_informative, 1))
ground_truth[:n_informative, :] = 0.3
bias = 0
y = np.dot(X, ground_truth) + bias
# Add noise
if noise > 0.0:
y += random_state.normal(scale=noise, size=y.shape)
X = _fillVariableSpace(
X,
random_state,
n_features=n_features,
n_redundant=n_redundant,
n_strel=n_strel,
n_repeated=n_repeated,
partition=partition,
)
y = np.squeeze(y)
return X, y
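Example
A minimal usage sketch with illustrative values.
from fri import genRegressionData

# 2 strongly relevant, 2 weakly relevant and 1 irrelevant feature,
# with gaussian noise added to the target.
X, y = genRegressionData(
    n_samples=200, n_features=5, n_strel=2, n_redundant=2, noise=0.1, random_state=1
)
print(X.shape, y.shape)  # (200, 5) (200,)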
plot_intervals
def plot_intervals(
model,
ticklabels=None
)
Plot the relevance intervals.
Parameters
model : FRI model Needs to be fitted before.
ticklabels : list of str, optional Strings for tick labels on the x-axis (features).
View Source
def plot_intervals(model, ticklabels=None):
"""Plot the relevance intervals.
Parameters
----------
model : FRI model
Needs to be fitted before.
ticklabels : list of str, optional
Strings for tick labels on the x-axis (features)
"""
if model.interval_ is not None:
plotIntervals(
model.interval_, ticklabels=ticklabels, classes=model.relevance_classes_
)
else:
print("Intervals not computed. Try running fit() function first.")
quick_generate
def quick_generate(
problemtype,
**kwargs
)
Wrapper around the arfs_gen function to handle fri's ProblemName format
View Source
def quick_generate(problemtype, **kwargs):
"Overwrite arfs_gen method to handle different format of problemtype in fri"
return arfs_gen.quick_generate(problemtype.value[1], **kwargs)
Classes
FRI
class FRI(
problemName: object,
random_state: object = None,
n_jobs: int = 1,
verbose: int = 0,
n_param_search: int = 10,
n_probe_features: int = 20,
w_l1_slack: float = 0.001,
loss_slack: float = 0.001,
normalize: bool = True,
**kwargs
)
Main class to use `FRI` in a programmatic fashion following the scikit-learn paradigm.
Notes
All estimators should specify all the parameters that can be set at the class level in their __init__ as explicit keyword arguments (no *args or **kwargs).
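Example
A minimal end-to-end sketch of the documented workflow; parameter values are illustrative.
from fri import FRI, ProblemName, genClassificationData

# Synthetic data with known relevance structure.
X, y = genClassificationData(
    n_samples=200, n_features=6, n_strel=2, n_redundant=2, random_state=0
)

model = FRI(ProblemName.CLASSIFICATION, random_state=0)
model.fit(X, y)

print(model.interval_)                    # relevance bounds per feature
print(model.print_interval_with_class())  # pretty-printed bounds and relevance classes
print(model.get_support(indices=True))    # indices of the selected (relevant) features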
View Source
class FRI(FRIBase):
def __init__(
self,
problemName: object,
random_state: object = None,
n_jobs: int = 1,
verbose: int = 0,
n_param_search: int = 10,
n_probe_features: int = 20,
w_l1_slack: float = 0.001,
loss_slack: float = 0.001,
normalize: bool = True,
**kwargs,
):
"""
Main class to use `FRI` in a programmatic fashion following the scikit-learn paradigm.
Parameters
----------
problemName: `ProblemName` or str
Type of Problem as enum value or explicit string (e.g. "classification")
random_state: object or int
Random state object or int
n_jobs: int or None
Number of threads or -1 for automatic.
verbose: int
Verbosity if > 0
n_param_search: int
Number of parameter samples in random search for hyperparameters.
n_probe_features: int
Number of probes to generate to improve feature selection.
w_l1_slack: float
Allow deviation from optimal L1 norm.
loss_slack: float
Allow deviation of loss.
normalize: boolean
Normalize relevance bounds to the range [0, 1] depending on the L1 norm.
"""
self.problemName = problemName
if isinstance(problemName, ProblemName):
problemtype = problemName.value
else:
if problemName == "classification" or problemName == "class":
problemtype = ProblemName.CLASSIFICATION
elif problemName == "regression" or problemName == "reg":
problemtype = ProblemName.REGRESSION
elif problemName == "ordinalregression" or problemName == "ordreg":
problemtype = ProblemName.ORDINALREGRESSION
elif problemName == "lupi_classification" or problemName == "lupi_class":
problemtype = ProblemName.LUPI_CLASSIFICATION
if problemtype is None:
names = [enum.name.lower() for enum in ProblemName]
print(
f"Parameter 'problemName' was not recognized or unset. Try one of {names}."
)
else:
problem_class = problemtype[0]
super().__init__(
problem_class,
random_state=random_state,
n_jobs=n_jobs,
verbose=verbose,
n_param_search=n_param_search,
n_probe_features=n_probe_features,
w_l1_slack=w_l1_slack,
loss_slack=loss_slack,
normalize=normalize,
**kwargs,
)
Ancestors (in MRO)
- fri.main.FRIBase
- sklearn.base.BaseEstimator
- sklearn.feature_selection._base.SelectorMixin
- sklearn.base.TransformerMixin
Methods
constrained_intervals
def constrained_intervals(
self,
preset: dict
)
Method to return relevance intervals which are constrained using preset ranges or values.
Parameters
preset : dict like, {i:float} or {i:[float,float]} Keys denote feature index, values represent a fixed single value (float) or a range of allowed values (lower and upper bound).
Example: To set feature 0 to a fixed value use
>>> preset = {0: 0.1}
or to use the minimum relevance bound
>>> preset[1] = self.interval_[1, 0]
Returns
array like Relevance bounds with user constraints
View Source
def constrained_intervals(self, preset: dict):
"""
Method to return relevance intervals which are constrained using preset ranges or values.
Parameters
----------
preset : dict like, {i:float} or {i:[float,float]}
Keys denote feature index, values represent a fixed single value (float) or a range of allowed values (lower and upper bound).
Example: To set feature 0 to a fixed value use
>>> preset = {0: 0.1}
or to use the minimum relevance bound
>>> preset[1] = self.interval_[1, 0]
Returns
-------
array like
Relevance bounds with user constraints
"""
# Do we have intervals?
check_is_fitted(self, "interval_")
return self._relevance_bounds_computer.compute_multi_preset_relevance_bounds(
preset=preset, lupi_features=self.lupi_features_
)
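Example
A usage sketch following the preset examples in the docstring; `model` is assumed to be a fitted FRI instance.
# Fix feature 0 to the value 0.1 and pin feature 1 to its minimum relevance bound,
# then recompute the intervals of the remaining features under these constraints.
preset = {0: 0.1, 1: model.interval_[1, 0]}
constrained = model.constrained_intervals(preset)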
fit
def fit(
self,
X,
y,
lupi_features=0,
**kwargs
)
Method to fit model on data.
Parameters
X : numpy.ndarray
y : numpy.ndarray
lupi_features : int Amount of features which are considered privileged information in `X`. The data is expected to be structured in a way that all lupi features are at the end of the set. For example `lupi_features=1` would denote the last column of `X` to be privileged.
kwargs : dict Dictionary of additional keyword arguments depending on the `model`.
Returns
FRIBase
View Source
def fit(self, X, y, lupi_features=0, **kwargs):
"""
Method to fit model on data.
Parameters
----------
X : numpy.ndarray
y : numpy.ndarray
lupi_features : int
Amount of features which are considered privileged information in `X`.
The data is expected to be structured in a way that all lupi features are at the end of the set.
For example `lupi_features=1` would denote the last column of `X` to be privileged.
kwargs : dict
Dictionary of additional keyword arguments depending on the `model`.
Returns
-------
`FRIBase`
"""
self.problem_object_ = self.problem_type(**self.other_args)
self.lupi_features_ = lupi_features
self.n_samples_ = X.shape[0]
self.n_features_ = X.shape[1] - lupi_features
self.optim_model_, best_score = self._fit_baseline(
X, y, lupi_features, **kwargs
)
data = self.problem_object_.preprocessing((X, y), lupi_features=lupi_features)
self._relevance_bounds_computer = RelevanceBoundsIntervals(
data,
self.problem_object_,
self.optim_model_,
self.random_state,
self.n_probe_features,
self.n_jobs,
self.verbose,
normalize=self.normalize,
)
if lupi_features == 0:
(
self.interval_,
feature_classes,
) = self._relevance_bounds_computer.get_normalized_intervals()
else:
(
self.interval_,
feature_classes,
) = self._relevance_bounds_computer.get_normalized_lupi_intervals(
lupi_features=lupi_features
)
self._get_relevance_mask(feature_classes)
# Return the classifier
return self
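Example
A sketch of fitting with privileged information; `X_train` and `y_train` are placeholder names, and the privileged column is assumed to be the last column of `X_train`.
from fri import FRI, ProblemName

model = FRI(ProblemName.LUPI_CLASSIFICATION, random_state=0)
# lupi_features=1 marks the last column of X_train as privileged information.
model.fit(X_train, y_train, lupi_features=1)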
fit_transform
def fit_transform(
self,
X,
y=None,
**fit_params
)
Fit to data, then transform it.
Fits transformer to `X` and `y` with optional parameters `fit_params` and returns a transformed version of `X`.
Parameters
X : array-like of shape (n_samples, n_features) Input samples.
y : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None Target values (None for unsupervised transformations).
**fit_params : dict Additional fit parameters.
Returns
X_new : ndarray array of shape (n_samples, n_features_new) Transformed array.
View Source
def fit_transform(self, X, y=None, **fit_params):
"""
Fit to data, then transform it.
Fits transformer to `X` and `y` with optional parameters `fit_params`
and returns a transformed version of `X`.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Input samples.
y : array-like of shape (n_samples,) or (n_samples, n_outputs), \
default=None
Target values (None for unsupervised transformations).
**fit_params : dict
Additional fit parameters.
Returns
-------
X_new : ndarray array of shape (n_samples, n_features_new)
Transformed array.
"""
# non-optimized default implementation; override when a better
# method is possible for a given clustering algorithm
if y is None:
# fit method of arity 1 (unsupervised transformation)
return self.fit(X, **fit_params).transform(X)
else:
# fit method of arity 2 (supervised transformation)
return self.fit(X, y, **fit_params).transform(X)
get_grouping
def get_grouping(
self,
**kwargs
)
View Source
def get_grouping(self, **kwargs):
check_is_fitted(self, "allrel_prediction_")
groups, link = self._relevance_bounds_computer.grouping(
self.interval_, **kwargs
)
return groups, link
get_params
def get_params(
self,
deep=True
)
Get parameters for this estimator.
Parameters
deep : bool, default=True If True, will return the parameters for this estimator and contained subobjects that are estimators.
Returns
params : dict Parameter names mapped to their values.
View Source
def get_params(self, deep=True):
"""
Get parameters for this estimator.
Parameters
----------
deep : bool, default=True
If True, will return the parameters for this estimator and
contained subobjects that are estimators.
Returns
-------
params : dict
Parameter names mapped to their values.
"""
out = dict()
for key in self._get_param_names():
value = getattr(self, key)
if deep and hasattr(value, 'get_params'):
deep_items = value.get_params().items()
out.update((key + '__' + k, val) for k, val in deep_items)
out[key] = value
return out
get_support
def get_support(
self,
indices=False
)
Get a mask, or integer index, of the features selected
Parameters
indices : bool, default=False If True, the return value will be an array of integers, rather than a boolean mask.
Returns
support : array
An index that selects the retained features from a feature vector. If `indices` is False, this is a boolean array of shape [# input features], in which an element is True iff its corresponding feature is selected for retention. If `indices` is True, this is an integer array of shape [# output features] whose values are indices into the input feature vector.
View Source
def get_support(self, indices=False):
"""
Get a mask, or integer index, of the features selected
Parameters
----------
indices : bool, default=False
If True, the return value will be an array of integers, rather
than a boolean mask.
Returns
-------
support : array
An index that selects the retained features from a feature vector.
If `indices` is False, this is a boolean array of shape
[# input features], in which an element is True iff its
corresponding feature is selected for retention. If `indices` is
True, this is an integer array of shape [# output features] whose
values are indices into the input feature vector.
"""
mask = self._get_support_mask()
return mask if not indices else np.where(mask)[0]
inverse_transform
def inverse_transform(
self,
X
)
Reverse the transformation operation
Parameters
X : array of shape [n_samples, n_selected_features] The input samples.
Returns
X_r : array of shape [n_samples, n_original_features] `X` with columns of zeros inserted where features would have been removed by :meth:`transform`.
View Source
def inverse_transform(self, X):
"""
Reverse the transformation operation
Parameters
----------
X : array of shape [n_samples, n_selected_features]
The input samples.
Returns
-------
X_r : array of shape [n_samples, n_original_features]
`X` with columns of zeros inserted where features would have
been removed by :meth:`transform`.
"""
if issparse(X):
X = X.tocsc()
# insert additional entries in indptr:
# e.g. if transform changed indptr from [0 2 6 7] to [0 2 3]
# col_nonzeros here will be [2 0 1] so indptr becomes [0 2 2 3]
it = self.inverse_transform(np.diff(X.indptr).reshape(1, -1))
col_nonzeros = it.ravel()
indptr = np.concatenate([[0], np.cumsum(col_nonzeros)])
Xt = csc_matrix((X.data, X.indices, indptr),
shape=(X.shape[0], len(indptr) - 1), dtype=X.dtype)
return Xt
support = self.get_support()
X = check_array(X, dtype=None)
if support.sum() != X.shape[1]:
raise ValueError("X has a different shape than during fitting.")
if X.ndim == 1:
X = X[None, :]
Xt = np.zeros((X.shape[0], support.size), dtype=X.dtype)
Xt[:, support] = X
return Xt
print_interval_with_class
def print_interval_with_class(
self
)
Pretty print the relevance intervals and determined feature relevance class
View Source
def print_interval_with_class(self):
"""
Pretty print the relevance intervals and determined feature relevance class
"""
output = ""
if self.interval_ is None:
output += "Model is not fitted."
output += "############## Relevance bounds ##############\n"
output += "feature: [LB -- UB], relevance class\n"
for i in range(self.n_features_ + self.lupi_features_):
if i == self.n_features_:
output += "########## LUPI Relevance bounds\n"
output += (
f"{i:7}: [{self.interval_[i, 0]:1.1f} -- {self.interval_[i, 1]:1.1f}],"
)
output += f" {self.relevance_classes_string_[i]}\n"
return output
score
def score(
self,
X,
y
)
Using fitted model predict points for `X` and compare to truth `y`.
Parameters
X : numpy.ndarray y : numpy.ndarray
Returns
Model specific score (0 is worst, 1 is best)
View Source
def score(self, X, y):
"""
Using fitted model predict points for `X` and compare to truth `y`.
Parameters
----------
X : numpy.ndarray
y : numpy.ndarray
Returns
-------
Model specific score (0 is worst, 1 is best)
"""
if self.optim_model_:
return self.optim_model_.score(X, y)
else:
raise NotFittedError()
set_params
def set_params(
self,
**params
)
Set the parameters of this estimator.
The method works on simple estimators as well as on nested objects (such as :class:`~sklearn.pipeline.Pipeline`). The latter have parameters of the form `<component>__<parameter>` so that it's possible to update each component of a nested object.
Parameters
**params : dict Estimator parameters.
Returns
self : estimator instance Estimator instance.
View Source
def set_params(self, **params):
"""
Set the parameters of this estimator.
The method works on simple estimators as well as on nested objects
(such as :class:`~sklearn.pipeline.Pipeline`). The latter have
parameters of the form ``<component>__<parameter>`` so that it's
possible to update each component of a nested object.
Parameters
----------
**params : dict
Estimator parameters.
Returns
-------
self : estimator instance
Estimator instance.
"""
if not params:
# Simple optimization to gain speed (inspect is slow)
return self
valid_params = self.get_params(deep=True)
nested_params = defaultdict(dict) # grouped by prefix
for key, value in params.items():
key, delim, sub_key = key.partition('__')
if key not in valid_params:
raise ValueError('Invalid parameter %s for estimator %s. '
'Check the list of available parameters '
'with `estimator.get_params().keys()`.' %
(key, self))
if delim:
nested_params[key][sub_key] = value
else:
setattr(self, key, value)
valid_params[key] = value
for key, sub_params in nested_params.items():
valid_params[key].set_params(**sub_params)
return self
transform
def transform(
self,
X
)
Reduce X to the selected features.
Parameters
X : array of shape [n_samples, n_features] The input samples.
Returns
X_r : array of shape [n_samples, n_selected_features] The input samples with only the selected features.
View Source
def transform(self, X):
"""Reduce X to the selected features.
Parameters
----------
X : array of shape [n_samples, n_features]
The input samples.
Returns
-------
X_r : array of shape [n_samples, n_selected_features]
The input samples with only the selected features.
"""
# note: we use _safe_tags instead of _get_tags because this is a
# public Mixin.
X = check_array(
X,
dtype=None,
accept_sparse="csr",
force_all_finite=not _safe_tags(self, key="allow_nan"),
)
mask = self.get_support()
if not mask.any():
warn("No features were selected: either the data is"
" too noisy or the selection test too strict.",
UserWarning)
return np.empty(0).reshape((X.shape[0], 0))
if len(mask) != X.shape[1]:
raise ValueError("X has a different shape than during fitting.")
return X[:, safe_mask(X, mask)]
ProblemName
class ProblemName(
/,
*args,
**kwargs
)
Enum which contains usable models for which feature relevance intervals can be computed in :func:`~FRI`.
Each enum value contains the model class and the corresponding data generation method from the external library `arfs_gen`.
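Example
Per the description above, each member bundles the model class and the generator identifier; unpacking is straightforward.
from fri import ProblemName

# Each enum value pairs the fri model class with the matching arfs_gen generator.
model_class, generator_name = ProblemName.CLASSIFICATION.value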
View Source
class ProblemName(Enum):
"""
Enum which contains usable models for which feature relevance intervals can be computed in :func:`~FRI`.
Each enum value contains the model class and the corresponding data generation method from the external library `arfs_gen`.
"""
CLASSIFICATION = [fri.model.Classification, arfs_gen.ProblemName.CLASSIFICATION]
REGRESSION = [fri.model.Regression, arfs_gen.ProblemName.REGRESSION]
ORDINALREGRESSION = [
fri.model.OrdinalRegression,
arfs_gen.ProblemName.ORDINALREGRESSION,
]
LUPI_CLASSIFICATION = [
fri.model.LUPI_Classification,
arfs_gen.ProblemName.LUPI_CLASSIFICATION,
]
LUPI_REGRESSION = [fri.model.LUPI_Regression, arfs_gen.ProblemName.LUPI_REGRESSION]
LUPI_ORDREGRESSION = [
fri.model.LUPI_OrdinalRegression,
arfs_gen.ProblemName.LUPI_ORDREGRESSION,
]
Ancestors (in MRO)
- enum.Enum
Class variables
CLASSIFICATION
LUPI_CLASSIFICATION
LUPI_ORDREGRESSION
LUPI_REGRESSION
ORDINALREGRESSION
REGRESSION
name
value