Module fri.model.classification

View Source

import cvxpy as cvx

import numpy as np

from sklearn import preprocessing

from sklearn.metrics import fbeta_score, classification_report

from sklearn.preprocessing import LabelEncoder

from sklearn.utils import check_X_y

from sklearn.utils.multiclass import unique_labels

from fri.model.base_cvxproblem import Relevance_CVXProblem

from fri.model.base_initmodel import InitModel

from .base_type import ProblemType

class Classification(ProblemType):
    """Problem type for binary classification.

    Declares the tunable hyperparameter ``C``, names the relaxation factors,
    selects the initial model and relevance-bound problem classes, and
    encodes the target labels as -1 / +1.
    """

    @classmethod
    def parameters(cls):
        """Return the names of the tunable hyperparameters."""
        return ["C"]

    @property
    def get_initmodel_template(cls):
        """Class used to fit the initial model."""
        return Classification_SVM

    @property
    def get_cvxproblem_template(cls):
        """Class used to build the per-feature relevance-bound problems."""
        return Classification_Relevance_Bound

    # Declared as a classmethod (like ``parameters``) so that it can be called
    # on the class as well as on an instance; instance calls keep working.
    @classmethod
    def relax_factors(cls):
        """Return the names of the constraint relaxation factors."""
        return ["loss_slack", "w_l1_slack"]

    def preprocessing(self, data, **kwargs):
        """Validate ``(X, y)`` and encode the labels as -1 / +1.

        Parameters
        ----------
        data : tuple
            Pair ``(X, y)`` of features and class labels.

        Returns
        -------
        tuple
            ``(X, y)`` as returned by ``check_X_y``, with ``y`` re-encoded so
            that the first encoded class becomes -1 and the other +1.

        Raises
        ------
        ValueError
            If more than two distinct labels are present.
        """
        X, y = data

        # Check that X and y have correct shape
        X, y = check_X_y(X, y)

        # Distinct labels are only inspected to reject multi-class input;
        # they are not stored on the instance.
        classes_ = unique_labels(y)
        if len(classes_) > 2:
            raise ValueError("Only binary class data supported")

        # Negative class is set to -1 for decision surface
        y = preprocessing.LabelEncoder().fit_transform(y)
        y[y == 0] = -1

        return X, y

class Classification_SVM(InitModel):
    """Initial model: linear SVM with an L1 penalty on the weights, solved with cvxpy.

    After ``fit`` the instance holds ``model_state`` (weights, bias, slack)
    and ``constraints`` (summed slack and L1 norm of the weights).
    """

    def __init__(self, C=1):
        super().__init__()
        self.C = C

    def fit(self, X, y, **kwargs):
        """Solve ``min ||w||_1 + C * sum(slack)`` subject to the margin constraints.

        Parameters
        ----------
        X : array with two dimensions (samples, features)
        y : array of labels
            NOTE(review): presumably already encoded as -1 / +1 by the problem
            type's preprocessing; this method does not check it.

        Returns
        -------
        self
        """
        (n, d) = X.shape

        C = self.get_params()["C"]

        w = cvx.Variable(shape=(d), name="w")
        slack = cvx.Variable(shape=(n), name="slack")
        b = cvx.Variable(name="bias")

        objective = cvx.Minimize(cvx.norm(w, 1) + C * cvx.sum(slack))
        constraints = [cvx.multiply(y.T, X @ w + b) >= 1 - slack, slack >= 0]

        # Solve problem.
        problem = cvx.Problem(objective, constraints)
        problem.solve(**self.SOLVER_PARAMS)

        w = w.value
        b = b.value
        slack = np.asarray(slack.value).flatten()
        self.model_state = {"w": w, "b": b, "slack": slack}

        # These two values are stored as the model's constraints.
        loss = np.sum(slack)
        w_l1 = np.linalg.norm(w, ord=1)
        self.constraints = {"loss": loss, "w_l1": w_l1}

        return self

    def predict(self, X):
        """Return +1 where ``X @ w + b >= 0`` and -1 elsewhere."""
        w = self.model_state["w"]
        b = self.model_state["b"]

        y = np.dot(X, w) + b >= 0
        y = y.astype(int)
        y[y == 0] = -1

        return y

    def score(self, X, y, **kwargs):
        """Return the weighted F1 score of the predictions on ``X``.

        Parameters
        ----------
        X : array of samples to predict
        y : array of true labels (re-encoded to -1 / +1 before scoring)
        **kwargs
            ``verbose``: when truthy, a text ``classification_report`` is
            returned instead of the numeric score.

        Returns
        -------
        float or str
        """
        prediction = self.predict(X)

        # Negative class is set to -1 for decision surface
        y = LabelEncoder().fit_transform(y)
        y[y == 0] = -1

        # Test the value, not mere presence, so ``verbose=False`` yields the score.
        if kwargs.get("verbose"):
            return classification_report(y, prediction)

        # Using weighted f1 score to have a stable score for imbalanced datasets
        return fbeta_score(y, prediction, beta=1, average="weighted")

class Classification_Relevance_Bound(Relevance_CVXProblem):
    """Relevance-bound problem for the weight of one feature of the linear classifier.

    The model variables are constrained by the weight norm and loss passed in
    from the initial model; the objective minimizes or maximizes the relevance
    variable tied to the weight of ``current_feature``.
    """

    def init_objective_UB(self, sign=None, **kwargs):
        """Maximize the relevance, bounded above by ``sign * w[current_feature]``."""
        signed_weight = sign * self.w[self.current_feature]
        self.add_constraint(self.feature_relevance <= signed_weight)
        self._objective = cvx.Maximize(self.feature_relevance)

    def init_objective_LB(self, **kwargs):
        """Minimize the relevance, bounded below by ``|w[current_feature]|``."""
        weight_magnitude = cvx.abs(self.w[self.current_feature])
        self.add_constraint(weight_magnitude <= self.feature_relevance)
        self._objective = cvx.Minimize(self.feature_relevance)

    def _init_constraints(self, parameters, init_model_constraints):
        """Create the model variables and the constraints shared by both bounds.

        Parameters
        ----------
        parameters : mapping providing the hyperparameter ``"C"``
        init_model_constraints : mapping providing ``"w_l1"`` and ``"loss"``
        """
        # Limits taken from the initial model
        max_weight_norm = init_model_constraints["w_l1"]
        max_loss = init_model_constraints["loss"]
        C = parameters["C"]

        # Model variables
        self.w = cvx.Variable(shape=(self.d), name="w")
        self.b = cvx.Variable(name="b")
        self.slack = cvx.Variable(shape=(self.n), nonneg=True, name="slack")

        # Expressions used in the constraints
        margins = cvx.multiply(self.y, self.X @ self.w + self.b)
        self.loss = cvx.sum(self.slack)
        self.weight_norm = cvx.norm(self.w, 1)

        # Margin constraint, then the two limits from the initial model
        self.add_constraint(margins >= 1 - self.slack)
        self.add_constraint(self.weight_norm <= max_weight_norm)
        self.add_constraint(C * self.loss <= C * max_loss)

        # Non-negative scalar that the objectives optimize
        self.feature_relevance = cvx.Variable(nonneg=True, name="Feature Relevance")

Classes

Classification

class Classification(
    **kwargs
)

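Problem type for binary classification: declares the hyperparameter `C`, uses `Classification_SVM` as the initial model and `Classification_Relevance_Bound` as the relevance-bound problem, and encodes labels as -1/+1. (Inherited `abc.ABC` docstring: Helper class that provides a standard way to create an ABC using inheritance.)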

View Source

class Classification(ProblemType):
    """Problem type for binary classification.

    Declares the tunable hyperparameter ``C``, names the relaxation factors,
    selects the initial model and relevance-bound problem classes, and
    encodes the target labels as -1 / +1.
    """

    @classmethod
    def parameters(cls):
        """Return the names of the tunable hyperparameters."""
        return ["C"]

    @property
    def get_initmodel_template(cls):
        """Class used to fit the initial model."""
        return Classification_SVM

    @property
    def get_cvxproblem_template(cls):
        """Class used to build the per-feature relevance-bound problems."""
        return Classification_Relevance_Bound

    # Declared as a classmethod (like ``parameters``) so that it can be called
    # on the class as well as on an instance; instance calls keep working.
    @classmethod
    def relax_factors(cls):
        """Return the names of the constraint relaxation factors."""
        return ["loss_slack", "w_l1_slack"]

    def preprocessing(self, data, **kwargs):
        """Validate ``(X, y)`` and encode the labels as -1 / +1.

        Parameters
        ----------
        data : tuple
            Pair ``(X, y)`` of features and class labels.

        Returns
        -------
        tuple
            ``(X, y)`` as returned by ``check_X_y``, with ``y`` re-encoded so
            that the first encoded class becomes -1 and the other +1.

        Raises
        ------
        ValueError
            If more than two distinct labels are present.
        """
        X, y = data

        # Check that X and y have correct shape
        X, y = check_X_y(X, y)

        # Distinct labels are only inspected to reject multi-class input;
        # they are not stored on the instance.
        classes_ = unique_labels(y)
        if len(classes_) > 2:
            raise ValueError("Only binary class data supported")

        # Negative class is set to -1 for decision surface
        y = preprocessing.LabelEncoder().fit_transform(y)
        y[y == 0] = -1

        return X, y

Ancestors (in MRO)

fri.model.base_type.ProblemType
abc.ABC

Static methods

parameters

def parameters(

)

View Source

    @classmethod
    def parameters(cls):
        """Return the names of the tunable hyperparameters."""
        hyperparameter_names = ["C"]
        return hyperparameter_names

Instance variables

get_cvxproblem_template

get_initmodel_template

Methods

get_all_parameters

def get_all_parameters(
    self
)

View Source

    def get_all_parameters(self):
        """Map every name from ``parameters()`` to its ``get_chosen_parameter`` result."""
        search_space = {}
        for name in self.parameters():
            search_space[name] = self.get_chosen_parameter(name)
        return search_space

get_all_relax_factors

def get_all_relax_factors(
    self
)

View Source

    def get_all_relax_factors(self):
        """Map every name from ``relax_factors()`` to its ``get_chosen_relax_factors`` result."""
        factors = {}
        for name in self.relax_factors():
            factors[name] = self.get_chosen_relax_factors(name)
        return factors

get_chosen_parameter

def get_chosen_parameter(
    self,
    p
)

View Source

    def get_chosen_parameter(self, p):
        """Return the search space for hyperparameter ``p``.

        Parameters
        ----------
        p : str
            Name of the hyperparameter.

        Returns
        -------
        list or scipy.stats frozen distribution
            A one-element list holding the value stored in
            ``self.chosen_parameters_`` when one exists for ``p``; otherwise a
            default list or log-uniform (reciprocal) distribution for ``p``.
        """
        try:
            # We return list for param search function
            return [self.chosen_parameters_[p]]
        except (AttributeError, KeyError, TypeError):
            # Only a failed lookup falls through to the defaults below; other
            # exceptions (e.g. KeyboardInterrupt) are no longer swallowed.
            pass

        # TODO: rewrite the parameter logic
        # TODO: move this to subclass
        if p == "scaling_lupi_w":
            return scipy.stats.reciprocal(a=1e-15, b=1e10)
        if p == "C":
            return scipy.stats.reciprocal(a=1e-5, b=1e5)
        if p == "epsilon":
            return [0, 0.001, 0.01, 0.1, 1, 10, 100]
        return scipy.stats.reciprocal(a=1e-10, b=1e10)

get_chosen_relax_factors

def get_chosen_relax_factors(
    self,
    p
)

View Source

    def get_chosen_relax_factors(self, p):
        """Return the relaxation factor for constraint ``p``.

        Looks up ``p`` in ``self.relax_factors_``, then ``p + "_slack"``, and
        falls back to 0.1 when neither key exists.

        Raises
        ------
        ValueError
            If the resolved factor is negative.
        """
        try:
            factor = self.relax_factors_[p]
        except KeyError:
            try:
                factor = self.relax_factors_[p + "_slack"]
            except KeyError:
                # Default used when no factor was configured for this constraint.
                factor = 0.1

        if factor < 0:
            # The previous message claimed the multiplier "is positive",
            # which is the opposite of the condition being rejected.
            raise ValueError("Slack Factor multiplier must not be negative!")

        return factor

get_relaxed_constraints

def get_relaxed_constraints(
    self,
    constraints
)

View Source

    def get_relaxed_constraints(self, constraints):
        """Return a new dict with ``relax_constraint`` applied to every entry."""
        relaxed = {}
        for name, bound in constraints.items():
            relaxed[name] = self.relax_constraint(name, bound)
        return relaxed

postprocessing

def postprocessing(
    self,
    bounds
)

View Source

    def postprocessing(self, bounds):
        """Return ``bounds`` unchanged (no post-processing at this level)."""
        unchanged = bounds
        return unchanged

preprocessing

def preprocessing(
    self,
    data,
    **kwargs
)

View Source

    def preprocessing(self, data, **kwargs):
        """Validate ``(X, y)`` and encode the labels as -1 / +1.

        Raises ``ValueError`` when more than two distinct labels are present.
        Returns the validated features together with the re-encoded labels.
        """
        features, labels = data

        # Shape / consistency validation of both arrays
        features, labels = check_X_y(features, labels)

        # Reject anything beyond two distinct labels
        if len(unique_labels(labels)) > 2:
            raise ValueError("Only binary class data supported")

        # Encode to 0/1, then move the 0 class to -1 for the decision surface
        encoded = preprocessing.LabelEncoder().fit_transform(labels)
        encoded[encoded == 0] = -1

        return features, encoded

relax_constraint

def relax_constraint(
    self,
    key,
    value
)

View Source

    def relax_constraint(self, key, value):
        """Scale ``value`` by ``1 + factor``, with the factor looked up for ``key``."""
        multiplier = 1 + self.get_chosen_relax_factors(key)
        return value * multiplier

relax_factors

def relax_factors(
    cls
)

View Source

    # Declared as a classmethod to match its ``cls`` parameter, so it can be
    # called on the class as well as on an instance.
    @classmethod
    def relax_factors(cls):
        """Return the names of the constraint relaxation factors."""
        return ["loss_slack", "w_l1_slack"]

Classification_Relevance_Bound

class Classification_Relevance_Bound(
    current_feature: int,
    data: tuple,
    hyperparameters,
    best_model_constraints,
    preset_model=None,
    best_model_state=None,
    probeID=-1,
    **kwargs
)

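Relevance-bound optimization problem for the weight of a single feature of the linear classifier, constrained by the weight norm and loss of the initial model. (Inherited `abc.ABC` docstring: Helper class that provides a standard way to create an ABC using inheritance.)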

View Source

class Classification_Relevance_Bound(Relevance_CVXProblem):
    """Relevance-bound problem for the weight of one feature of the linear classifier.

    The model variables are constrained by the weight norm and loss passed in
    from the initial model; the objective minimizes or maximizes the relevance
    variable tied to the weight of ``current_feature``.
    """

    def init_objective_UB(self, sign=None, **kwargs):
        """Maximize the relevance, bounded above by ``sign * w[current_feature]``."""
        signed_weight = sign * self.w[self.current_feature]
        self.add_constraint(self.feature_relevance <= signed_weight)
        self._objective = cvx.Maximize(self.feature_relevance)

    def init_objective_LB(self, **kwargs):
        """Minimize the relevance, bounded below by ``|w[current_feature]|``."""
        weight_magnitude = cvx.abs(self.w[self.current_feature])
        self.add_constraint(weight_magnitude <= self.feature_relevance)
        self._objective = cvx.Minimize(self.feature_relevance)

    def _init_constraints(self, parameters, init_model_constraints):
        """Create the model variables and the constraints shared by both bounds.

        Parameters
        ----------
        parameters : mapping providing the hyperparameter ``"C"``
        init_model_constraints : mapping providing ``"w_l1"`` and ``"loss"``
        """
        # Limits taken from the initial model
        max_weight_norm = init_model_constraints["w_l1"]
        max_loss = init_model_constraints["loss"]
        C = parameters["C"]

        # Model variables
        self.w = cvx.Variable(shape=(self.d), name="w")
        self.b = cvx.Variable(name="b")
        self.slack = cvx.Variable(shape=(self.n), nonneg=True, name="slack")

        # Expressions used in the constraints
        margins = cvx.multiply(self.y, self.X @ self.w + self.b)
        self.loss = cvx.sum(self.slack)
        self.weight_norm = cvx.norm(self.w, 1)

        # Margin constraint, then the two limits from the initial model
        self.add_constraint(margins >= 1 - self.slack)
        self.add_constraint(self.weight_norm <= max_weight_norm)
        self.add_constraint(C * self.loss <= C * max_loss)

        # Non-negative scalar that the objectives optimize
        self.feature_relevance = cvx.Variable(nonneg=True, name="Feature Relevance")

Ancestors (in MRO)

fri.model.base_cvxproblem.Relevance_CVXProblem
abc.ABC

Descendants

fri.model.lupi_classification.LUPI_Classification_Relevance_Bound

Static methods

aggregate_max_candidates

def aggregate_max_candidates(
    max_problems_candidates
)

View Source

    @classmethod
    def aggregate_max_candidates(cls, max_problems_candidates):
        """Return the largest ``solved_relevance`` among the given candidates."""
        return max(
            [problem.solved_relevance for problem in max_problems_candidates]
        )

aggregate_min_candidates

def aggregate_min_candidates(
    min_problems_candidates
)

View Source

    @classmethod
    def aggregate_min_candidates(cls, min_problems_candidates):
        """Return the smallest ``solved_relevance`` among the given candidates."""
        return min(
            [problem.solved_relevance for problem in min_problems_candidates]
        )

generate_lower_bound_problem

def generate_lower_bound_problem(
    best_hyperparameters,
    init_constraints,
    best_model_state,
    data,
    di,
    preset_model,
    probeID=-1
)

View Source

    @classmethod
    def generate_lower_bound_problem(
        cls,
        best_hyperparameters,
        init_constraints,
        best_model_state,
        data,
        di,
        preset_model,
        probeID=-1,
    ):
        """Yield one problem instance configured as the lower bound for feature ``di``.

        The instance is built with ``cls``, gets its lower-bound objective via
        ``init_objective_LB()`` and is flagged with ``isLowerBound = True``.
        """
        optional_args = {
            "preset_model": preset_model,
            "best_model_state": best_model_state,
            "probeID": probeID,
        }
        lower = cls(di, data, best_hyperparameters, init_constraints, **optional_args)
        lower.init_objective_LB()
        lower.isLowerBound = True
        yield lower

generate_upper_bound_problem

def generate_upper_bound_problem(
    best_hyperparameters,
    init_constraints,
    best_model_state,
    data,
    di,
    preset_model,
    probeID=-1
)

View Source

    @classmethod
    def generate_upper_bound_problem(
        cls,
        best_hyperparameters,
        init_constraints,
        best_model_state,
        data,
        di,
        preset_model,
        probeID=-1,
    ):
        """Yield two upper-bound problems for feature ``di``: sign -1, then sign +1.

        Each instance is built with ``cls``, gets its objective via
        ``init_objective_UB(sign=...)`` and is flagged with ``isLowerBound = False``.
        """
        optional_args = {
            "preset_model": preset_model,
            "best_model_state": best_model_state,
            "probeID": probeID,
        }
        for direction in (-1, 1):
            upper = cls(
                di, data, best_hyperparameters, init_constraints, **optional_args
            )
            upper.init_objective_UB(sign=direction)
            upper.isLowerBound = False
            yield upper

Instance variables

accepted_status

constraints

cvx_problem

isProbe

is_solved

objective

probeID

solved_relevance

solver_kwargs

Methods

add_constraint

def add_constraint(
    self,
    new
)

View Source

    def add_constraint(self, new):
        """Append ``new`` to this problem's list of constraints."""
        constraint_list = self._constraints
        constraint_list.append(new)

init_objective_LB

def init_objective_LB(
    self,
    **kwargs
)

View Source

    def init_objective_LB(self, **kwargs):
        """Minimize the relevance, bounded below by ``|w[current_feature]|``."""
        weight_magnitude = cvx.abs(self.w[self.current_feature])
        self.add_constraint(weight_magnitude <= self.feature_relevance)
        self._objective = cvx.Minimize(self.feature_relevance)

init_objective_UB

def init_objective_UB(
    self,
    sign=None,
    **kwargs
)

View Source

    def init_objective_UB(self, sign=None, **kwargs):
        """Maximize the relevance, bounded above by ``sign * w[current_feature]``."""
        signed_weight = sign * self.w[self.current_feature]
        self.add_constraint(self.feature_relevance <= signed_weight)
        self._objective = cvx.Maximize(self.feature_relevance)

preprocessing_data

def preprocessing_data(
    self,
    data,
    best_model_state
)

View Source

    def preprocessing_data(self, data, best_model_state):
        """Store the data and its dimensions on the instance.

        Sets ``n`` and ``d`` from the first two entries of ``X.shape``, keeps
        ``X`` as given and stores ``y`` as a new numpy array.
        ``best_model_state`` is not used here.
        """
        features, labels = data
        n_samples, n_features = features.shape[0], features.shape[1]

        self.n = n_samples
        self.d = n_features
        self.X = features
        self.y = np.array(labels)

solve

def solve(
    self
) -> object

View Source

    def solve(self) -> object:
        """Build the cvxpy problem from the stored objective and constraints and solve it.

        A ``SolverError`` raised while solving is ignored. Afterwards the
        problem's status is stored in ``_solver_status`` and ``self`` is returned.
        """
        # The cvx problem is created here rather than earlier because pickling
        # LP solver objects is problematic; deferring it lets worker threads
        # build the problem themselves and spares the serialization.
        problem = cvx.Problem(objective=self.objective, constraints=self.constraints)
        self._cvx_problem = problem

        try:
            problem.solve(**self.solver_kwargs)
        except SolverError:
            # Solver errors are common with this framework and are ignored:
            # multiple problems are solved per bound and a feasible solution
            # is chosen later (see '_create_interval').
            pass

        self._solver_status = problem.status
        return self

Classification_SVM

class Classification_SVM(
    C=1
)

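Initial model: a linear SVM with an L1 penalty on the weights, solved with cvxpy; its fitted loss and weight norm are stored as the model's constraints. (Inherited `abc.ABC` docstring: Helper class that provides a standard way to create an ABC using inheritance.)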

View Source

class Classification_SVM(InitModel):
    """Initial model: linear SVM with an L1 penalty on the weights, solved with cvxpy.

    After ``fit`` the instance holds ``model_state`` (weights, bias, slack)
    and ``constraints`` (summed slack and L1 norm of the weights).
    """

    def __init__(self, C=1):
        super().__init__()
        self.C = C

    def fit(self, X, y, **kwargs):
        """Solve ``min ||w||_1 + C * sum(slack)`` subject to the margin constraints.

        Parameters
        ----------
        X : array with two dimensions (samples, features)
        y : array of labels
            NOTE(review): presumably already encoded as -1 / +1 by the problem
            type's preprocessing; this method does not check it.

        Returns
        -------
        self
        """
        (n, d) = X.shape

        C = self.get_params()["C"]

        w = cvx.Variable(shape=(d), name="w")
        slack = cvx.Variable(shape=(n), name="slack")
        b = cvx.Variable(name="bias")

        objective = cvx.Minimize(cvx.norm(w, 1) + C * cvx.sum(slack))
        constraints = [cvx.multiply(y.T, X @ w + b) >= 1 - slack, slack >= 0]

        # Solve problem.
        problem = cvx.Problem(objective, constraints)
        problem.solve(**self.SOLVER_PARAMS)

        w = w.value
        b = b.value
        slack = np.asarray(slack.value).flatten()
        self.model_state = {"w": w, "b": b, "slack": slack}

        # These two values are stored as the model's constraints.
        loss = np.sum(slack)
        w_l1 = np.linalg.norm(w, ord=1)
        self.constraints = {"loss": loss, "w_l1": w_l1}

        return self

    def predict(self, X):
        """Return +1 where ``X @ w + b >= 0`` and -1 elsewhere."""
        w = self.model_state["w"]
        b = self.model_state["b"]

        y = np.dot(X, w) + b >= 0
        y = y.astype(int)
        y[y == 0] = -1

        return y

    def score(self, X, y, **kwargs):
        """Return the weighted F1 score of the predictions on ``X``.

        Parameters
        ----------
        X : array of samples to predict
        y : array of true labels (re-encoded to -1 / +1 before scoring)
        **kwargs
            ``verbose``: when truthy, a text ``classification_report`` is
            returned instead of the numeric score.

        Returns
        -------
        float or str
        """
        prediction = self.predict(X)

        # Negative class is set to -1 for decision surface
        y = LabelEncoder().fit_transform(y)
        y[y == 0] = -1

        # Test the value, not mere presence, so ``verbose=False`` yields the score.
        if kwargs.get("verbose"):
            return classification_report(y, prediction)

        # Using weighted f1 score to have a stable score for imbalanced datasets
        return fbeta_score(y, prediction, beta=1, average="weighted")

Ancestors (in MRO)

fri.model.base_initmodel.InitModel
abc.ABC
sklearn.base.BaseEstimator

Class variables

HYPERPARAMETER

SOLVER_PARAMS

Instance variables

L1_factor

Methods

fit

def fit(
    self,
    X,
    y,
    **kwargs
)

View Source

    def fit(self, X, y, **kwargs):
        """Solve ``min ||w||_1 + C * sum(slack)`` subject to the margin constraints.

        Stores the solution in ``model_state`` (keys ``w``, ``b``, ``slack``)
        and the summed slack and L1 weight norm in ``constraints`` (keys
        ``loss``, ``w_l1``). Returns ``self``.
        """
        n_samples, n_features = X.shape
        penalty = self.get_params()["C"]

        # Optimization variables
        weights = cvx.Variable(shape=(n_features), name="w")
        hinge_slack = cvx.Variable(shape=(n_samples), name="slack")
        bias = cvx.Variable(name="bias")

        objective = cvx.Minimize(cvx.norm(weights, 1) + penalty * cvx.sum(hinge_slack))
        margins = cvx.multiply(y.T, X @ weights + bias)
        constraints = [margins >= 1 - hinge_slack, hinge_slack >= 0]

        # Solve problem.
        svm_problem = cvx.Problem(objective, constraints)
        svm_problem.solve(**self.SOLVER_PARAMS)

        # Extract the solution
        w = weights.value
        slack = np.asarray(hinge_slack.value).flatten()
        self.model_state = {"w": w, "b": bias.value, "slack": slack}

        # Values stored as the model's constraints
        self.constraints = {
            "loss": np.sum(slack),
            "w_l1": np.linalg.norm(w, ord=1),
        }
        return self

get_params

def get_params(
    self,
    deep=True
)

Get parameters for this estimator.

Parameters

deep : bool, default=True If True, will return the parameters for this estimator and contained subobjects that are estimators.

Returns

params : dict Parameter names mapped to their values.

View Source

    def get_params(self, deep=True):
        """
        Collect the parameters of this estimator into a dict.

        Parameters
        ----------
        deep : bool, default=True
            When True, parameters of contained objects that expose
            ``get_params`` are included as well, keyed as
            ``<name>__<sub_name>``.

        Returns
        -------
        params : dict
            Parameter names mapped to their values.
        """
        params = {}
        for name in self._get_param_names():
            value = getattr(self, name)
            if deep and hasattr(value, 'get_params'):
                # Nested entries are inserted ahead of the object itself
                for sub_name, sub_value in value.get_params().items():
                    params[name + '__' + sub_name] = sub_value
            params[name] = value
        return params

make_scorer

def make_scorer(
    self
)

View Source

    def make_scorer(self):
        """Return a ``(None, None)`` pair; no scorer is provided here."""
        no_scorer = (None, None)
        return no_scorer

predict

def predict(
    self,
    X
)

View Source

    def predict(self, X):
        """Return +1 where ``X @ w + b >= 0`` and -1 elsewhere.

        ``w`` and ``b`` are read from ``self.model_state``.
        """
        state = self.model_state
        decision = np.dot(X, state["w"]) + state["b"]

        # Boolean mask -> 1/0 integers, then move the zeros to -1
        labels = (decision >= 0).astype(int)
        labels[labels == 0] = -1
        return labels

score

def score(
    self,
    X,
    y,
    **kwargs
)

View Source

    def score(self, X, y, **kwargs):
        """Return the weighted F1 score of the predictions on ``X``.

        Parameters
        ----------
        X : array of samples to predict
        y : array of true labels (re-encoded to -1 / +1 before scoring)
        **kwargs
            ``verbose``: when truthy, a text ``classification_report`` is
            returned instead of the numeric score.

        Returns
        -------
        float or str
        """
        prediction = self.predict(X)

        # Negative class is set to -1 for decision surface
        y = LabelEncoder().fit_transform(y)
        y[y == 0] = -1

        # Test the value, not mere presence, so ``verbose=False`` yields the score.
        if kwargs.get("verbose"):
            return classification_report(y, prediction)

        # Using weighted f1 score to have a stable score for imbalanced datasets
        return fbeta_score(y, prediction, beta=1, average="weighted")

set_params

def set_params(
    self,
    **params
)

Set the parameters of this estimator.

The method works on simple estimators as well as on nested objects (such as :class:~sklearn.pipeline.Pipeline). The latter have parameters of the form <component>__<parameter> so that it's possible to update each component of a nested object.

Parameters

**params : dict Estimator parameters.

Returns

self : estimator instance Estimator instance.

View Source

    def set_params(self, **params):
        """
        Update parameters of this estimator and return it.

        Plain names are set as attributes on this object. Names of the form
        ``<component>__<parameter>`` are grouped by component and forwarded
        to that component's own ``set_params``.

        Parameters
        ----------
        **params : dict
            Estimator parameters.

        Returns
        -------
        self : estimator instance
            Estimator instance.

        Raises
        ------
        ValueError
            If a name (or its component prefix) is not among ``get_params()``.
        """
        if not params:
            # Nothing to do; skips the call to get_params entirely
            return self

        valid_params = self.get_params(deep=True)
        nested_params = defaultdict(dict)  # component name -> its sub-parameters

        for full_name, value in params.items():
            name, delim, sub_name = full_name.partition('__')
            if name not in valid_params:
                raise ValueError('Invalid parameter %s for estimator %s. '
                                 'Check the list of available parameters '
                                 'with `estimator.get_params().keys()`.' %
                                 (name, self))

            if not delim:
                setattr(self, name, value)
                valid_params[name] = value
                continue
            nested_params[name][sub_name] = value

        for name, sub_params in nested_params.items():
            valid_params[name].set_params(**sub_params)

        return self