Skip to content

Module fri.model.lupi_classification

View Source
import cvxpy as cvx

import numpy as np

from sklearn import preprocessing

from sklearn.metrics import fbeta_score, classification_report

from sklearn.preprocessing import LabelEncoder

from sklearn.utils import check_X_y

from sklearn.utils.multiclass import unique_labels

from .base_initmodel import InitModel

from .base_lupi import LUPI_Relevance_CVXProblem, split_dataset

from .base_type import ProblemType

from .classification import Classification_Relevance_Bound

class LUPI_Classification(ProblemType):
    """Problem type for binary classification with privileged information (LUPI).

    Names the hyperparameters, relaxation factors, initial model class and
    relevance-bound problem class for this problem type, and validates the
    input data before fitting.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Number of privileged features; filled in by `preprocessing`.
        self._lupi_features = None

    @property
    def lupi_features(self):
        """Number of privileged features recorded by `preprocessing`, or None."""
        return self._lupi_features

    @classmethod
    def parameters(cls):
        """Return the names of the hyperparameters of this problem type."""
        return ["C", "scaling_lupi_w", "scaling_lupi_loss"]

    @property
    def get_initmodel_template(cls):
        """Class of the initial model used for this problem type."""
        return LUPI_Classification_SVM

    @property
    def get_cvxproblem_template(cls):
        """Class of the relevance-bound problem used for this problem type."""
        return LUPI_Classification_Relevance_Bound

    # Declared as a classmethod to match `parameters`; calls on an instance
    # keep working exactly as before.
    @classmethod
    def relax_factors(cls):
        """Return the names of the relaxation factors of this problem type."""
        return ["loss_slack", "w_l1_slack"]

    def preprocessing(self, data, lupi_features=None):
        """Validate the data and encode the labels as -1 / +1.

        Parameters
        ----------
        data : tuple
            Pair ``(X, y)`` of feature matrix and labels.
        lupi_features : int
            Number of privileged features. Must be greater than 0 and smaller
            than the number of columns of ``X``. NumPy integers are accepted.

        Returns
        -------
        tuple
            ``(X, y)`` as returned by ``check_X_y``, with ``y`` encoded so that
            the label encoded as 0 by ``LabelEncoder`` becomes -1.

        Raises
        ------
        ValueError
            If ``lupi_features`` is missing, not an integer or out of range,
            or if ``y`` contains more than two classes.
        """
        if lupi_features is None:
            raise ValueError("Argument 'lupi_features' missing in fit() call.")
        if not isinstance(lupi_features, (int, np.integer)):
            raise ValueError("Argument 'lupi_features' is not type int.")

        X, y = data
        # Check that X and y have correct shape. Done before reading the
        # shape so that list-like input is supported as well.
        X, y = check_X_y(X, y)
        d = X.shape[1]

        if not 0 < lupi_features < d:
            raise ValueError(
                "Argument 'lupi_features' looks wrong. We need at least 1 privileged feature (>0) and at least one normal feature."
            )
        self._lupi_features = int(lupi_features)

        # Only two distinct labels can be mapped onto -1 / +1.
        classes_ = unique_labels(y)
        if len(classes_) > 2:
            raise ValueError("Only binary class data supported")

        # Negative class is set to -1 for decision surface
        y = LabelEncoder().fit_transform(y)
        y[y == 0] = -1
        return X, y

class LUPI_Classification_SVM(InitModel):
    """Initial linear model for LUPI classification, solved with cvxpy.

    A linear function over the normal features is fitted together with a
    second linear function over the privileged features; both weight vectors
    are L1-regularized.
    """

    HYPERPARAMETER = ["C", "scaling_lupi_w", "scaling_lupi_loss"]

    def __init__(self, C=1, scaling_lupi_w=1, scaling_lupi_loss=1, lupi_features=None):
        super().__init__()
        self.lupi_features = lupi_features
        self.scaling_lupi_loss = scaling_lupi_loss
        self.scaling_lupi_w = scaling_lupi_w
        self.C = C

    def fit(self, X_combined, y, lupi_features=None):
        """
        Fit the model by solving the cvxpy problem.

        Parameters
        ----------
        X_combined : array-like
            Normal and privileged features in one matrix.
        y : array-like
            Labels; multiplied element-wise with the decision function.
        lupi_features : int
            Number of features in dataset which are considered privileged information (PI).
            PI features are expected to be the last features in the dataset.
            Falls back to the value given to the constructor when omitted.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If no number of privileged features is available.
        """
        # Resolve the number of privileged features once and use that single
        # value for both the data split and the variable shapes.
        if lupi_features is None:
            lupi_features = self.lupi_features
        if lupi_features is None:
            raise ValueError("No amount of lupi features given.")
        # `predict` reads the attribute, so keep it in sync with the fit.
        self.lupi_features = lupi_features

        X, X_priv = split_dataset(X_combined, lupi_features)
        (n, d) = X.shape

        # Get parameters from CV model without any feature constraints
        params = self.get_params()
        C = params["C"]
        scaling_lupi_w = params["scaling_lupi_w"]
        scaling_lupi_loss = params["scaling_lupi_loss"]

        # Initialize variables in cvxpy
        w = cvx.Variable(shape=(d), name="w")
        w_priv = cvx.Variable(lupi_features, name="w_priv")
        b = cvx.Variable(name="bias")
        b_priv = cvx.Variable(name="bias_priv")

        # Define functions for better readability
        function = X @ w + b
        priv_function = X_priv @ w_priv + b_priv
        slack = cvx.Variable(shape=(n))

        # Combined loss of lupi function and normal slacks, scaled by two constants
        loss = scaling_lupi_loss * cvx.sum(priv_function) + cvx.sum(slack)

        # L1 norm regularization of both functions with 1 scaling constant
        w_l1 = cvx.norm(w, 1)
        w_priv_l1 = cvx.norm(w_priv, 1)
        weight_regularization = 0.5 * (w_l1 + scaling_lupi_w * w_priv_l1)

        constraints = [
            cvx.multiply(y.T, function) >= 1 - cvx.multiply(y.T, priv_function) - slack,
            priv_function >= 0,
            slack >= 0,
        ]
        objective = cvx.Minimize(C * loss + weight_regularization)

        # Solve problem.
        problem = cvx.Problem(objective, constraints)
        problem.solve(**self.SOLVER_PARAMS)

        self.model_state = {
            "w": w.value,
            "w_priv": w_priv.value,
            "b": b.value,
            "b_priv": b_priv.value,
            "lupi_features": lupi_features,  # Number of lupi features in the dataset TODO: Move this somewhere else
        }
        # Values reached by the solution; stored for later use as bounds.
        self.constraints = {
            "loss": loss.value,
            "w_l1": w_l1.value,
            "w_priv_l1": w_priv_l1.value,
        }
        return self

    def predict(self, X):
        """Predict labels (-1 or 1) using only the normal features of `X`."""
        X, X_priv = split_dataset(X, self.lupi_features)
        w = self.model_state["w"]
        b = self.model_state["b"]

        # Simple hyperplane classification rule, formatted as signed unit vector
        f = np.dot(X, w) + b
        return np.where(f >= 0, 1, -1)

    def score(self, X, y, **kwargs):
        """Return the weighted F1 score of the predictions for `X`.

        Parameters
        ----------
        X : array-like
            Normal and privileged features in one matrix.
        y : array-like
            True labels; re-encoded to -1 / +1 before scoring.
        **kwargs
            If ``verbose`` is given and truthy, the text report of
            ``classification_report`` is returned instead of the score.
        """
        prediction = self.predict(X)

        # Negative class is set to -1 for decision surface
        y = LabelEncoder().fit_transform(y)
        y[y == 0] = -1

        # Honour the value of the flag, not merely its presence.
        if kwargs.get("verbose", False):
            return classification_report(y, prediction)

        # Using weighted f1 score to have a stable score for imbalanced datasets
        return fbeta_score(y, prediction, beta=1, average="weighted")

class LUPI_Classification_Relevance_Bound(
    LUPI_Relevance_CVXProblem, Classification_Relevance_Bound
):
    """Relevance-bound problem for a single feature in LUPI classification."""

    def _init_objective_UB_LUPI(self, sign=None, **kwargs):
        """Maximize the relevance bounded by the signed privileged weight."""
        signed_weight = sign * self.w_priv[self.lupi_index]
        self.add_constraint(self.feature_relevance <= signed_weight)
        self._objective = cvx.Maximize(self.feature_relevance)

    def _init_objective_LB_LUPI(self, **kwargs):
        """Minimize the relevance bounding the absolute privileged weight."""
        weight_magnitude = cvx.abs(self.w_priv[self.lupi_index])
        self.add_constraint(weight_magnitude <= self.feature_relevance)
        self._objective = cvx.Minimize(self.feature_relevance)

    def _init_constraints(self, parameters, init_model_constraints):
        """Create the variables and constraints shared by all bound problems.

        `parameters` is accepted but not read here; the bounds come from
        `init_model_constraints` (keys "w_l1", "w_priv_l1" and "loss").
        """
        # Limits taken from the best initial model
        normal_l1_limit = init_model_constraints["w_l1"]
        priv_l1_limit = init_model_constraints["w_priv_l1"]
        loss_limit = init_model_constraints["loss"]

        # Optimization variables
        weights = cvx.Variable(shape=(self.d), name="w")
        priv_weights = cvx.Variable(shape=(self.d_priv), name="w_priv")
        bias = cvx.Variable(name="b")
        priv_bias = cvx.Variable(name="b_priv")
        slack = cvx.Variable(shape=(self.n))

        # Expressions the constraints are built from
        margin = cvx.multiply(self.y.T, self.X @ weights + bias)
        priv_function = self.X_priv @ priv_weights + priv_bias
        total_loss = cvx.sum(priv_function) + cvx.sum(slack)
        normal_l1 = cvx.norm(weights, 1)
        priv_l1 = cvx.norm(priv_weights, 1)

        # Registered in the same order as they are listed
        new_constraints = (
            margin >= 1 - cvx.multiply(self.y.T, priv_function) - slack,
            priv_function >= 0,
            total_loss <= loss_limit,
            normal_l1 + priv_l1 <= normal_l1_limit + priv_l1_limit,
            slack >= 0,
        )
        for constraint in new_constraints:
            self.add_constraint(constraint)

        # Kept on the instance for the objective set-up
        self.w = weights
        self.w_priv = priv_weights
        self.feature_relevance = cvx.Variable(nonneg=True, name="Feature Relevance")

Classes

LUPI_Classification

class LUPI_Classification(
    **kwargs
)

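Problem type for binary classification with privileged information (LUPI): it validates the data and the `lupi_features` count, encodes the labels as -1/+1, and names the initial-model and relevance-bound classes to use.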

View Source
class LUPI_Classification(ProblemType):
    """Problem type for binary classification with privileged information (LUPI).

    Names the hyperparameters, relaxation factors, initial model class and
    relevance-bound problem class for this problem type, and validates the
    input data before fitting.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Number of privileged features; filled in by `preprocessing`.
        self._lupi_features = None

    @property
    def lupi_features(self):
        """Number of privileged features recorded by `preprocessing`, or None."""
        return self._lupi_features

    @classmethod
    def parameters(cls):
        """Return the names of the hyperparameters of this problem type."""
        return ["C", "scaling_lupi_w", "scaling_lupi_loss"]

    @property
    def get_initmodel_template(cls):
        """Class of the initial model used for this problem type."""
        return LUPI_Classification_SVM

    @property
    def get_cvxproblem_template(cls):
        """Class of the relevance-bound problem used for this problem type."""
        return LUPI_Classification_Relevance_Bound

    # Declared as a classmethod to match `parameters`; calls on an instance
    # keep working exactly as before.
    @classmethod
    def relax_factors(cls):
        """Return the names of the relaxation factors of this problem type."""
        return ["loss_slack", "w_l1_slack"]

    def preprocessing(self, data, lupi_features=None):
        """Validate the data and encode the labels as -1 / +1.

        Parameters
        ----------
        data : tuple
            Pair ``(X, y)`` of feature matrix and labels.
        lupi_features : int
            Number of privileged features. Must be greater than 0 and smaller
            than the number of columns of ``X``. NumPy integers are accepted.

        Returns
        -------
        tuple
            ``(X, y)`` as returned by ``check_X_y``, with ``y`` encoded so that
            the label encoded as 0 by ``LabelEncoder`` becomes -1.

        Raises
        ------
        ValueError
            If ``lupi_features`` is missing, not an integer or out of range,
            or if ``y`` contains more than two classes.
        """
        if lupi_features is None:
            raise ValueError("Argument 'lupi_features' missing in fit() call.")
        if not isinstance(lupi_features, (int, np.integer)):
            raise ValueError("Argument 'lupi_features' is not type int.")

        X, y = data
        # Check that X and y have correct shape. Done before reading the
        # shape so that list-like input is supported as well.
        X, y = check_X_y(X, y)
        d = X.shape[1]

        if not 0 < lupi_features < d:
            raise ValueError(
                "Argument 'lupi_features' looks wrong. We need at least 1 privileged feature (>0) and at least one normal feature."
            )
        self._lupi_features = int(lupi_features)

        # Only two distinct labels can be mapped onto -1 / +1.
        classes_ = unique_labels(y)
        if len(classes_) > 2:
            raise ValueError("Only binary class data supported")

        # Negative class is set to -1 for decision surface
        y = LabelEncoder().fit_transform(y)
        y[y == 0] = -1
        return X, y

Ancestors (in MRO)

  • fri.model.base_type.ProblemType
  • abc.ABC

Static methods

parameters
def parameters(

)
View Source
    @classmethod

    def parameters(cls):

        """Return the names of the hyperparameters of this problem type."""

        return ["C", "scaling_lupi_w", "scaling_lupi_loss"]

Instance variables

get_cvxproblem_template
get_initmodel_template
lupi_features

Methods

get_all_parameters
def get_all_parameters(
    self
)
View Source
    def get_all_parameters(self):
        """Map every name from `parameters()` to its chosen value or search space."""
        chosen = {}
        for name in self.parameters():
            chosen[name] = self.get_chosen_parameter(name)
        return chosen
get_all_relax_factors
def get_all_relax_factors(
    self
)
View Source
    def get_all_relax_factors(self):
        """Map every name from `relax_factors()` to its chosen relaxation factor."""
        factors = {}
        for name in self.relax_factors():
            factors[name] = self.get_chosen_relax_factors(name)
        return factors
get_chosen_parameter
def get_chosen_parameter(
    self,
    p
)
View Source
    def get_chosen_parameter(self, p):
        """Return the value or search space for hyperparameter `p`.

        Parameters
        ----------
        p : str
            Name of the hyperparameter.

        Returns
        -------
        list or scipy.stats distribution
            A one-element list holding the value stored in
            ``self.chosen_parameters_`` when there is one; otherwise a default
            search space (a list for "epsilon", a reciprocal distribution
            for everything else).
        """
        try:
            # We return list for param search function
            return [self.chosen_parameters_[p]]
        except (AttributeError, KeyError, TypeError):
            # No usable stored value; fall through to the defaults. Only
            # lookup failures are caught, so interrupts are not swallowed.
            pass

        # TODO: rewrite the parameter logic
        # TODO: move this to subclass
        if p == "scaling_lupi_w":
            return scipy.stats.reciprocal(a=1e-15, b=1e10)
        if p == "C":
            return scipy.stats.reciprocal(a=1e-5, b=1e5)
        if p == "epsilon":
            return [0, 0.001, 0.01, 0.1, 1, 10, 100]
        return scipy.stats.reciprocal(a=1e-10, b=1e10)
get_chosen_relax_factors
def get_chosen_relax_factors(
    self,
    p
)
View Source
    def get_chosen_relax_factors(self, p):
        """Return the relaxation factor stored for `p`.

        The factor is looked up in ``self.relax_factors_`` under `p`, then
        under ``p + "_slack"``; if neither key exists, 0.1 is used.

        Raises
        ------
        ValueError
            If the resolved factor is negative.
        """
        try:
            factor = self.relax_factors_[p]
        except KeyError:
            try:
                factor = self.relax_factors_[p + "_slack"]
            except KeyError:
                # Default used when nothing was configured for `p`.
                factor = 0.1

        if factor < 0:
            # The old message claimed the factor "is positive", which is the
            # opposite of the condition that triggers this error.
            raise ValueError("Slack Factor multiplier must not be negative!")
        return factor
get_relaxed_constraints
def get_relaxed_constraints(
    self,
    constraints
)
View Source
    def get_relaxed_constraints(self, constraints):
        """Return a dict with every constraint value passed through `relax_constraint`."""
        relaxed = {}
        for name, value in constraints.items():
            relaxed[name] = self.relax_constraint(name, value)
        return relaxed
postprocessing
def postprocessing(
    self,
    bounds
)
View Source
    def postprocessing(self, bounds):

        """Return `bounds` unchanged."""

        return bounds
preprocessing
def preprocessing(
    self,
    data,
    lupi_features=None
)
View Source
    def preprocessing(self, data, lupi_features=None):
        """Validate the data and encode the labels as -1 / +1.

        Parameters
        ----------
        data : tuple
            Pair ``(X, y)`` of feature matrix and labels.
        lupi_features : int
            Number of privileged features. Must be greater than 0 and smaller
            than the number of columns of ``X``. NumPy integers are accepted.

        Returns
        -------
        tuple
            ``(X, y)`` as returned by ``check_X_y``, with ``y`` encoded so that
            the label encoded as 0 by ``LabelEncoder`` becomes -1.

        Raises
        ------
        ValueError
            If ``lupi_features`` is missing, not an integer or out of range,
            or if ``y`` contains more than two classes.
        """
        if lupi_features is None:
            raise ValueError("Argument 'lupi_features' missing in fit() call.")
        if not isinstance(lupi_features, (int, np.integer)):
            raise ValueError("Argument 'lupi_features' is not type int.")

        X, y = data
        # Check that X and y have correct shape. Done before reading the
        # shape so that list-like input is supported as well.
        X, y = check_X_y(X, y)
        d = X.shape[1]

        if not 0 < lupi_features < d:
            raise ValueError(
                "Argument 'lupi_features' looks wrong. We need at least 1 privileged feature (>0) and at least one normal feature."
            )
        self._lupi_features = int(lupi_features)

        # Only two distinct labels can be mapped onto -1 / +1.
        classes_ = unique_labels(y)
        if len(classes_) > 2:
            raise ValueError("Only binary class data supported")

        # Negative class is set to -1 for decision surface
        y = LabelEncoder().fit_transform(y)
        y[y == 0] = -1
        return X, y
relax_constraint
def relax_constraint(
    self,
    key,
    value
)
View Source
    def relax_constraint(self, key, value):
        """Scale `value` up by the relaxation factor chosen for `key`."""
        factor = self.get_chosen_relax_factors(key)
        return value * (1 + factor)
relax_factors
def relax_factors(
    cls
)
View Source
    # Declared as a classmethod to match the `cls` parameter; calls on an
    # instance keep working exactly as before.
    @classmethod
    def relax_factors(cls):
        """Return the names of the relaxation factors of this problem type."""
        return ["loss_slack", "w_l1_slack"]

LUPI_Classification_Relevance_Bound

class LUPI_Classification_Relevance_Bound(
    current_feature: int,
    data: tuple,
    hyperparameters,
    best_model_constraints,
    preset_model=None,
    best_model_state=None,
    probeID=-1
)

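Relevance-bound optimization problem for LUPI classification: it builds the cvxpy constraints and objectives used to bound the relevance of a single normal or privileged feature.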

View Source
class LUPI_Classification_Relevance_Bound(
    LUPI_Relevance_CVXProblem, Classification_Relevance_Bound
):
    """Relevance-bound problem for a single feature in LUPI classification."""

    def _init_objective_UB_LUPI(self, sign=None, **kwargs):
        """Maximize the relevance bounded by the signed privileged weight."""
        signed_weight = sign * self.w_priv[self.lupi_index]
        self.add_constraint(self.feature_relevance <= signed_weight)
        self._objective = cvx.Maximize(self.feature_relevance)

    def _init_objective_LB_LUPI(self, **kwargs):
        """Minimize the relevance bounding the absolute privileged weight."""
        weight_magnitude = cvx.abs(self.w_priv[self.lupi_index])
        self.add_constraint(weight_magnitude <= self.feature_relevance)
        self._objective = cvx.Minimize(self.feature_relevance)

    def _init_constraints(self, parameters, init_model_constraints):
        """Create the variables and constraints shared by all bound problems.

        `parameters` is accepted but not read here; the bounds come from
        `init_model_constraints` (keys "w_l1", "w_priv_l1" and "loss").
        """
        # Limits taken from the best initial model
        normal_l1_limit = init_model_constraints["w_l1"]
        priv_l1_limit = init_model_constraints["w_priv_l1"]
        loss_limit = init_model_constraints["loss"]

        # Optimization variables
        weights = cvx.Variable(shape=(self.d), name="w")
        priv_weights = cvx.Variable(shape=(self.d_priv), name="w_priv")
        bias = cvx.Variable(name="b")
        priv_bias = cvx.Variable(name="b_priv")
        slack = cvx.Variable(shape=(self.n))

        # Expressions the constraints are built from
        margin = cvx.multiply(self.y.T, self.X @ weights + bias)
        priv_function = self.X_priv @ priv_weights + priv_bias
        total_loss = cvx.sum(priv_function) + cvx.sum(slack)
        normal_l1 = cvx.norm(weights, 1)
        priv_l1 = cvx.norm(priv_weights, 1)

        # Registered in the same order as they are listed
        new_constraints = (
            margin >= 1 - cvx.multiply(self.y.T, priv_function) - slack,
            priv_function >= 0,
            total_loss <= loss_limit,
            normal_l1 + priv_l1 <= normal_l1_limit + priv_l1_limit,
            slack >= 0,
        )
        for constraint in new_constraints:
            self.add_constraint(constraint)

        # Kept on the instance for the objective set-up
        self.w = weights
        self.w_priv = priv_weights
        self.feature_relevance = cvx.Variable(nonneg=True, name="Feature Relevance")

Ancestors (in MRO)

  • fri.model.base_lupi.LUPI_Relevance_CVXProblem
  • fri.model.classification.Classification_Relevance_Bound
  • fri.model.base_cvxproblem.Relevance_CVXProblem
  • abc.ABC

Static methods

aggregate_max_candidates
def aggregate_max_candidates(
    max_problems_candidates
)
View Source
    @classmethod
    def aggregate_max_candidates(cls, max_problems_candidates):
        """Return the largest `solved_relevance` among the given candidates."""
        return max(
            candidate.solved_relevance for candidate in max_problems_candidates
        )
aggregate_min_candidates
def aggregate_min_candidates(
    min_problems_candidates
)
View Source
    @classmethod
    def aggregate_min_candidates(cls, min_problems_candidates):
        """Return the smallest `solved_relevance` among the given candidates."""
        return min(
            candidate.solved_relevance for candidate in min_problems_candidates
        )
generate_lower_bound_problem
def generate_lower_bound_problem(
    best_hyperparameters,
    init_constraints,
    best_model_state,
    data,
    di,
    preset_model,
    probeID=-1
)
View Source
    @classmethod
    def generate_lower_bound_problem(
        cls,
        best_hyperparameters,
        init_constraints,
        best_model_state,
        data,
        di,
        preset_model,
        probeID=-1,
    ):
        """Yield one lower-bound problem for feature `di`.

        The problem is an instance of `cls` with its lower-bound objective
        initialized and `isLowerBound` set to True.
        """
        options = {
            "preset_model": preset_model,
            "best_model_state": best_model_state,
            "probeID": probeID,
        }
        lower = cls(di, data, best_hyperparameters, init_constraints, **options)
        lower.init_objective_LB()
        lower.isLowerBound = True
        yield lower
generate_upper_bound_problem
def generate_upper_bound_problem(
    best_hyperparameters,
    init_constraints,
    best_model_state,
    data,
    di,
    preset_model,
    probeID=-1
)
View Source
    @classmethod
    def generate_upper_bound_problem(
        cls,
        best_hyperparameters,
        init_constraints,
        best_model_state,
        data,
        di,
        preset_model,
        probeID=-1,
    ):
        """Yield two upper-bound problems for feature `di`, one per sign.

        Each problem is a fresh instance of `cls` whose upper-bound objective
        is initialized with ``sign=-1`` and then ``sign=1``; `isLowerBound`
        is set to False on both.
        """
        options = {
            "preset_model": preset_model,
            "best_model_state": best_model_state,
            "probeID": probeID,
        }
        for sign in (-1, 1):
            upper = cls(di, data, best_hyperparameters, init_constraints, **options)
            upper.init_objective_UB(sign=sign)
            upper.isLowerBound = False
            yield upper

Instance variables

accepted_status
constraints
cvx_problem
isProbe
is_solved
objective
probeID
solved_relevance
solver_kwargs

Methods

add_constraint
def add_constraint(
    self,
    new
)
View Source
    def add_constraint(self, new):

        """Append the constraint `new` to this problem's list of constraints."""

        self._constraints.append(new)
init_objective_LB
def init_objective_LB(
    self,
    **kwargs
)
View Source
    def init_objective_LB(self, **kwargs):
        """Set up the lower-bound objective for the current feature.

        Privileged features use the LUPI-specific objective; all others are
        handed to the next class in the MRO.
        """
        # We have two models basically with different indexes
        if not self.isPriv:
            # We call sibling class of our lupi class, which is the normal problem
            super().init_objective_LB(**kwargs)
            return
        self._init_objective_LB_LUPI(**kwargs)
init_objective_UB
def init_objective_UB(
    self,
    **kwargs
)
View Source
    def init_objective_UB(self, **kwargs):
        """Set up the upper-bound objective for the current feature.

        Privileged features use the LUPI-specific objective; all others are
        handed to the next class in the MRO.
        """
        # We have two models basically with different indexes
        if not self.isPriv:
            # We call sibling class of our lupi class, which is the normal problem
            super().init_objective_UB(**kwargs)
            return
        self._init_objective_UB_LUPI(**kwargs)
preprocessing_data
def preprocessing_data(
    self,
    data,
    best_model_state
)
View Source
    def preprocessing_data(self, data, best_model_state):
        """Split off the privileged features and locate the current feature.

        The number of privileged features is read from
        ``best_model_state["lupi_features"]``. After the call, `lupi_index`
        holds the current feature's index relative to the privileged block
        and `isPriv` tells whether that index is non-negative.
        """
        n_priv = best_model_state["lupi_features"]
        X_combined, y = data
        X_normal, X_privileged = split_dataset(X_combined, n_priv)
        self.X_priv = X_privileged

        # The parent class handles the normal features only.
        super().preprocessing_data((X_normal, y), best_model_state)

        assert n_priv == X_privileged.shape[1]
        self.d_priv = n_priv

        # LUPI model, we need to offset the index
        self.lupi_index = self.current_feature - self.d
        self.isPriv = bool(self.lupi_index >= 0)
solve
def solve(
    self
) -> object
View Source
    def solve(self) -> object:

        """Build the cvxpy problem from the stored objective and constraints and solve it.

        A `SolverError` raised by the solver is ignored; the problem's status
        is stored in `_solver_status` either way. Returns `self`.
        """

        # We init cvx problem here because pickling LP solver objects is problematic

        # by deferring it to here, worker threads do the problem building themselves and we spare the serialization

        self._cvx_problem = cvx.Problem(

            objective=self.objective, constraints=self.constraints

        )

        try:

            # print("Solve", self)

            self._cvx_problem.solve(**self.solver_kwargs)

        except SolverError:

            # We ignore Solver Errors, which are common with our framework:

            # We solve multiple problems per bound and choose a feasible solution later (see '_create_interval')

            pass

        # Recorded even when the solver raised, so callers can inspect the outcome.

        self._solver_status = self._cvx_problem.status

        # self._cvx_problem = None

        return self

LUPI_Classification_SVM

class LUPI_Classification_SVM(
    C=1,
    scaling_lupi_w=1,
    scaling_lupi_loss=1,
    lupi_features=None
)

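Initial model for LUPI classification: an L1-regularized linear classifier over the normal features, fitted with cvxpy together with a second linear function over the privileged features.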

View Source
class LUPI_Classification_SVM(InitModel):
    """Initial linear model for LUPI classification, solved with cvxpy.

    A linear function over the normal features is fitted together with a
    second linear function over the privileged features; both weight vectors
    are L1-regularized.
    """

    HYPERPARAMETER = ["C", "scaling_lupi_w", "scaling_lupi_loss"]

    def __init__(self, C=1, scaling_lupi_w=1, scaling_lupi_loss=1, lupi_features=None):
        super().__init__()
        self.lupi_features = lupi_features
        self.scaling_lupi_loss = scaling_lupi_loss
        self.scaling_lupi_w = scaling_lupi_w
        self.C = C

    def fit(self, X_combined, y, lupi_features=None):
        """
        Fit the model by solving the cvxpy problem.

        Parameters
        ----------
        X_combined : array-like
            Normal and privileged features in one matrix.
        y : array-like
            Labels; multiplied element-wise with the decision function.
        lupi_features : int
            Number of features in dataset which are considered privileged information (PI).
            PI features are expected to be the last features in the dataset.
            Falls back to the value given to the constructor when omitted.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If no number of privileged features is available.
        """
        # Resolve the number of privileged features once and use that single
        # value for both the data split and the variable shapes.
        if lupi_features is None:
            lupi_features = self.lupi_features
        if lupi_features is None:
            raise ValueError("No amount of lupi features given.")
        # `predict` reads the attribute, so keep it in sync with the fit.
        self.lupi_features = lupi_features

        X, X_priv = split_dataset(X_combined, lupi_features)
        (n, d) = X.shape

        # Get parameters from CV model without any feature constraints
        params = self.get_params()
        C = params["C"]
        scaling_lupi_w = params["scaling_lupi_w"]
        scaling_lupi_loss = params["scaling_lupi_loss"]

        # Initialize variables in cvxpy
        w = cvx.Variable(shape=(d), name="w")
        w_priv = cvx.Variable(lupi_features, name="w_priv")
        b = cvx.Variable(name="bias")
        b_priv = cvx.Variable(name="bias_priv")

        # Define functions for better readability
        function = X @ w + b
        priv_function = X_priv @ w_priv + b_priv
        slack = cvx.Variable(shape=(n))

        # Combined loss of lupi function and normal slacks, scaled by two constants
        loss = scaling_lupi_loss * cvx.sum(priv_function) + cvx.sum(slack)

        # L1 norm regularization of both functions with 1 scaling constant
        w_l1 = cvx.norm(w, 1)
        w_priv_l1 = cvx.norm(w_priv, 1)
        weight_regularization = 0.5 * (w_l1 + scaling_lupi_w * w_priv_l1)

        constraints = [
            cvx.multiply(y.T, function) >= 1 - cvx.multiply(y.T, priv_function) - slack,
            priv_function >= 0,
            slack >= 0,
        ]
        objective = cvx.Minimize(C * loss + weight_regularization)

        # Solve problem.
        problem = cvx.Problem(objective, constraints)
        problem.solve(**self.SOLVER_PARAMS)

        self.model_state = {
            "w": w.value,
            "w_priv": w_priv.value,
            "b": b.value,
            "b_priv": b_priv.value,
            "lupi_features": lupi_features,  # Number of lupi features in the dataset TODO: Move this somewhere else
        }
        # Values reached by the solution; stored for later use as bounds.
        self.constraints = {
            "loss": loss.value,
            "w_l1": w_l1.value,
            "w_priv_l1": w_priv_l1.value,
        }
        return self

    def predict(self, X):
        """Predict labels (-1 or 1) using only the normal features of `X`."""
        X, X_priv = split_dataset(X, self.lupi_features)
        w = self.model_state["w"]
        b = self.model_state["b"]

        # Simple hyperplane classification rule, formatted as signed unit vector
        f = np.dot(X, w) + b
        return np.where(f >= 0, 1, -1)

    def score(self, X, y, **kwargs):
        """Return the weighted F1 score of the predictions for `X`.

        Parameters
        ----------
        X : array-like
            Normal and privileged features in one matrix.
        y : array-like
            True labels; re-encoded to -1 / +1 before scoring.
        **kwargs
            If ``verbose`` is given and truthy, the text report of
            ``classification_report`` is returned instead of the score.
        """
        prediction = self.predict(X)

        # Negative class is set to -1 for decision surface
        y = LabelEncoder().fit_transform(y)
        y[y == 0] = -1

        # Honour the value of the flag, not merely its presence.
        if kwargs.get("verbose", False):
            return classification_report(y, prediction)

        # Using weighted f1 score to have a stable score for imbalanced datasets
        return fbeta_score(y, prediction, beta=1, average="weighted")

Ancestors (in MRO)

  • fri.model.base_initmodel.InitModel
  • abc.ABC
  • sklearn.base.BaseEstimator

Class variables

HYPERPARAMETER
SOLVER_PARAMS

Instance variables

L1_factor

Methods

fit
def fit(
    self,
    X_combined,
    y,
    lupi_features=None
)

Parameters

lupi_features : int Number of features in dataset which are considered privileged information (PI). PI features are expected to be the last features in the dataset.

View Source
    def fit(self, X_combined, y, lupi_features=None):
        """
        Fit the model by solving the cvxpy problem.

        Parameters
        ----------
        X_combined : array-like
            Normal and privileged features in one matrix.
        y : array-like
            Labels; multiplied element-wise with the decision function.
        lupi_features : int
            Number of features in dataset which are considered privileged information (PI).
            PI features are expected to be the last features in the dataset.
            Falls back to the value given to the constructor when omitted.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If no number of privileged features is available.
        """
        # Resolve the number of privileged features once and use that single
        # value for both the data split and the variable shapes.
        if lupi_features is None:
            lupi_features = self.lupi_features
        if lupi_features is None:
            raise ValueError("No amount of lupi features given.")
        # `predict` reads the attribute, so keep it in sync with the fit.
        self.lupi_features = lupi_features

        X, X_priv = split_dataset(X_combined, lupi_features)
        (n, d) = X.shape

        # Get parameters from CV model without any feature constraints
        params = self.get_params()
        C = params["C"]
        scaling_lupi_w = params["scaling_lupi_w"]
        scaling_lupi_loss = params["scaling_lupi_loss"]

        # Initialize variables in cvxpy
        w = cvx.Variable(shape=(d), name="w")
        w_priv = cvx.Variable(lupi_features, name="w_priv")
        b = cvx.Variable(name="bias")
        b_priv = cvx.Variable(name="bias_priv")

        # Define functions for better readability
        function = X @ w + b
        priv_function = X_priv @ w_priv + b_priv
        slack = cvx.Variable(shape=(n))

        # Combined loss of lupi function and normal slacks, scaled by two constants
        loss = scaling_lupi_loss * cvx.sum(priv_function) + cvx.sum(slack)

        # L1 norm regularization of both functions with 1 scaling constant
        w_l1 = cvx.norm(w, 1)
        w_priv_l1 = cvx.norm(w_priv, 1)
        weight_regularization = 0.5 * (w_l1 + scaling_lupi_w * w_priv_l1)

        constraints = [
            cvx.multiply(y.T, function) >= 1 - cvx.multiply(y.T, priv_function) - slack,
            priv_function >= 0,
            slack >= 0,
        ]
        objective = cvx.Minimize(C * loss + weight_regularization)

        # Solve problem.
        problem = cvx.Problem(objective, constraints)
        problem.solve(**self.SOLVER_PARAMS)

        self.model_state = {
            "w": w.value,
            "w_priv": w_priv.value,
            "b": b.value,
            "b_priv": b_priv.value,
            "lupi_features": lupi_features,  # Number of lupi features in the dataset TODO: Move this somewhere else
        }
        # Values reached by the solution; stored for later use as bounds.
        self.constraints = {
            "loss": loss.value,
            "w_l1": w_l1.value,
            "w_priv_l1": w_priv_l1.value,
        }
        return self
get_params
def get_params(
    self,
    deep=True
)

Get parameters for this estimator.

Parameters

deep : bool, default=True If True, will return the parameters for this estimator and contained subobjects that are estimators.

Returns

params : dict Parameter names mapped to their values.

View Source
    def get_params(self, deep=True):
        """
        Get parameters for this estimator.

        Parameters
        ----------
        deep : bool, default=True
            If True, the parameters of contained objects that offer a
            ``get_params`` method are included as well, keyed as
            ``<name>__<sub_name>``.

        Returns
        -------
        params : dict
            Parameter names mapped to their values.
        """
        params = {}
        for name in self._get_param_names():
            attr = getattr(self, name)
            if deep and hasattr(attr, 'get_params'):
                # Flatten the nested parameters under a prefixed key.
                for sub_name, sub_value in attr.get_params().items():
                    params[name + '__' + sub_name] = sub_value
            params[name] = attr
        return params
make_scorer
def make_scorer(
    self
)
View Source
    def make_scorer(self):

        """Return the pair ``(None, None)``."""

        return None, None
predict
def predict(
    self,
    X
)
View Source
    def predict(self, X):
        """Predict labels (-1 or 1) using only the normal features of `X`."""
        normal_part, _privileged_part = split_dataset(X, self.lupi_features)
        state = self.model_state

        # Simple hyperplane classification rule
        decision = np.dot(normal_part, state["w"]) + state["b"]

        # Format binary as signed unit vector
        return np.where(decision >= 0, 1, -1)
score
def score(
    self,
    X,
    y,
    **kwargs
)
View Source
    def score(self, X, y, **kwargs):
        """Return the weighted F1 score of the predictions for `X`.

        Parameters
        ----------
        X : array-like
            Normal and privileged features in one matrix.
        y : array-like
            True labels; re-encoded to -1 / +1 before scoring.
        **kwargs
            If ``verbose`` is given and truthy, the text report of
            ``classification_report`` is returned instead of the score.
        """
        prediction = self.predict(X)

        # Negative class is set to -1 for decision surface
        y = LabelEncoder().fit_transform(y)
        y[y == 0] = -1

        # Honour the value of the flag, not merely its presence: previously
        # `verbose=False` still returned the text report.
        if kwargs.get("verbose", False):
            return classification_report(y, prediction)

        # Using weighted f1 score to have a stable score for imbalanced datasets
        return fbeta_score(y, prediction, beta=1, average="weighted")
set_params
def set_params(
    self,
    **params
)

Set the parameters of this estimator.

The method works on simple estimators as well as on nested objects (such as `sklearn.pipeline.Pipeline`). The latter have parameters of the form `<component>__<parameter>`, so that it is possible to update each component of a nested object.

Parameters

**params : dict Estimator parameters.

Returns

self : estimator instance Estimator instance.

View Source
    def set_params(self, **params):
        """
        Set the parameters of this estimator.

        Plain names are set as attributes on this object. Names of the form
        ``<component>__<parameter>`` are collected per component and passed
        on to that component's own ``set_params``.

        Parameters
        ----------
        **params : dict
            Estimator parameters.

        Returns
        -------
        self : estimator instance
            Estimator instance.

        Raises
        ------
        ValueError
            If a name (or its component prefix) is not a known parameter.
        """
        if not params:
            # Nothing to do; skips the parameter lookup below.
            return self

        known = self.get_params(deep=True)
        grouped = defaultdict(dict)  # sub-parameters per component name

        for full_key, value in params.items():
            name, sep, remainder = full_key.partition('__')
            if name not in known:
                raise ValueError('Invalid parameter %s for estimator %s. '
                                 'Check the list of available parameters '
                                 'with `estimator.get_params().keys()`.' %
                                 (name, self))
            if not sep:
                setattr(self, name, value)
                # Later nested keys must reach the newly assigned object.
                known[name] = value
                continue
            grouped[name][remainder] = value

        for name, sub_params in grouped.items():
            known[name].set_params(**sub_params)
        return self