Module fri.model.lupi_classification
View Source
import cvxpy as cvx
import numpy as np
from sklearn import preprocessing
from sklearn.metrics import fbeta_score, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import check_X_y
from sklearn.utils.multiclass import unique_labels
from .base_initmodel import InitModel
from .base_lupi import LUPI_Relevance_CVXProblem, split_dataset
from .base_type import ProblemType
from .classification import Classification_Relevance_Bound
class LUPI_Classification(ProblemType):
    """Problem type for binary classification with privileged information (LUPI).

    The number of privileged features is handed to ``preprocessing`` and is
    available afterwards through the read-only ``lupi_features`` property.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Filled in by ``preprocessing``; None until data has been validated.
        self._lupi_features = None

    @property
    def lupi_features(self):
        """Number of privileged features recorded by ``preprocessing``."""
        return self._lupi_features

    @classmethod
    def parameters(cls):
        """Return the names of the hyperparameters of this problem type."""
        return ["C", "scaling_lupi_w", "scaling_lupi_loss"]

    @property
    def get_initmodel_template(cls):
        """Class of the initial model used for this problem type."""
        return LUPI_Classification_SVM

    @property
    def get_cvxproblem_template(cls):
        """Class of the relevance bound problem used for this problem type."""
        return LUPI_Classification_Relevance_Bound

    @classmethod
    def relax_factors(cls):
        """Return the names of the relaxation factors of this problem type.

        Declared as a classmethod (like ``parameters``) so it can be called on
        the class as well as on an instance.
        """
        return ["loss_slack", "w_l1_slack"]

    def preprocessing(self, data, lupi_features=None):
        """Validate the data and encode the labels as -1 / 1.

        Parameters
        ----------
        data : tuple
            Pair ``(X, y)`` of feature matrix and class labels.
        lupi_features : int
            Number of privileged features; must satisfy
            ``0 < lupi_features < X.shape[1]``.

        Returns
        -------
        tuple
            ``(X, y)`` as returned by ``check_X_y``, with ``y`` label-encoded
            and the encoded class 0 replaced by -1.

        Raises
        ------
        ValueError
            If ``lupi_features`` is missing, not an integer or out of range,
            or if more than two classes are present.
        """
        X, y = data

        if lupi_features is None:
            raise ValueError("Argument 'lupi_features' missing in fit() call.")
        # numpy integers (e.g. taken from an array shape computation) are accepted too
        if not isinstance(lupi_features, (int, np.integer)):
            raise ValueError("Argument 'lupi_features' is not type int.")

        # Check that X and y have correct shape. This runs before X.shape is
        # read so that array-likes such as nested lists are supported.
        X, y = check_X_y(X, y)
        d = X.shape[1]

        if not 0 < lupi_features < d:
            raise ValueError(
                "Argument 'lupi_features' looks wrong. We need at least 1 priviliged feature (>0) or at least one normal feature."
            )
        self._lupi_features = int(lupi_features)

        # Store the classes seen during fit
        classes_ = unique_labels(y)
        if len(classes_) > 2:
            raise ValueError("Only binary class data supported")

        # Negative class is set to -1 for decision surface
        y = preprocessing.LabelEncoder().fit_transform(y)
        y[y == 0] = -1
        return X, y
class LUPI_Classification_SVM(InitModel):
    """Linear classifier with L1 regularisation trained with privileged information.

    The model is fitted by solving a convex problem with cvxpy. Only the
    weights of the normal features are used for prediction.
    """

    HYPERPARAMETER = ["C", "scaling_lupi_w", "scaling_lupi_loss"]

    def __init__(self, C=1, scaling_lupi_w=1, scaling_lupi_loss=1, lupi_features=None):
        super().__init__()
        self.lupi_features = lupi_features
        self.scaling_lupi_loss = scaling_lupi_loss
        self.scaling_lupi_w = scaling_lupi_w
        self.C = C

    def fit(self, X_combined, y, lupi_features=None):
        """
        Fit the model by solving the LUPI problem with cvxpy.

        Parameters
        ----------
        X_combined : array-like
            Data handed to ``split_dataset`` together with the number of
            privileged features.
        y : ndarray
            Class labels, multiplied element-wise with the decision functions.
            NOTE(review): presumably encoded as -1 / 1 -- confirm against caller.
        lupi_features : int
            Number of features in dataset which are considered privileged information (PI).
            PI features are expected to be the last features in the dataset.
            Falls back to the value given to the constructor when omitted.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If no number of privileged features is available.
        """
        if lupi_features is None:
            lupi_features = getattr(self, "lupi_features", None)
        if lupi_features is None:
            raise ValueError("No amount of lupi features given.")
        # Remember the resolved value: predict() splits new data with it, and
        # the split below must agree with the size of w_priv.
        self.lupi_features = lupi_features

        X, X_priv = split_dataset(X_combined, lupi_features)
        (n, d) = X.shape

        # Get parameters from CV model without any feature contstraints
        params = self.get_params()
        C = params["C"]
        scaling_lupi_w = params["scaling_lupi_w"]
        scaling_lupi_loss = params["scaling_lupi_loss"]

        # Initalize Variables in cvxpy
        w = cvx.Variable(shape=(d), name="w")
        w_priv = cvx.Variable(lupi_features, name="w_priv")
        b = cvx.Variable(name="bias")
        b_priv = cvx.Variable(name="bias_priv")

        # Define functions for better readability
        function = X @ w + b
        priv_function = X_priv @ w_priv + b_priv
        slack = cvx.Variable(shape=(n))

        # Combined loss of lupi function and normal slacks, scaled by two constants
        loss = scaling_lupi_loss * cvx.sum(priv_function) + cvx.sum(slack)

        # L1 norm regularization of both functions with 1 scaling constant
        w_l1 = cvx.norm(w, 1)
        w_priv_l1 = cvx.norm(w_priv, 1)
        weight_regularization = 0.5 * (w_l1 + scaling_lupi_w * w_priv_l1)

        constraints = [
            cvx.multiply(y.T, function) >= 1 - cvx.multiply(y.T, priv_function) - slack,
            priv_function >= 0,
            slack >= 0,
        ]
        objective = cvx.Minimize(C * loss + weight_regularization)

        # Solve problem.
        problem = cvx.Problem(objective, constraints)
        problem.solve(**self.SOLVER_PARAMS)

        self.model_state = {
            "w": w.value,
            "w_priv": w_priv.value,
            "b": b.value,
            "b_priv": b_priv.value,
            "lupi_features": lupi_features,  # Number of lupi features in the dataset TODO: Move this somewhere else
        }
        # Values reached by the solution; stored for later use as bounds
        self.constraints = {
            "loss": loss.value,
            "w_l1": w_l1.value,
            "w_priv_l1": w_priv_l1.value,
        }
        return self

    def predict(self, X):
        """Predict labels (-1 or 1) using only the normal features of ``X``."""
        # The privileged part of the split is not needed for prediction
        X, _ = split_dataset(X, self.lupi_features)
        w = self.model_state["w"]
        b = self.model_state["b"]

        # Simple hyperplane classification rule
        f = np.dot(X, w) + b
        y = (f >= 0).astype(int)
        # Format binary as signed unit vector
        y[y == 0] = -1
        return y

    def score(self, X, y, **kwargs):
        """Return the weighted F1 score of the predictions for ``X``.

        If the keyword ``verbose`` is passed (with any value), the text of
        ``classification_report`` is returned instead.
        """
        prediction = self.predict(X)

        # Negative class is set to -1 for decision surface
        y = LabelEncoder().fit_transform(y)
        y[y == 0] = -1

        # Using weighted f1 score to have a stable score for imbalanced datasets
        score = fbeta_score(y, prediction, beta=1, average="weighted")
        if "verbose" in kwargs:
            return classification_report(y, prediction)
        return score
class LUPI_Classification_Relevance_Bound(
    LUPI_Relevance_CVXProblem, Classification_Relevance_Bound
):
    """Relevance bound problem for the LUPI classification model."""

    def _init_objective_UB_LUPI(self, sign=None, **kwargs):
        """Maximise the relevance, bounded above by the signed privileged weight."""
        signed_weight = sign * self.w_priv[self.lupi_index]
        self.add_constraint(self.feature_relevance <= signed_weight)
        self._objective = cvx.Maximize(self.feature_relevance)

    def _init_objective_LB_LUPI(self, **kwargs):
        """Minimise the relevance, bounded below by the absolute privileged weight."""
        absolute_weight = cvx.abs(self.w_priv[self.lupi_index])
        self.add_constraint(absolute_weight <= self.feature_relevance)
        self._objective = cvx.Minimize(self.feature_relevance)

    def _init_constraints(self, parameters, init_model_constraints):
        """Create the variables and the constraints shared by all bound problems.

        ``init_model_constraints`` supplies the keys ``"w_l1"``,
        ``"w_priv_l1"`` and ``"loss"``, which are used as upper limits.
        NOTE(review): ``parameters`` is not read here and the loss is not
        scaled by ``scaling_lupi_loss`` -- confirm that this is intended.
        """
        # Upper constraints from best initial model
        bound_l1 = init_model_constraints["w_l1"]
        bound_l1_priv = init_model_constraints["w_priv_l1"]
        bound_loss = init_model_constraints["loss"]

        # New Variables
        w = cvx.Variable(shape=(self.d), name="w")
        w_priv = cvx.Variable(shape=(self.d_priv), name="w_priv")
        b = cvx.Variable(name="b")
        b_priv = cvx.Variable(name="b_priv")
        slack = cvx.Variable(shape=(self.n))

        # Expressions the constraints are built from
        margin = cvx.multiply(self.y.T, self.X @ w + b)
        priv_function = self.X_priv @ w_priv + b_priv
        loss = cvx.sum(priv_function) + cvx.sum(slack)
        norm_w = cvx.norm(w, 1)
        norm_w_priv = cvx.norm(w_priv, 1)

        # New Constraints, registered in a fixed order
        self.add_constraint(
            margin >= 1 - cvx.multiply(self.y.T, priv_function) - slack
        )
        self.add_constraint(priv_function >= 0)
        self.add_constraint(loss <= bound_loss)
        self.add_constraint(norm_w + norm_w_priv <= bound_l1 + bound_l1_priv)
        self.add_constraint(slack >= 0)

        # Save values for object use later
        self.w = w
        self.w_priv = w_priv
        self.feature_relevance = cvx.Variable(nonneg=True, name="Feature Relevance")
Classes
LUPI_Classification
class LUPI_Classification(
**kwargs
)
Helper class that provides a standard way to create an ABC using inheritance.
View Source
class LUPI_Classification(ProblemType):
    """Problem type for binary classification with privileged information (LUPI).

    The number of privileged features is handed to ``preprocessing`` and is
    available afterwards through the read-only ``lupi_features`` property.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Filled in by ``preprocessing``; None until data has been validated.
        self._lupi_features = None

    @property
    def lupi_features(self):
        """Number of privileged features recorded by ``preprocessing``."""
        return self._lupi_features

    @classmethod
    def parameters(cls):
        """Return the names of the hyperparameters of this problem type."""
        return ["C", "scaling_lupi_w", "scaling_lupi_loss"]

    @property
    def get_initmodel_template(cls):
        """Class of the initial model used for this problem type."""
        return LUPI_Classification_SVM

    @property
    def get_cvxproblem_template(cls):
        """Class of the relevance bound problem used for this problem type."""
        return LUPI_Classification_Relevance_Bound

    @classmethod
    def relax_factors(cls):
        """Return the names of the relaxation factors of this problem type.

        Declared as a classmethod (like ``parameters``) so it can be called on
        the class as well as on an instance.
        """
        return ["loss_slack", "w_l1_slack"]

    def preprocessing(self, data, lupi_features=None):
        """Validate the data and encode the labels as -1 / 1.

        Parameters
        ----------
        data : tuple
            Pair ``(X, y)`` of feature matrix and class labels.
        lupi_features : int
            Number of privileged features; must satisfy
            ``0 < lupi_features < X.shape[1]``.

        Returns
        -------
        tuple
            ``(X, y)`` as returned by ``check_X_y``, with ``y`` label-encoded
            and the encoded class 0 replaced by -1.

        Raises
        ------
        ValueError
            If ``lupi_features`` is missing, not an integer or out of range,
            or if more than two classes are present.
        """
        X, y = data

        if lupi_features is None:
            raise ValueError("Argument 'lupi_features' missing in fit() call.")
        # numpy integers (e.g. taken from an array shape computation) are accepted too
        if not isinstance(lupi_features, (int, np.integer)):
            raise ValueError("Argument 'lupi_features' is not type int.")

        # Check that X and y have correct shape. This runs before X.shape is
        # read so that array-likes such as nested lists are supported.
        X, y = check_X_y(X, y)
        d = X.shape[1]

        if not 0 < lupi_features < d:
            raise ValueError(
                "Argument 'lupi_features' looks wrong. We need at least 1 priviliged feature (>0) or at least one normal feature."
            )
        self._lupi_features = int(lupi_features)

        # Store the classes seen during fit
        classes_ = unique_labels(y)
        if len(classes_) > 2:
            raise ValueError("Only binary class data supported")

        # Negative class is set to -1 for decision surface
        y = preprocessing.LabelEncoder().fit_transform(y)
        y[y == 0] = -1
        return X, y
Ancestors (in MRO)
- fri.model.base_type.ProblemType
- abc.ABC
Static methods
parameters
def parameters(
)
View Source
@classmethod
def parameters(cls):
    """Return the names of the hyperparameters of this problem type."""
    names = ["C", "scaling_lupi_w", "scaling_lupi_loss"]
    return names
Instance variables
get_cvxproblem_template
get_initmodel_template
lupi_features
Methods
get_all_parameters
def get_all_parameters(
self
)
View Source
def get_all_parameters(self):
    """Map every name from ``parameters()`` to its ``get_chosen_parameter`` result."""
    chosen = {}
    for name in self.parameters():
        chosen[name] = self.get_chosen_parameter(name)
    return chosen
get_all_relax_factors
def get_all_relax_factors(
self
)
View Source
def get_all_relax_factors(self):
    """Map every name from ``relax_factors()`` to its ``get_chosen_relax_factors`` result."""
    chosen = {}
    for name in self.relax_factors():
        chosen[name] = self.get_chosen_relax_factors(name)
    return chosen
get_chosen_parameter
def get_chosen_parameter(
self,
p
)
View Source
def get_chosen_parameter(self, p):
    """Return the search space for hyperparameter ``p``.

    If a value for ``p`` is present in ``self.chosen_parameters_`` it is
    returned wrapped in a one-element list. Otherwise a default is returned:
    a fixed list of candidates for ``"epsilon"`` and a
    ``scipy.stats.reciprocal`` distribution for every other name.
    """
    try:
        # We return list for param search function
        return [self.chosen_parameters_[p]]
    except (AttributeError, KeyError, TypeError):
        # No usable chosen value (attribute missing, key missing, or the
        # container is not subscriptable): fall back to the defaults below.
        # Only lookup failures are handled, so e.g. KeyboardInterrupt is
        # no longer swallowed.
        pass

    # TODO: rewrite the parameter logic
    # TODO: move this to subclass
    if p == "scaling_lupi_w":
        return scipy.stats.reciprocal(a=1e-15, b=1e10)
    # "scaling_lupi_loss" has no dedicated branch and uses the generic default.
    if p == "C":
        return scipy.stats.reciprocal(a=1e-5, b=1e5)
    if p == "epsilon":
        return [0, 0.001, 0.01, 0.1, 1, 10, 100]
    return scipy.stats.reciprocal(a=1e-10, b=1e10)
get_chosen_relax_factors
def get_chosen_relax_factors(
self,
p
)
View Source
def get_chosen_relax_factors(self, p):
    """Return the relaxation factor stored for ``p``.

    ``self.relax_factors_`` is looked up under ``p`` first and under
    ``p + "_slack"`` second; if neither key exists the default 0.1 is used.

    Raises
    ------
    ValueError
        If the resulting factor is negative.
    """
    for key in (p, p + "_slack"):
        try:
            factor = self.relax_factors_[key]
        except KeyError:
            continue
        break
    else:
        # Neither spelling of the key is present
        factor = 0.1

    if factor < 0:
        # The previous message claimed the factor "is positive", which is the
        # opposite of the condition being reported.
        raise ValueError("Slack Factor multiplier must not be negative!")
    return factor
get_relaxed_constraints
def get_relaxed_constraints(
self,
constraints
)
View Source
def get_relaxed_constraints(self, constraints):
    """Return a dict with every constraint value passed through ``relax_constraint``."""
    relaxed = {}
    for name, value in constraints.items():
        relaxed[name] = self.relax_constraint(name, value)
    return relaxed
postprocessing
def postprocessing(
self,
bounds
)
View Source
def postprocessing(self, bounds):
    """Return ``bounds`` unchanged; no postprocessing is applied here."""
    result = bounds
    return result
preprocessing
def preprocessing(
self,
data,
lupi_features=None
)
View Source
def preprocessing(self, data, lupi_features=None):
    """Validate the data and encode the labels as -1 / 1.

    Parameters
    ----------
    data : tuple
        Pair ``(X, y)`` of feature matrix and class labels.
    lupi_features : int
        Number of privileged features; must satisfy
        ``0 < lupi_features < X.shape[1]``.

    Returns
    -------
    tuple
        ``(X, y)`` as returned by ``check_X_y``, with ``y`` label-encoded
        and the encoded class 0 replaced by -1.

    Raises
    ------
    ValueError
        If ``lupi_features`` is missing, not an integer or out of range,
        or if more than two classes are present.
    """
    X, y = data

    if lupi_features is None:
        raise ValueError("Argument 'lupi_features' missing in fit() call.")
    # numpy integers (e.g. taken from an array shape computation) are accepted too
    if not isinstance(lupi_features, (int, np.integer)):
        raise ValueError("Argument 'lupi_features' is not type int.")

    # Check that X and y have correct shape. This runs before X.shape is
    # read so that array-likes such as nested lists are supported.
    X, y = check_X_y(X, y)
    d = X.shape[1]

    if not 0 < lupi_features < d:
        raise ValueError(
            "Argument 'lupi_features' looks wrong. We need at least 1 priviliged feature (>0) or at least one normal feature."
        )
    self._lupi_features = int(lupi_features)

    # Store the classes seen during fit
    classes_ = unique_labels(y)
    if len(classes_) > 2:
        raise ValueError("Only binary class data supported")

    # Negative class is set to -1 for decision surface
    y = preprocessing.LabelEncoder().fit_transform(y)
    y[y == 0] = -1
    return X, y
relax_constraint
def relax_constraint(
self,
key,
value
)
View Source
def relax_constraint(self, key, value):
    """Scale ``value`` by one plus the relaxation factor chosen for ``key``."""
    factor = self.get_chosen_relax_factors(key)
    multiplier = 1 + factor
    return value * multiplier
relax_factors
def relax_factors(
cls
)
View Source
@classmethod
def relax_factors(cls):
    """Return the names of the relaxation factors of this problem type.

    Declared as a classmethod because the only parameter is ``cls``; it can
    be called on the class as well as on an instance.
    """
    return ["loss_slack", "w_l1_slack"]
LUPI_Classification_Relevance_Bound
class LUPI_Classification_Relevance_Bound(
current_feature: int,
data: tuple,
hyperparameters,
best_model_constraints,
preset_model=None,
best_model_state=None,
probeID=-1
)
Helper class that provides a standard way to create an ABC using inheritance.
View Source
class LUPI_Classification_Relevance_Bound(
    LUPI_Relevance_CVXProblem, Classification_Relevance_Bound
):
    """Relevance bound problem for the LUPI classification model."""

    def _init_objective_UB_LUPI(self, sign=None, **kwargs):
        """Maximise the relevance, bounded above by the signed privileged weight."""
        signed_weight = sign * self.w_priv[self.lupi_index]
        self.add_constraint(self.feature_relevance <= signed_weight)
        self._objective = cvx.Maximize(self.feature_relevance)

    def _init_objective_LB_LUPI(self, **kwargs):
        """Minimise the relevance, bounded below by the absolute privileged weight."""
        absolute_weight = cvx.abs(self.w_priv[self.lupi_index])
        self.add_constraint(absolute_weight <= self.feature_relevance)
        self._objective = cvx.Minimize(self.feature_relevance)

    def _init_constraints(self, parameters, init_model_constraints):
        """Create the variables and the constraints shared by all bound problems.

        ``init_model_constraints`` supplies the keys ``"w_l1"``,
        ``"w_priv_l1"`` and ``"loss"``, which are used as upper limits.
        NOTE(review): ``parameters`` is not read here and the loss is not
        scaled by ``scaling_lupi_loss`` -- confirm that this is intended.
        """
        # Upper constraints from best initial model
        bound_l1 = init_model_constraints["w_l1"]
        bound_l1_priv = init_model_constraints["w_priv_l1"]
        bound_loss = init_model_constraints["loss"]

        # New Variables
        w = cvx.Variable(shape=(self.d), name="w")
        w_priv = cvx.Variable(shape=(self.d_priv), name="w_priv")
        b = cvx.Variable(name="b")
        b_priv = cvx.Variable(name="b_priv")
        slack = cvx.Variable(shape=(self.n))

        # Expressions the constraints are built from
        margin = cvx.multiply(self.y.T, self.X @ w + b)
        priv_function = self.X_priv @ w_priv + b_priv
        loss = cvx.sum(priv_function) + cvx.sum(slack)
        norm_w = cvx.norm(w, 1)
        norm_w_priv = cvx.norm(w_priv, 1)

        # New Constraints, registered in a fixed order
        self.add_constraint(
            margin >= 1 - cvx.multiply(self.y.T, priv_function) - slack
        )
        self.add_constraint(priv_function >= 0)
        self.add_constraint(loss <= bound_loss)
        self.add_constraint(norm_w + norm_w_priv <= bound_l1 + bound_l1_priv)
        self.add_constraint(slack >= 0)

        # Save values for object use later
        self.w = w
        self.w_priv = w_priv
        self.feature_relevance = cvx.Variable(nonneg=True, name="Feature Relevance")
Ancestors (in MRO)
- fri.model.base_lupi.LUPI_Relevance_CVXProblem
- fri.model.classification.Classification_Relevance_Bound
- fri.model.base_cvxproblem.Relevance_CVXProblem
- abc.ABC
Static methods
aggregate_max_candidates
def aggregate_max_candidates(
max_problems_candidates
)
View Source
@classmethod
def aggregate_max_candidates(cls, max_problems_candidates):
    """Return the largest ``solved_relevance`` among the given candidates."""
    relevances = []
    for candidate in max_problems_candidates:
        relevances.append(candidate.solved_relevance)
    return max(relevances)
aggregate_min_candidates
def aggregate_min_candidates(
min_problems_candidates
)
View Source
@classmethod
def aggregate_min_candidates(cls, min_problems_candidates):
    """Return the smallest ``solved_relevance`` among the given candidates."""
    relevances = []
    for candidate in min_problems_candidates:
        relevances.append(candidate.solved_relevance)
    return min(relevances)
generate_lower_bound_problem
def generate_lower_bound_problem(
best_hyperparameters,
init_constraints,
best_model_state,
data,
di,
preset_model,
probeID=-1
)
View Source
@classmethod
def generate_lower_bound_problem(
    cls,
    best_hyperparameters,
    init_constraints,
    best_model_state,
    data,
    di,
    preset_model,
    probeID=-1,
):
    """Yield a single problem instance configured as lower bound problem.

    The instance is built for feature ``di``, gets its lower bound objective
    initialised and is flagged with ``isLowerBound = True``.
    """
    options = {
        "preset_model": preset_model,
        "best_model_state": best_model_state,
        "probeID": probeID,
    }
    lower = cls(di, data, best_hyperparameters, init_constraints, **options)
    lower.init_objective_LB()
    lower.isLowerBound = True
    yield lower
generate_upper_bound_problem
def generate_upper_bound_problem(
best_hyperparameters,
init_constraints,
best_model_state,
data,
di,
preset_model,
probeID=-1
)
View Source
@classmethod
def generate_upper_bound_problem(
    cls,
    best_hyperparameters,
    init_constraints,
    best_model_state,
    data,
    di,
    preset_model,
    probeID=-1,
):
    """Yield two upper bound problems, one for sign -1 and one for sign 1.

    Each instance is built for feature ``di``, gets its upper bound objective
    initialised with the sign and is flagged with ``isLowerBound = False``.
    """
    options = {
        "preset_model": preset_model,
        "best_model_state": best_model_state,
        "probeID": probeID,
    }
    for sign in (-1, 1):
        upper = cls(di, data, best_hyperparameters, init_constraints, **options)
        upper.init_objective_UB(sign=sign)
        upper.isLowerBound = False
        yield upper
Instance variables
accepted_status
constraints
cvx_problem
isProbe
is_solved
objective
probeID
solved_relevance
solver_kwargs
Methods
add_constraint
def add_constraint(
self,
new
)
View Source
def add_constraint(self, new):
    """Append the constraint ``new`` to the internal constraint list."""
    registered = self._constraints
    registered.append(new)
init_objective_LB
def init_objective_LB(
self,
**kwargs
)
View Source
def init_objective_LB(self, **kwargs):
    """Initialise the lower bound objective for the current feature.

    Privileged features use the LUPI specific objective; all other features
    are delegated to the next class in the MRO.
    """
    # We have two models basically with different indexes
    if not self.isPriv:
        # We call sibling class of our lupi class, which is the normal problem
        super().init_objective_LB(**kwargs)
        return
    self._init_objective_LB_LUPI(**kwargs)
init_objective_UB
def init_objective_UB(
self,
**kwargs
)
View Source
def init_objective_UB(self, **kwargs):
    """Initialise the upper bound objective for the current feature.

    Privileged features use the LUPI specific objective; all other features
    are delegated to the next class in the MRO.
    """
    # We have two models basically with different indexes
    if not self.isPriv:
        # We call sibling class of our lupi class, which is the normal problem
        super().init_objective_UB(**kwargs)
        return
    self._init_objective_UB_LUPI(**kwargs)
preprocessing_data
def preprocessing_data(
self,
data,
best_model_state
)
View Source
def preprocessing_data(self, data, best_model_state):
    """Split off the privileged features and locate the current feature.

    The number of privileged features is read from
    ``best_model_state["lupi_features"]``. The normal part of the data is
    passed on to the parent implementation, the privileged part is kept in
    ``self.X_priv``. ``self.isPriv`` tells whether the current feature lies
    in the privileged part.
    """
    n_priv = best_model_state["lupi_features"]
    X_combined, y = data
    X, X_priv = split_dataset(X_combined, n_priv)
    self.X_priv = X_priv

    super().preprocessing_data((X, y), best_model_state)

    assert n_priv == X_priv.shape[1]
    self.d_priv = n_priv

    # LUPI model, we need to offset the index
    self.lupi_index = self.current_feature - self.d
    # A non-negative offset means the feature belongs to the privileged part
    self.isPriv = True if self.lupi_index >= 0 else False
solve
def solve(
self
) -> object
View Source
def solve(self) -> object:
    """Build the cvxpy problem, solve it and record the solver status.

    Returns ``self``. A ``SolverError`` raised while solving is ignored.
    """
    # We init cvx problem here because pickling LP solver objects is problematic
    # by deferring it to here, worker threads do the problem building themselves and we spare the serialization
    problem = cvx.Problem(objective=self.objective, constraints=self.constraints)
    self._cvx_problem = problem
    try:
        problem.solve(**self.solver_kwargs)
    except SolverError:
        # We ignore Solver Errors, which are common with our framework:
        # We solve multiple problems per bound and choose a feasible solution later (see '_create_interval')
        pass
    self._solver_status = problem.status
    return self
LUPI_Classification_SVM
class LUPI_Classification_SVM(
C=1,
scaling_lupi_w=1,
scaling_lupi_loss=1,
lupi_features=None
)
Helper class that provides a standard way to create an ABC using inheritance.
View Source
class LUPI_Classification_SVM(InitModel):
    """Linear classifier with L1 regularisation trained with privileged information.

    The model is fitted by solving a convex problem with cvxpy. Only the
    weights of the normal features are used for prediction.
    """

    HYPERPARAMETER = ["C", "scaling_lupi_w", "scaling_lupi_loss"]

    def __init__(self, C=1, scaling_lupi_w=1, scaling_lupi_loss=1, lupi_features=None):
        super().__init__()
        self.lupi_features = lupi_features
        self.scaling_lupi_loss = scaling_lupi_loss
        self.scaling_lupi_w = scaling_lupi_w
        self.C = C

    def fit(self, X_combined, y, lupi_features=None):
        """
        Fit the model by solving the LUPI problem with cvxpy.

        Parameters
        ----------
        X_combined : array-like
            Data handed to ``split_dataset`` together with the number of
            privileged features.
        y : ndarray
            Class labels, multiplied element-wise with the decision functions.
            NOTE(review): presumably encoded as -1 / 1 -- confirm against caller.
        lupi_features : int
            Number of features in dataset which are considered privileged information (PI).
            PI features are expected to be the last features in the dataset.
            Falls back to the value given to the constructor when omitted.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If no number of privileged features is available.
        """
        if lupi_features is None:
            lupi_features = getattr(self, "lupi_features", None)
        if lupi_features is None:
            raise ValueError("No amount of lupi features given.")
        # Remember the resolved value: predict() splits new data with it, and
        # the split below must agree with the size of w_priv.
        self.lupi_features = lupi_features

        X, X_priv = split_dataset(X_combined, lupi_features)
        (n, d) = X.shape

        # Get parameters from CV model without any feature contstraints
        params = self.get_params()
        C = params["C"]
        scaling_lupi_w = params["scaling_lupi_w"]
        scaling_lupi_loss = params["scaling_lupi_loss"]

        # Initalize Variables in cvxpy
        w = cvx.Variable(shape=(d), name="w")
        w_priv = cvx.Variable(lupi_features, name="w_priv")
        b = cvx.Variable(name="bias")
        b_priv = cvx.Variable(name="bias_priv")

        # Define functions for better readability
        function = X @ w + b
        priv_function = X_priv @ w_priv + b_priv
        slack = cvx.Variable(shape=(n))

        # Combined loss of lupi function and normal slacks, scaled by two constants
        loss = scaling_lupi_loss * cvx.sum(priv_function) + cvx.sum(slack)

        # L1 norm regularization of both functions with 1 scaling constant
        w_l1 = cvx.norm(w, 1)
        w_priv_l1 = cvx.norm(w_priv, 1)
        weight_regularization = 0.5 * (w_l1 + scaling_lupi_w * w_priv_l1)

        constraints = [
            cvx.multiply(y.T, function) >= 1 - cvx.multiply(y.T, priv_function) - slack,
            priv_function >= 0,
            slack >= 0,
        ]
        objective = cvx.Minimize(C * loss + weight_regularization)

        # Solve problem.
        problem = cvx.Problem(objective, constraints)
        problem.solve(**self.SOLVER_PARAMS)

        self.model_state = {
            "w": w.value,
            "w_priv": w_priv.value,
            "b": b.value,
            "b_priv": b_priv.value,
            "lupi_features": lupi_features,  # Number of lupi features in the dataset TODO: Move this somewhere else
        }
        # Values reached by the solution; stored for later use as bounds
        self.constraints = {
            "loss": loss.value,
            "w_l1": w_l1.value,
            "w_priv_l1": w_priv_l1.value,
        }
        return self

    def predict(self, X):
        """Predict labels (-1 or 1) using only the normal features of ``X``."""
        # The privileged part of the split is not needed for prediction
        X, _ = split_dataset(X, self.lupi_features)
        w = self.model_state["w"]
        b = self.model_state["b"]

        # Simple hyperplane classification rule
        f = np.dot(X, w) + b
        y = (f >= 0).astype(int)
        # Format binary as signed unit vector
        y[y == 0] = -1
        return y

    def score(self, X, y, **kwargs):
        """Return the weighted F1 score of the predictions for ``X``.

        If the keyword ``verbose`` is passed (with any value), the text of
        ``classification_report`` is returned instead.
        """
        prediction = self.predict(X)

        # Negative class is set to -1 for decision surface
        y = LabelEncoder().fit_transform(y)
        y[y == 0] = -1

        # Using weighted f1 score to have a stable score for imbalanced datasets
        score = fbeta_score(y, prediction, beta=1, average="weighted")
        if "verbose" in kwargs:
            return classification_report(y, prediction)
        return score
Ancestors (in MRO)
- fri.model.base_initmodel.InitModel
- abc.ABC
- sklearn.base.BaseEstimator
Class variables
HYPERPARAMETER
SOLVER_PARAMS
Instance variables
L1_factor
Methods
fit
def fit(
self,
X_combined,
y,
lupi_features=None
)
Parameters
lupi_features : int Number of features in dataset which are considered privileged information (PI). PI features are expected to be the last features in the dataset.
View Source
def fit(self, X_combined, y, lupi_features=None):
    """
    Fit the model by solving the LUPI problem with cvxpy.

    Parameters
    ----------
    X_combined : array-like
        Data handed to ``split_dataset`` together with the number of
        privileged features.
    y : ndarray
        Class labels, multiplied element-wise with the decision functions.
        NOTE(review): presumably encoded as -1 / 1 -- confirm against caller.
    lupi_features : int
        Number of features in dataset which are considered privileged information (PI).
        PI features are expected to be the last features in the dataset.
        Falls back to the value given to the constructor when omitted.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If no number of privileged features is available.
    """
    if lupi_features is None:
        lupi_features = getattr(self, "lupi_features", None)
    if lupi_features is None:
        raise ValueError("No amount of lupi features given.")
    # Remember the resolved value: predict() splits new data with it, and
    # the split below must agree with the size of w_priv.
    self.lupi_features = lupi_features

    X, X_priv = split_dataset(X_combined, lupi_features)
    (n, d) = X.shape

    # Get parameters from CV model without any feature contstraints
    params = self.get_params()
    C = params["C"]
    scaling_lupi_w = params["scaling_lupi_w"]
    scaling_lupi_loss = params["scaling_lupi_loss"]

    # Initalize Variables in cvxpy
    w = cvx.Variable(shape=(d), name="w")
    w_priv = cvx.Variable(lupi_features, name="w_priv")
    b = cvx.Variable(name="bias")
    b_priv = cvx.Variable(name="bias_priv")

    # Define functions for better readability
    function = X @ w + b
    priv_function = X_priv @ w_priv + b_priv
    slack = cvx.Variable(shape=(n))

    # Combined loss of lupi function and normal slacks, scaled by two constants
    loss = scaling_lupi_loss * cvx.sum(priv_function) + cvx.sum(slack)

    # L1 norm regularization of both functions with 1 scaling constant
    w_l1 = cvx.norm(w, 1)
    w_priv_l1 = cvx.norm(w_priv, 1)
    weight_regularization = 0.5 * (w_l1 + scaling_lupi_w * w_priv_l1)

    constraints = [
        cvx.multiply(y.T, function) >= 1 - cvx.multiply(y.T, priv_function) - slack,
        priv_function >= 0,
        slack >= 0,
    ]
    objective = cvx.Minimize(C * loss + weight_regularization)

    # Solve problem.
    problem = cvx.Problem(objective, constraints)
    problem.solve(**self.SOLVER_PARAMS)

    self.model_state = {
        "w": w.value,
        "w_priv": w_priv.value,
        "b": b.value,
        "b_priv": b_priv.value,
        "lupi_features": lupi_features,  # Number of lupi features in the dataset TODO: Move this somewhere else
    }
    # Values reached by the solution; stored for later use as bounds
    self.constraints = {
        "loss": loss.value,
        "w_l1": w_l1.value,
        "w_priv_l1": w_priv_l1.value,
    }
    return self
get_params
def get_params(
self,
deep=True
)
Get parameters for this estimator.
Parameters
deep : bool, default=True If True, will return the parameters for this estimator and contained subobjects that are estimators.
Returns
params : dict Parameter names mapped to their values.
View Source
def get_params(self, deep=True):
    """
    Get parameters for this estimator.

    Parameters
    ----------
    deep : bool, default=True
        If True, will return the parameters for this estimator and
        contained subobjects that are estimators.

    Returns
    -------
    params : dict
        Parameter names mapped to their values.
    """
    params = {}
    for name in self._get_param_names():
        attr = getattr(self, name)
        if deep and hasattr(attr, 'get_params'):
            # Nested parameters are listed as "<name>__<sub name>" and are
            # inserted before the entry of the sub-object itself.
            for sub_name, sub_value in attr.get_params().items():
                params[name + '__' + sub_name] = sub_value
        params[name] = attr
    return params
make_scorer
def make_scorer(
self
)
View Source
def make_scorer(self):
return None, None
predict
def predict(
self,
X
)
View Source
def predict(self, X):
    """Predict labels (-1 or 1) using only the normal features of ``X``."""
    # The privileged part of the split is not needed for prediction
    normal_part, _ = split_dataset(X, self.lupi_features)
    state = self.model_state
    weights = state["w"]
    bias = state["b"]

    # Simple hyperplane classification rule
    decision = np.dot(normal_part, weights) + bias
    labels = (decision >= 0).astype(int)

    # Format binary as signed unit vector
    labels[labels == 0] = -1
    return labels
score
def score(
self,
X,
y,
**kwargs
)
View Source
def score(self, X, y, **kwargs):
    """Return the weighted F1 score of the predictions for ``X``.

    If the keyword ``verbose`` is passed (with any value), the text of
    ``classification_report`` is returned instead.
    """
    predicted = self.predict(X)

    # Negative class is set to -1 for decision surface
    encoded = LabelEncoder().fit_transform(y)
    encoded[encoded == 0] = -1

    # Using weighted f1 score to have a stable score for imbalanced datasets
    weighted_f1 = fbeta_score(encoded, predicted, beta=1, average="weighted")
    if "verbose" not in kwargs:
        return weighted_f1
    return classification_report(encoded, predicted)
set_params
def set_params(
self,
**params
)
Set the parameters of this estimator.
The method works on simple estimators as well as on nested objects
(such as `sklearn.pipeline.Pipeline`). The latter have parameters of the
form `<component>__<parameter>` so that it's possible to update each
component of a nested object.
Parameters
**params : dict Estimator parameters.
Returns
self : estimator instance Estimator instance.
View Source
def set_params(self, **params):
    """
    Set the parameters of this estimator.

    The method works on simple estimators as well as on nested objects
    (such as :class:`~sklearn.pipeline.Pipeline`). The latter have
    parameters of the form ``<component>__<parameter>`` so that it's
    possible to update each component of a nested object.

    Parameters
    ----------
    **params : dict
        Estimator parameters.

    Returns
    -------
    self : estimator instance
        Estimator instance.
    """
    if not params:
        # Simple optimization to gain speed (inspect is slow)
        return self

    known = self.get_params(deep=True)
    per_component = defaultdict(dict)  # grouped by prefix

    for full_name, value in params.items():
        name, delim, sub_name = full_name.partition('__')
        if name not in known:
            raise ValueError('Invalid parameter %s for estimator %s. '
                             'Check the list of available parameters '
                             'with `estimator.get_params().keys()`.' %
                             (name, self))
        if not delim:
            # Plain parameter: set it directly on this estimator
            setattr(self, name, value)
            known[name] = value
            continue
        # Nested parameter: collected and forwarded to the component below
        per_component[name][sub_name] = value

    for name, sub_params in per_component.items():
        known[name].set_params(**sub_params)

    return self