Module fri.parameter_searcher

In this class we use hyperparameter search to find parameters needed in our model. Depending on the input model we sample parameters from a random distribution. The sampling rate can be increased. The model with the best internally defined accuracy is picked. To increase robustness we use cross validation.

View Source
"""

    In this class we use hyperparameter search to find parameters needed in our model.

    Depending on the input model we sample parameters from a random distribution.

    The sampling rate can be increased.

    The model with the best internally defined accuracy is picked.

    To increase robustness we use cross validation.

"""

import warnings

from sklearn.exceptions import FitFailedWarning

warnings.filterwarnings(action="ignore", category=FitFailedWarning)

from pprint import pprint

from typing import Tuple

import numpy as np

from sklearn.model_selection import RandomizedSearchCV

from fri.model.base_initmodel import InitModel

def find_best_model(
    model_template: InitModel,
    hyperparameters: dict,
    data: Tuple[np.ndarray, np.ndarray],
    random_state: np.random.RandomState,
    n_iter: int,
    n_jobs: int,
    verbose: int = 0,
    lupi_features=None,
    kwargs: dict = None,
) -> Tuple[InitModel, float]:
    """
    Search function which wraps `sklearn`'s `RandomizedSearchCV` function.

    We sample candidates from the distributions defined in `hyperparameters`
    and pick the model with the best cross-validated (3-fold) score, as
    defined by the `model_template`'s own scorer when it provides one.

    Parameters
    ----------
    model_template : InitModel
        A model template which is used to fit data.
    hyperparameters : dict
        Dictionary of hyperparameters.
    data : tuple
        Tuple of data (X, y)
    random_state : RandomState
        numpy RandomState object
    n_iter : int
        Amount of search samples.
    n_jobs : int
        Allows multiprocessing with `n_jobs` threads.
    verbose : int
        Allows verbose output when `verbose>0`.
    lupi_features : int, optional
        Amount of lupi_features; when None or 0 a plain model is created.
    kwargs : dict
        Placeholder, dict to pass into fit functions.

    Returns
    -------
    Tuple[InitModel, float]
        The refit best estimator and its score on the full training data.
    """
    # BUG FIX: `lupi_features` defaults to None, and `None > 0` raises a
    # TypeError on Python 3 — guard against None before comparing.
    if lupi_features is not None and lupi_features > 0:
        model = model_template(lupi_features=lupi_features)
    else:
        model = model_template()

    # A template may supply a custom scorer; otherwise fall back to the
    # estimator's default `score` method (refit=True).
    scorer, metric = model.make_scorer()
    refit = True if scorer is None else metric

    searcher = RandomizedSearchCV(
        model,
        hyperparameters,
        scoring=scorer,
        random_state=random_state,
        refit=refit,
        cv=3,
        n_iter=n_iter,
        n_jobs=n_jobs,
        error_score=np.nan,  # failed fits score NaN instead of aborting the search
        verbose=verbose,
    )

    X, y = data
    # Ignore warnings for extremely bad model_state (when precision=0)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        searcher.fit(X, y)

    best_model: InitModel = searcher.best_estimator_
    # Score the refit estimator on the full training set.
    best_score = best_model.score(X, y)
    if verbose > 0:
        _print_search_summary(best_model, best_score)
    return best_model, best_score


def _print_search_summary(best_model: InitModel, best_score: float) -> None:
    """Pretty-print the winning model, its score, constraints and state."""
    print("*" * 20, "Best found baseline model", "*" * 20)
    pprint(best_model)
    print("score: ", best_score)
    for k, v in best_model.constraints.items():
        pprint((f"{k}: {v}"))
    for k, v in best_model.model_state.items():
        if hasattr(v, "shape"):
            # Arrays are summarized by shape to keep the output readable.
            pprint((f"{k}: shape {v.shape}"))
        else:
            if "slack" in k:
                continue  # slack values are noise in the summary
            pprint((f"{k}: {v}"))
    print("*" * 30)

Functions

find_best_model

def find_best_model(
    model_template: fri.model.base_initmodel.InitModel,
    hyperparameters: dict,
    data: Tuple[numpy.ndarray, numpy.ndarray],
    random_state: numpy.random.mtrand.RandomState,
    n_iter: int,
    n_jobs: int,
    verbose: int = 0,
    lupi_features=None,
    kwargs: dict = None
) -> Tuple[fri.model.base_initmodel.InitModel, float]

Search function which wraps sklearn's RandomizedSearchCV function. We use distributions and parameters defined in the model_template.

Parameters

model_template : InitModel — A model template which is used to fit data.
hyperparameters : dict — Dictionary of hyperparameters.
data : tuple — Tuple of data (X, y).
random_state : RandomState — numpy RandomState object.
n_iter : int — Amount of search samples.
n_jobs : int — Allows multiprocessing with n_jobs threads.
verbose : int — Allows verbose output when verbose > 0.
lupi_features : int — Amount of lupi_features.
kwargs : dict — Placeholder, dict to pass into fit functions.

View Source
def find_best_model(
    model_template: InitModel,
    hyperparameters: dict,
    data: Tuple[np.ndarray, np.ndarray],
    random_state: np.random.RandomState,
    n_iter: int,
    n_jobs: int,
    verbose: int = 0,
    lupi_features=None,
    kwargs: dict = None,
) -> Tuple[InitModel, float]:
    """
    Search function which wraps `sklearn`'s `RandomizedSearchCV` function.

    We sample candidates from the distributions defined in `hyperparameters`
    and pick the model with the best cross-validated (3-fold) score, as
    defined by the `model_template`'s own scorer when it provides one.

    Parameters
    ----------
    model_template : InitModel
        A model template which is used to fit data.
    hyperparameters : dict
        Dictionary of hyperparameters.
    data : tuple
        Tuple of data (X, y)
    random_state : RandomState
        numpy RandomState object
    n_iter : int
        Amount of search samples.
    n_jobs : int
        Allows multiprocessing with `n_jobs` threads.
    verbose : int
        Allows verbose output when `verbose>0`.
    lupi_features : int, optional
        Amount of lupi_features; when None or 0 a plain model is created.
    kwargs : dict
        Placeholder, dict to pass into fit functions.

    Returns
    -------
    Tuple[InitModel, float]
        The refit best estimator and its score on the full training data.
    """
    # BUG FIX: `lupi_features` defaults to None, and `None > 0` raises a
    # TypeError on Python 3 — guard against None before comparing.
    if lupi_features is not None and lupi_features > 0:
        model = model_template(lupi_features=lupi_features)
    else:
        model = model_template()

    # A template may supply a custom scorer; otherwise fall back to the
    # estimator's default `score` method (refit=True).
    scorer, metric = model.make_scorer()
    refit = True if scorer is None else metric

    searcher = RandomizedSearchCV(
        model,
        hyperparameters,
        scoring=scorer,
        random_state=random_state,
        refit=refit,
        cv=3,
        n_iter=n_iter,
        n_jobs=n_jobs,
        error_score=np.nan,  # failed fits score NaN instead of aborting the search
        verbose=verbose,
    )

    X, y = data
    # Ignore warnings for extremely bad model_state (when precision=0)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        searcher.fit(X, y)

    best_model: InitModel = searcher.best_estimator_
    # Score the refit estimator on the full training set.
    best_score = best_model.score(X, y)

    if verbose > 0:
        print("*" * 20, "Best found baseline model", "*" * 20)
        pprint(best_model)
        print("score: ", best_score)
        for k, v in best_model.constraints.items():
            pprint((f"{k}: {v}"))
        for k, v in best_model.model_state.items():
            if hasattr(v, "shape"):
                # Arrays are summarized by shape to keep the output readable.
                pprint((f"{k}: shape {v.shape}"))
            else:
                if "slack" in k:
                    continue  # slack values are noise in the summary
                pprint((f"{k}: {v}"))
        print("*" * 30)

    return best_model, best_score