# Source code for mastml.metrics

"""
This module contains a metrics class for construction and evaluation of various regression score metrics between
true and model predicted data.

Metrics:
    Class to construct and evaluate a list of regression metrics of interest. The full list of available metrics
    can be obtained from Metrics(metrics_list=[])._metric_zoo()

"""

import numpy as np
import sklearn.metrics as sm
from sklearn.linear_model import LinearRegression

class Metrics():
    """
    Class containing access to a wide range of metrics from scikit-learn and a number of MAST-ML custom-written metrics

    Args:
        metrics_list: (list), list of strings of metric names to use

        metrics_type: (str), one of 'regression' or 'classification': whether to use set of common regression/classifier metrics.
            'classification' is accepted by the constructor but is not supported yet: building the metric zoo
            (and therefore calling evaluate) raises NotImplementedError.

    Methods:
        evaluate: main method to evaluate the specified metrics and the provided true and pred data
            Args:
                y_true: (pd.Series), series of true y data

                y_pred: (pd.Series), series of predicted y data

            Returns:
                stats_dict: (dict), dictionary of calculated statistics for each metric

        _get_metrics: builds the metrics dict of metric names : metric instances based on the metrics specified in metrics_list
            Args:
                None

            Returns:
                None

        _metric_zoo: method to retrieve full dict of metric names : metric instance pairs
            Args:
                None

            Returns:
                all_metrics (dict), dictionary of all metric names and instances

        _classification_metric_zoo: table of classification metrics, kept for when classification support is restored
            Args:
                None

            Returns:
                all_metrics (dict), dictionary of classification metric names and instances

    """
    def __init__(self, metrics_list, metrics_type='regression'):
        self.metrics_list = metrics_list
        self.metrics_type = metrics_type
        # Filled lazily by _get_metrics: metric name -> callable(y_true, y_pred)
        self.metrics_dict = dict()
        if self.metrics_type not in ['regression', 'classification']:
            raise AttributeError('metrics_type must be one of "regression" or "classification"')

    def evaluate(self, y_true, y_pred):
        # Evaluate all of the metrics between provided y_true and y_pred data
        stats_dict = dict()
        self._get_metrics()
        for metric_name, metric in self.metrics_dict.items():
            stats_dict[metric_name] = metric(y_true, y_pred)
        return stats_dict

    def _get_metrics(self):
        # Take the metric names and get the metric functions
        all_metrics = self._metric_zoo()
        for metric_name in self.metrics_list:
            if metric_name not in all_metrics:
                # Single formatted message; passing two args to NameError would render as a tuple
                raise NameError('{} is not a valid metric name'.format(metric_name))
            # Zoo values are (bool, callable) tuples; only the callable is needed here
            self.metrics_dict[metric_name] = all_metrics[metric_name][1]

    def _metric_zoo(self):
        # Each value is a (bool, callable) tuple. The bool is not used in this class
        # (presumably it flags whether a larger score is better -- TODO confirm).
        if self.metrics_type == 'regression':
            return {'explained_variance':      (True, sm.explained_variance_score),
                    'mean_absolute_error':     (False, sm.mean_absolute_error),
                    'mean_squared_error':      (False, sm.mean_squared_error),
                    'mean_squared_log_error':  (False, sm.mean_squared_log_error),
                    'median_absolute_error':   (False, sm.median_absolute_error),
                    'r2_score':                (True, sm.r2_score),
                    'r2_score_noint':          (True, r2_score_noint),
                    'r2_score_fitted':         (True, r2_score_fitted),
                    'r2_score_adjusted':       (True, r2_score_adjusted),
                    'root_mean_squared_error': (False, root_mean_squared_error),
                    'rmse_over_stdev':         (False, rmse_over_stdev),
                    }
        if self.metrics_type == 'classification':
            # Raise instead of print + exit(): library code must not terminate the host interpreter
            raise NotImplementedError('The new version of MAST-ML has not yet been reconfigured '
                                      'to handle classification tasks')
        # Only reachable if metrics_type was changed after construction
        raise AttributeError('metrics_type must be one of "regression" or "classification"')

    def _classification_metric_zoo(self):
        # Classification metric table. Not reachable through _metric_zoo until classification
        # tasks are supported again; kept so the metric definitions are not lost.
        return {'accuracy':           (True, sm.accuracy_score),
                'f1_binary':          (True, lambda yt, yp: sm.f1_score(yt, yp, average='binary')),
                'f1_macro':           (True, lambda yt, yp: sm.f1_score(yt, yp, average='macro')),
                'f1_micro':           (True, lambda yt, yp: sm.f1_score(yt, yp, average='micro')),
                'f1_samples':         (True, lambda yt, yp: sm.f1_score(yt, yp, average='samples')),
                'f1_weighted':        (True, lambda yt, yp: sm.f1_score(yt, yp, average='weighted')),
                'log_loss':           (False, sm.log_loss),
                'precision_binary':   (True, lambda yt, yp: sm.precision_score(yt, yp, average='binary')),
                'precision_macro':    (True, lambda yt, yp: sm.precision_score(yt, yp, average='macro')),
                'precision_micro':    (True, lambda yt, yp: sm.precision_score(yt, yp, average='micro')),
                'precision_samples':  (True, lambda yt, yp: sm.precision_score(yt, yp, average='samples')),
                'precision_weighted': (True, lambda yt, yp: sm.precision_score(yt, yp, average='weighted')),
                'recall_binary':      (True, lambda yt, yp: sm.recall_score(yt, yp, average='binary')),
                'recall_macro':       (True, lambda yt, yp: sm.recall_score(yt, yp, average='macro')),
                'recall_micro':       (True, lambda yt, yp: sm.recall_score(yt, yp, average='micro')),
                'recall_samples':     (True, lambda yt, yp: sm.recall_score(yt, yp, average='samples')),
                'recall_weighted':    (True, lambda yt, yp: sm.recall_score(yt, yp, average='weighted')),
                'roc_auc':            (True, sm.roc_auc_score),
                }

def r2_score_noint(y_true, y_pred):
    """
    Method that calculates the R^2 value of a linear fit of y_pred against y_true with the y-intercept held at zero

    Args:
        y_true: (numpy array), array of true y data values

        y_pred: (numpy array), array of predicted y data values

    Returns:
        (float): score of R^2 with no y-intercept

    """
    # The regressor needs a 2D feature matrix, so the n-vector becomes an nx1 column
    x_column = np.array(y_true).reshape(-1, 1)
    model = LinearRegression(fit_intercept=False)
    model.fit(x_column, y_pred)
    return model.score(x_column, y_pred)

def r2_score_fitted(y_true, y_pred):
    """
    Method that calculates the R^2 value of a linear fit (slope and y-intercept) of y_pred against y_true

    Args:
        y_true: (numpy array), array of true y data values

        y_pred: (numpy array), array of predicted y data values

    Returns:
        (float): score of R^2

    """
    # The regressor needs a 2D feature matrix, so the n-vector becomes an nx1 column
    x_column = np.array(y_true).reshape(-1, 1)
    model = LinearRegression(fit_intercept=True)
    model.fit(x_column, y_pred)
    return model.score(x_column, y_pred)

def root_mean_squared_error(y_true, y_pred):
    """
    Method that calculates the root mean squared error (RMSE) as the square root of the mean squared error

    Args:
        y_true: (numpy array), array of true y data values

        y_pred: (numpy array), array of predicted y data values

    Returns:
        (float): score of RMSE

    """
    mse = sm.mean_squared_error(y_true, y_pred)
    return mse ** 0.5

def rmse_over_stdev(y_true, y_pred, train_y=None):
    """
    Method that calculates the root mean squared error (RMSE) of a set of data, divided by a standard deviation.
    The standard deviation is taken from train_y when it is given, and from y_true otherwise.

    Args:
        y_true: (numpy array), array of true y data values

        y_pred: (numpy array), array of predicted y data values

        train_y: (numpy array), array of training y data values

    Returns:
        (float): score of RMSE divided by standard deviation of train_y (or of y_true if train_y is None)

    """
    # Fall back to the true values when no training data is supplied
    reference = y_true if train_y is None else train_y
    spread = np.std(reference)
    return root_mean_squared_error(y_true, y_pred) / spread

def r2_score_adjusted(y_true, y_pred, n_features=None):
    """
    Method that calculates the adjusted R^2 value, 1 - (1 - R^2) * (n - 1) / (n - p - 1), where n is the
    number of samples and p is the number of features

    Args:
        y_true: (numpy array), array of true y data values

        y_pred: (numpy array), array of predicted y data values

        n_features: (int), number of features used in the fit

    Returns:
        (float): score of adjusted R^2. NaN (as a float, not a string) is returned when n_features is not
            given or when n - n_features - 1 equals zero, since the score is undefined in both cases.

    """
    # n is sample size
    n = np.array(y_true).shape[0]
    # p is number of features
    p = n_features
    if p is None:
        # No n_features given, just output NaN
        return np.nan
    denominator = n - p - 1
    if denominator == 0:
        # Avoid dividing by zero, which gives inf/nan or an error depending on the scalar type
        return np.nan
    r2 = sm.r2_score(y_true, y_pred)
    return 1 - (((1 - r2) * (n - 1)) / denominator)