# Source code for holisticai.bias.metrics._regression

# Base Imports
import warnings

import numpy as np
import pandas as pd

# utils
from holisticai.utils._formatting import slice_arrays_by_quantile
from holisticai.utils._validation import _check_non_empty, _regression_checks


def _calc_success_rate(group_membership: np.ndarray, threshold: float = 0.5) -> float:
    """Return the fraction of scores that are strictly above ``threshold``.

    Parameters
    ----------
    group_membership : array-like
        Scores (predictions) of the members of a single group.
    threshold : float, optional
        Value a score must strictly exceed to count as a success, default=0.5.

    Returns
    -------
    float
        Mean of the boolean mask ``group_membership > threshold``.
    """
    # The original signature read ``threshold=float``, which made the *type*
    # ``float`` the default value; it was meant to be an annotation.
    return (np.asarray(group_membership) > threshold).mean()


def success_rate_regression(group_a, group_b, y_pred, threshold=0.50):
    """Success rate (Regression version)

    Calculates the raw success rates for each group, i.e. the fraction of
    each group's predictions that are strictly above ``threshold``.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (numerical)
    threshold : float, str, optional
        The number above which the result is considered a success.
        Also accepts 'median' and 'mean', in which case the median / mean of
        all of ``y_pred`` is used as the threshold. Default=0.5.

    Returns
    -------
    dict
        Dictionary with two keys, sr_a and sr_b (success rate for group a and b)

    Raises
    ------
    ValueError
        If ``threshold`` is a string other than 'median' or 'mean'.
    """
    # Only string thresholds are dispatched by name; numeric thresholds are
    # never compared against strings.
    if isinstance(threshold, str):
        if threshold == "median":
            threshold = np.median(y_pred)
        elif threshold == "mean":
            threshold = np.mean(y_pred)
        else:
            msg = "Threshold not recognised"
            raise ValueError(msg)

    # Needs to be numpy arrays or the boolean-mask indexing below won't work
    group_a = np.array(group_a)
    group_b = np.array(group_b)
    y_pred = np.array(y_pred)

    group_a_membership = y_pred[group_a == 1]
    group_b_membership = y_pred[group_b == 1]
    sr_a = _calc_success_rate(group_a_membership, threshold)  # success rate group_a
    sr_b = _calc_success_rate(group_b_membership, threshold)  # success rate group_b
    return {"sr_a": sr_a, "sr_b": sr_b}


def disparate_impact_regression(group_a, group_b, y_pred, q=0.8):
    r"""Disparate Impact quantile (Regression version)

    Ratio of the success rates of group_a and group_b, where success means
    that the predicted score exceeds a given quantile (default = 0.8).

    If q is a vector, a vector with the result for each quantile in q is
    returned.

    Interpretation
    --------------
    A value of 1 is desired. Values below 1 are unfair towards group_a.
    Values above 1 are unfair towards group_b. The range (0.8,1.2) is
    considered acceptable.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)
    q : float, array-like, optional
        quantile of predictions considered, default=0.8

    Returns
    -------
    float
        Disparate Impact (top %)

    Notes
    -----
    :math:`\frac{sr_a}{sr_b}`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import disparate_impact_regression
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([0.8, 0.9, 0.2, 0.1, 0.7, 0.9, 0.8, 0.6, 0.3, 0.5])
    >>> disparate_impact_regression(group_a, group_b, y_pred, q=0.7)
    1.5
    """
    # validate and coerce the inputs
    group_a, group_b, y_pred, _, q = _regression_checks(group_a, group_b, y_pred, None, q)

    # whole-population group sizes: the denominators of the success rates
    total_a = group_a.sum()
    total_b = group_b.sum()

    # one membership slice per requested quantile
    _, slices_a, slices_b = slice_arrays_by_quantile(q, y_pred, [y_pred, group_a, group_b])

    ratios = np.zeros(len(slices_a))
    for idx, (top_a, top_b) in enumerate(zip(slices_a, slices_b)):
        _check_non_empty(top_a, name="group_a", quantile=q[idx])
        _check_non_empty(top_b, name="group_b", quantile=q[idx])
        rate_a = top_a.sum() / total_a
        rate_b = top_b.sum() / total_b
        ratios[idx] = rate_a / rate_b

    # collapse to a scalar when a single quantile was requested
    return np.squeeze(ratios)[()]
def statistical_parity_regression(group_a, group_b, y_pred, q=0.5):
    """Statistical Parity quantile (Regression version)

    Difference of the success rates of group_a and group_b, where success
    means that the predicted score exceeds a given quantile.

    If q is a vector, a vector with the result for each quantile in q is
    returned.

    Interpretation
    --------------
    A value of 0 is desired. Values below 0 are unfair towards group_a.
    Values above 0 are unfair towards group_b.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)
    q : float, array-like, optional
        quantile of predictions considered, default=0.5

    Returns
    -------
    float
        Statistical Parity (top %) : SR_a - SR_b

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import statistical_parity_regression
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([0.8, 0.9, 0.2, 0.1, 0.7, 0.9, 0.8, 0.6, 0.3, 0.5])
    >>> statistical_parity_regression(group_a, group_b, y_pred, q=0.7)
    0.16666666666666669
    """
    # validate and coerce the inputs
    group_a, group_b, y_pred, _, q = _regression_checks(group_a, group_b, y_pred, None, q)

    # whole-population group sizes: the denominators of the success rates
    total_a = group_a.sum()
    total_b = group_b.sum()

    # one membership slice per requested quantile
    _, slices_a, slices_b = slice_arrays_by_quantile(q, y_pred, [y_pred, group_a, group_b])

    gaps = np.zeros(len(slices_a))
    for idx, (top_a, top_b) in enumerate(zip(slices_a, slices_b)):
        _check_non_empty(top_a, name="group_a", quantile=q[idx])
        _check_non_empty(top_b, name="group_b", quantile=q[idx])
        rate_a = top_a.sum() / total_a
        rate_b = top_b.sum() / total_b
        gaps[idx] = rate_a - rate_b

    # collapse to a scalar when a single quantile was requested
    return np.squeeze(gaps)[()]
def no_disparate_impact_level(group_a, group_b, y_pred):
    """No disparate impact level

    This function computes the maximum score such that thresholding at that
    score does not allow adverse impact.

    Interpretation
    --------------
    The returned value is a score threshold, not a fairness score. Candidate
    thresholds are the quantiles of ``y_pred`` on a 100-point grid, scanned
    from the highest to the lowest; the first one whose pass-rate ratio
    (group_a / group_b) lies strictly inside (0.8, 1.2) is returned. If no
    candidate qualifies, the lowest candidate (quantile 0) is returned.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)

    Returns
    -------
    float
        No disparate impact level

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import no_disparate_impact_level
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([0.8, 0.9, 0.2, 0.1, 0.7, 0.9, 0.8, 0.6, 0.3, 0.5])
    >>> no_disparate_impact_level(group_a, group_b, y_pred)
    0.7
    """
    # check and coerce inputs
    group_a, group_b, y_pred, _, _ = _regression_checks(group_a, group_b, y_pred, None, None)

    # acceptance band for the pass-rate ratio (loop invariant)
    lower_bound = 0.8
    upper_bound = 1.2

    # group sizes do not depend on the threshold, so compute them once
    n_a = group_a.sum()
    n_b = group_b.sum()

    # candidate thresholds, highest first, computed in a single quantile call
    candidates = np.quantile(y_pred, np.linspace(1.0, 0.0, 100))

    # try different values
    for pred in candidates:
        pass_members = y_pred >= pred
        # vectorised sums instead of the Python builtin ``sum`` over arrays
        a = (group_a * pass_members).sum() / n_a
        b = (group_b * pass_members).sum() / n_b
        # find score that does not allow adverse impact
        if b > 0 and lower_bound < a / b < upper_bound:
            break
    return pred
def avg_score_diff(group_a, group_b, y_pred, q=0):
    r"""Average Score Difference

    This function computes the difference in average scores between
    group_a and group_b.

    If q is a vector, this function returns a vector with the
    respective result for each given quantile in q.

    Interpretation
    --------------
    A value of 0 is desired. Negative values indicate the group_a
    has lower average score, so bias against group_a. Positive values
    indicate group_b has lower average score, so bias against group_b.
    Scale is relative to task.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)
    q : float, array-like, optional
        quantile of predictions considered, default=0.

    Returns
    -------
    float
        Average Score Spread

    Notes
    -----
    :math:`\texttt{AVgroup_a - AVgroup_b}`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import avg_score_diff
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([0.8, 0.9, 0.2, 0.1, 0.7, 0.9, 0.8, 0.6, 0.3, 0.5])
    >>> avg_score_diff(group_a, group_b, y_pred)
    -0.13333333333333341
    """
    # check and coerce inputs
    group_a, group_b, y_pred, _, q = _regression_checks(group_a, group_b, y_pred, None, q)

    y_pred_list, group_a_list, group_b_list = slice_arrays_by_quantile(q, y_pred, [y_pred, group_a, group_b])

    n_q = len(group_a_list)
    # named so it does not shadow the function itself
    score_diffs = np.zeros(n_q)
    for i in range(n_q):
        members_a, members_b = group_a_list[i], group_b_list[i]
        _check_non_empty(members_a, name="group_a", quantile=q[i])
        _check_non_empty(members_b, name="group_b", quantile=q[i])

        scores = y_pred_list[i]
        avgroup_a = scores[members_a == 1].mean()
        avgroup_b = scores[members_b == 1].mean()
        score_diffs[i] = avgroup_a - avgroup_b

    # collapse to a scalar when a single quantile was requested
    return np.squeeze(score_diffs)[()]
def avg_score_ratio(group_a, group_b, y_pred, q=0):
    r"""Average Score Ratio

    This function computes the ratio in average scores between
    group_a and group_b.

    If q is a vector, this function returns a vector with the
    respective result for each given quantile in q.

    Interpretation
    --------------
    A value of 1 is desired. Values below 1 indicate the group_a
    has lower average score, so bias against group_a. Values above 1
    indicate group_b has lower average score, so bias against group_b.
    (0.8, 1.25) range is considered fair.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)
    q : float, array-like, optional
        quantile of predictions considered, default=0.

    Returns
    -------
    float
        Average Score Ratio

    Notes
    -----
    :math:`\texttt{AVgroup_a / AVgroup_b}`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import avg_score_ratio
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([0.8, 0.9, 0.2, 0.1, 0.7, 0.9, 0.8, 0.6, 0.3, 0.5])
    >>> avg_score_ratio(group_a, group_b, y_pred)
    0.7894736842
    """
    # check and coerce inputs
    group_a, group_b, y_pred, _, q = _regression_checks(group_a, group_b, y_pred, None, q)

    y_pred_list, group_a_list, group_b_list = slice_arrays_by_quantile(q, y_pred, [y_pred, group_a, group_b])

    n_q = len(group_a_list)
    # named so it does not shadow the function itself
    score_ratios = np.zeros(n_q)
    for i in range(n_q):
        members_a, members_b = group_a_list[i], group_b_list[i]
        _check_non_empty(members_a, name="group_a", quantile=q[i])
        _check_non_empty(members_b, name="group_b", quantile=q[i])

        scores = y_pred_list[i]
        avgroup_a = scores[members_a == 1].mean()
        avgroup_b = scores[members_b == 1].mean()
        score_ratios[i] = avgroup_a / avgroup_b

    # collapse to a scalar when a single quantile was requested
    return np.squeeze(score_ratios)[()]
def zscore_diff(group_a, group_b, y_pred, q=0):
    r"""ZScore Difference

    This function computes the spread in Zscores between
    group_a and group_b. The Zscore is a normalised
    version of Disparate Impact.

    If q is a vector, this function returns a vector with the
    respective result for each given quantile in q.

    Interpretation
    --------------
    A value of 0 is desired. The Zscore will approximate the number
    of standard deviations away from the mean. In particular values that
    exceed 2 are statistically significant with 95% probability.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)
    q : float, array-like, optional
        quantile of predictions considered, default=0.

    Returns
    -------
    float
        ZScore Difference

    Notes
    -----
    :math:`\frac{\texttt{AVgroup_a} - \texttt{AVgroup_b}}{\texttt{STD_pool}}`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import zscore_diff
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([0.8, 0.9, 0.2, 0.1, 0.2, 0.9, 0.3, 0.6, 0.3, 0.5])
    >>> zscore_diff(group_a, group_b, y_pred)
    0.1166919931983158
    """
    # check and coerce inputs
    group_a, group_b, y_pred, _, q = _regression_checks(group_a, group_b, y_pred, None, q)

    y_pred_list, group_a_list, group_b_list = slice_arrays_by_quantile(q, y_pred, [y_pred, group_a, group_b])

    n_q = len(group_a_list)
    # named so it does not shadow the function itself
    zscores = np.zeros(n_q)
    for i in range(n_q):
        members_a, members_b = group_a_list[i], group_b_list[i]
        _check_non_empty(members_a, name="group_a", quantile=q[i])
        _check_non_empty(members_b, name="group_b", quantile=q[i])

        # Get means
        scores = y_pred_list[i]
        scores_a = scores[members_a == 1]
        scores_b = scores[members_b == 1]
        av_group_a = scores_a.mean()
        av_group_b = scores_b.mean()

        # Get n_a and n_b (group sizes within this quantile slice)
        n_a = members_a.sum()
        n_b = members_b.sum()

        # STD pooled
        # NOTE(review): ndarray.std() defaults to ddof=0 while the weights are
        # (n - 1); confirm the population std is intended here.
        std_pool = np.sqrt(
            ((n_a - 1) * scores_a.std() ** 2.0 + (n_b - 1) * scores_b.std() ** 2.0) / (n_b + n_a - 2)
        )

        zscores[i] = (av_group_a - av_group_b) / std_pool

    # collapse to a scalar when a single quantile was requested
    return np.squeeze(zscores)[()]
def statistical_parity_auc(group_a, group_b, y_pred):
    """Statistical parity (AUC)

    Area under the curve of absolute statistical parity versus threshold.

    Interpretation
    --------------
    A value of 0 is desired. Values below 0.075 are considered
    acceptable.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)

    Returns
    -------
    float
        statistical parity (AUC)

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import statistical_parity_auc
    >>> group_a = np.array([1] * 50 + [0] * 50)
    >>> group_b = np.array([0] * 50 + [1] * 50)
    >>> y_pred = np.concatenate((np.linspace(-1, 1, 50), np.linspace(-1, 1, 50) ** 3))
    >>> statistical_parity_auc(group_a, group_b, y_pred)
    0.12106666666666668
    """
    # validate and coerce the inputs
    group_a, group_b, y_pred, _, _ = _regression_checks(group_a, group_b, y_pred, None, None)

    # one pass mark per quantile level, highest level first
    levels = np.linspace(1, 0, 150)
    cutoffs = np.quantile(y_pred, levels)

    # passed[i, j] is True when prediction i reaches cutoff j
    passed = y_pred.reshape(-1, 1) >= cutoffs.reshape(1, -1)

    rate_a = passed[group_a == 1].sum(axis=0) / group_a.sum()
    rate_b = passed[group_b == 1].sum(axis=0) / group_b.sum()
    gap = np.abs(rate_a - rate_b)

    # every level contributes with the same 1/150 width
    widths = np.full(150, 1 / 150)
    return np.sum(gap * widths)
def _weighed_statistical_parity_auc(group_a, group_b, y_pred):
    """Weighed Statistical parity (AUC)

    Area under the curve of absolute statistical parity versus threshold,
    weighed by the 2t distribution (linear weights running from 2 down to 0
    along the threshold grid).

    Interpretation
    --------------
    A value of 0 is desired. Values below 0.1 are considered
    acceptable.

    Parameters
    ----------
    group_a : numpy array
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)

    Returns
    -------
    float
        Weighed statistical parity (AUC)

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import _weighed_statistical_parity_auc
    >>> group_a = np.array([1] * 50 + [0] * 50)
    >>> group_b = np.array([0] * 50 + [1] * 50)
    >>> y_pred = np.concatenate((np.linspace(-1, 1, 50), np.linspace(-1, 1, 50) ** 3))
    >>> _weighed_statistical_parity_auc(group_a, group_b, y_pred)
    0.12106666666666666
    """
    # validate and coerce the inputs
    group_a, group_b, y_pred, _, _ = _regression_checks(group_a, group_b, y_pred, None, None)

    # one pass mark per quantile level, highest level first
    levels = np.linspace(1, 0, 150)
    cutoffs = np.quantile(y_pred, levels)

    # passed[i, j] is True when prediction i reaches cutoff j
    passed = y_pred.reshape(-1, 1) >= cutoffs.reshape(1, -1)

    rate_a = passed[group_a == 1].sum(axis=0) / group_a.sum()
    rate_b = passed[group_b == 1].sum(axis=0) / group_b.sum()
    gap = np.abs(rate_a - rate_b)

    # linear weights aligned with the threshold grid (2 at level 1, 0 at level 0)
    weights = np.linspace(2, 0, 150)
    weighted_gap = gap * weights / 150
    return np.sum(weighted_gap)
def max_statistical_parity(group_a, group_b, y_pred):
    """Max absolute statistical parity

    Maximum, over all thresholds, of the absolute statistical parity
    between group_a and group_b.

    Interpretation
    --------------
    A value of 0 is desired. Values below 0.1 in absolute value are
    considered acceptable.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)

    Returns
    -------
    float
        max absolute statistical parity

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import max_statistical_parity
    >>> group_a = np.array([1] * 50 + [0] * 50)
    >>> group_b = np.array([0] * 50 + [1] * 50)
    >>> y_pred = np.concatenate((np.linspace(-1, 1, 50), np.linspace(-1, 1, 50) ** 3))
    >>> max_statistical_parity(group_a, group_b, y_pred)
    0.20000000000000007
    """
    # validate and coerce the inputs
    group_a, group_b, y_pred, _, _ = _regression_checks(group_a, group_b, y_pred, None, None)

    # one pass mark per quantile level, highest level first
    levels = np.linspace(1, 0, 150)
    cutoffs = np.quantile(y_pred, levels)

    # passed[i, j] is True when prediction i reaches cutoff j
    passed = y_pred.reshape(-1, 1) >= cutoffs.reshape(1, -1)

    rate_a = passed[group_a == 1].sum(axis=0) / group_a.sum()
    rate_b = passed[group_b == 1].sum(axis=0) / group_b.sum()
    gap = np.abs(rate_a - rate_b)

    # worst-case gap across all thresholds
    return np.max(gap)
def correlation_diff(group_a, group_b, y_pred, y_true, q=0):
    """Correlation difference

    This function computes the difference in correlation between predictions
    and targets for group_a and group_b.

    If q is a vector, this function returns a vector with the
    respective result for each given quantile in q.

    Interpretation
    --------------
    A value of 0 is desired. This metric ranges between -2 and 2,
    with -1 indicating strong bias against group_a, and +1
    indicating strong bias against group_b.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)
    y_true : numpy array
        Target vector (regression)
    q : float, array-like, optional
        quantile of predictions considered, default=0.

    Returns
    -------
    float
        correlation difference

    Notes
    -----
    :math:`CV_a - CV_b`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import correlation_diff
    >>> group_a = np.array([1] * 50 + [0] * 50)
    >>> group_b = np.array([0] * 50 + [1] * 50)
    >>> y_pred = np.concatenate((np.linspace(-1, 1, 50), np.linspace(-1, 1, 50) ** 3))
    >>> y_true = np.concatenate((np.linspace(-1, 1, 50), np.linspace(-1, 1, 50) ** 2))
    >>> correlation_diff(group_a, group_b, y_pred, y_true, q=0)
    1.0000000000000002
    """
    # check and coerce inputs; y_true is passed through the checks as well
    # (the original handed in None, leaving y_true unvalidated, unlike
    # rmse_ratio and mae_ratio)
    group_a, group_b, y_pred, y_true, q = _regression_checks(group_a, group_b, y_pred, y_true, q)

    y_pred_list, y_true_list, group_a_list, group_b_list = slice_arrays_by_quantile(
        q, y_pred, [y_pred, y_true, group_a, group_b]
    )

    n_q = len(group_a_list)
    corr_diff = np.zeros(n_q)
    for i in range(n_q):
        members_a, members_b = group_a_list[i], group_b_list[i]
        _check_non_empty(members_a, name="group_a", quantile=q[i])
        _check_non_empty(members_b, name="group_b", quantile=q[i])

        # Compute Pearson correlations
        scores = y_pred_list[i]
        targets = y_true_list[i]
        mask_a = members_a == 1
        mask_b = members_b == 1
        cv_a = np.corrcoef(scores[mask_a], targets[mask_a])[1, 0]
        cv_b = np.corrcoef(scores[mask_b], targets[mask_b])[1, 0]

        corr_diff[i] = cv_a - cv_b

    # collapse to a scalar when a single quantile was requested
    return np.squeeze(corr_diff)[()]
def rmse_ratio(group_a, group_b, y_pred, y_true, q=0):
    r"""RMSE ratio

    This function computes the ratio of the RMSE for group_a and group_b.

    If q is a vector, this function returns a vector with the
    respective result for each given quantile in q.

    Interpretation
    --------------
    A value of 1 is desired. Lower values show bias against group_a.
    Higher values show bias against group_b.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)
    y_true : numpy array
        Target vector (regression)
    q : float, array-like, optional
        quantile of predictions considered, default=0.

    Returns
    -------
    float
        RMSE ratio

    Notes
    -----
    :math:`\frac{RMSE_a}{RMSE_b}`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import rmse_ratio
    >>> group_a = np.array([1] * 50 + [0] * 50)
    >>> group_b = np.array([0] * 50 + [1] * 50)
    >>> y_pred = np.concatenate((np.linspace(-1, 1, 50), np.linspace(-1, 1, 50)))
    >>> y_true = np.concatenate(
    ...     (np.linspace(-1, 1, 50) ** 2, np.linspace(-1, 1, 50) ** 3)
    ... )
    >>> rmse_ratio(group_a, group_b, y_pred, y_true)
    2.7471209467641367
    """
    # check and coerce inputs
    group_a, group_b, y_pred, y_true, q = _regression_checks(group_a, group_b, y_pred, y_true, q)

    y_pred_list, y_true_list, group_a_list, group_b_list = slice_arrays_by_quantile(
        q, y_pred, [y_pred, y_true, group_a, group_b]
    )

    n_q = len(group_a_list)
    # named so it does not shadow the function itself
    ratios = np.zeros(n_q)
    for i in range(n_q):
        members_a, members_b = group_a_list[i], group_b_list[i]
        _check_non_empty(members_a, name="group_a", quantile=q[i])
        _check_non_empty(members_b, name="group_b", quantile=q[i])

        # Compute RMSE for both groups
        scores = y_pred_list[i]
        targets = y_true_list[i]
        residuals = targets - scores
        rmse_a = np.sqrt((residuals[members_a == 1] ** 2.0).mean())
        rmse_b = np.sqrt((residuals[members_b == 1] ** 2.0).mean())

        ratios[i] = rmse_a / rmse_b

    # collapse to a scalar when a single quantile was requested
    return np.squeeze(ratios)[()]
def mae_ratio(group_a, group_b, y_pred, y_true, q=0):
    r"""MAE ratio

    This function computes the ratio of the MAE for group_a and group_b.

    If q is a vector, this function returns a vector with the
    respective result for each given quantile in q.

    Interpretation
    --------------
    A value of 1 is desired. Lower values show bias against group_a.
    Higher values show bias against group_b.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)
    y_true : numpy array
        Target vector (regression)
    q : float, array-like, optional
        quantile of predictions considered, default=0.

    Returns
    -------
    float
        MAE ratio

    Notes
    -----
    :math:`\frac{MAE_a}{MAE_b}`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import mae_ratio
    >>> group_a = np.array([1] * 50 + [0] * 50)
    >>> group_b = np.array([0] * 50 + [1] * 50)
    >>> y_pred = np.concatenate((np.linspace(-1, 1, 50), np.linspace(-1, 1, 50)))
    >>> y_true = np.concatenate(
    ...     (np.linspace(-1, 1, 50) ** 2, np.linspace(-1, 1, 50) ** 3)
    ... )
    >>> mae_ratio(group_a, group_b, y_pred, y_true)
    2.084201388888889
    """
    # check and coerce inputs
    group_a, group_b, y_pred, y_true, q = _regression_checks(group_a, group_b, y_pred, y_true, q)

    y_pred_list, y_true_list, group_a_list, group_b_list = slice_arrays_by_quantile(
        q, y_pred, [y_pred, y_true, group_a, group_b]
    )

    n_q = len(group_a_list)
    # named so it does not shadow the function itself
    ratios = np.zeros(n_q)
    for i in range(n_q):
        members_a, members_b = group_a_list[i], group_b_list[i]
        _check_non_empty(members_a, name="group_a", quantile=q[i])
        _check_non_empty(members_b, name="group_b", quantile=q[i])

        # Compute MAE for both groups
        scores = y_pred_list[i]
        targets = y_true_list[i]
        abs_residuals = np.abs(targets - scores)
        mae_a = abs_residuals[members_a == 1].mean()
        mae_b = abs_residuals[members_b == 1].mean()

        ratios[i] = mae_a / mae_b

    # collapse to a scalar when a single quantile was requested
    return np.squeeze(ratios)[()]
def regression_bias_metrics(group_a, group_b, y_pred, y_true=None, metric_type="group"):
    """Regression bias metrics batch computation

    This function computes all the relevant regression bias metrics,
    and displays them as a pandas dataframe.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)
    y_true : numpy array, optional
        Target vector (regression). Required for the equal opportunity
        metrics (``metric_type`` 'group', 'both', 'equal_opportunity') and for
        'individual'.
    metric_type : str, optional
        Specifies which metrics we compute : individual, group, equal_outcome,
        equal_opportunity, both ('both' is a deprecated alias of 'group').

    Returns
    -------
    pandas DataFrame
        Metrics | Values | Reference

    Raises
    ------
    ValueError
        If ``metric_type`` is not recognised, or if an input required by the
        requested metrics is None.
    """
    # Validate the request up front, so nothing is computed for a call that
    # is going to be rejected anyway.
    valid_types = ("group", "both", "equal_outcome", "equal_opportunity", "individual")
    if metric_type not in valid_types:
        msg = "metric_type is not one of : group, individual, equal_outcome, equal_opportunity, both"
        raise ValueError(msg)

    columns = ["Metric", "Value", "Reference"]

    if metric_type == "individual":
        # None must be tested before np.squeeze: np.squeeze(None) returns an
        # array, which would defeat the check.
        if y_pred is None or y_true is None:
            # in case of missing y_pred or y_true
            msg = "y_pred and y_true must be provided for individual metrics"
            raise ValueError(msg)
        y_pred = np.squeeze(y_pred)
        y_true = np.squeeze(y_true)
        individual_metrics = {
            "Jain Index": jain_index,
        }
        individual_refs = {"Jain Index": 1}
        indv_metrics = [[pf, fn(y_pred, y_true), individual_refs[pf]] for pf, fn in individual_metrics.items()]
        return pd.DataFrame(indv_metrics, columns=columns).set_index("Metric")

    wants_outcome = metric_type in ("group", "both", "equal_outcome")
    wants_opportunity = metric_type in ("group", "both", "equal_opportunity")

    if any(p is None for p in (group_a, group_b, y_pred)):
        msg = "group_a, group_b and y_pred must be provided for group metrics"
        raise ValueError(msg)
    if wants_opportunity and y_true is None:
        # previously surfaced as an UnboundLocalError on opp_metrics
        msg = "y_true must be provided for equal_opportunity metrics"
        raise ValueError(msg)

    if metric_type == "both":
        # TODO: remove both for next version
        warnings.warn(
            "`both` option will be depreciated in the next versions, use group", DeprecationWarning, stacklevel=2
        )

    equal_outcome_metrics = {
        "Disparate Impact Q90": disparate_impact_regression,
        "Disparate Impact Q80": disparate_impact_regression,
        "Disparate Impact Q50": disparate_impact_regression,
        "Statistical Parity Q50": statistical_parity_regression,
        "No Disparate Impact Level": no_disparate_impact_level,
        "Average Score Difference": avg_score_diff,
        "Average Score Ratio": avg_score_ratio,
        "Z Score Difference": zscore_diff,
        "Max Statistical Parity": max_statistical_parity,
        "Statistical Parity AUC": statistical_parity_auc,
    }

    equal_opportunity_metrics = {
        "RMSE Ratio": rmse_ratio,
        "RMSE Ratio Q80": rmse_ratio,
        "MAE Ratio": mae_ratio,
        "MAE Ratio Q80": mae_ratio,
        "Correlation Difference": correlation_diff,
    }

    # ideal (unbiased) value of each metric, shown in the Reference column
    ref_vals = {
        "Disparate Impact Q90": 1,
        "Disparate Impact Q80": 1,
        "Disparate Impact Q50": 1,
        "Statistical Parity Q50": 0,
        "No Disparate Impact Level": "-",
        "Average Score Difference": 0,
        "Average Score Ratio": 1,
        "Z Score Difference": 0,
        "Max Statistical Parity": 0,
        "Statistical Parity AUC": 0,
        "RMSE Ratio": 1,
        "RMSE Ratio Q80": 1,
        "MAE Ratio": 1,
        "MAE Ratio Q80": 1,
        "Correlation Difference": 0,
    }

    # extra keyword arguments per metric; the quantile matches the Qxx label
    hypers = {
        "Disparate Impact Q90": {"q": 0.9},
        "Disparate Impact Q80": {"q": 0.8},
        "Disparate Impact Q50": {"q": 0.5},
        "Statistical Parity Q50": {"q": 0.5},  # was 0.8, contradicting the label
        "No Disparate Impact Level": {},
        "Average Score Difference": {},
        "Average Score Ratio": {},
        "Z Score Difference": {},
        "Max Statistical Parity": {},
        "Statistical Parity AUC": {},
        "RMSE Ratio": {},
        "RMSE Ratio Q80": {"q": 0.8},
        "MAE Ratio": {},
        "MAE Ratio Q80": {"q": 0.8},
        "Correlation Difference": {},
    }

    y_pred = np.squeeze(y_pred)
    if y_true is not None:
        y_true = np.squeeze(y_true)

    # Only the requested families are computed; outcome rows come first.
    metrics = []
    if wants_outcome:
        metrics += [
            [pf, fn(group_a, group_b, y_pred, **hypers[pf]), ref_vals[pf]] for pf, fn in equal_outcome_metrics.items()
        ]
    if wants_opportunity:
        metrics += [
            [pf, fn(group_a, group_b, y_pred, y_true, **hypers[pf]), ref_vals[pf]]
            for pf, fn in equal_opportunity_metrics.items()
        ]

    return pd.DataFrame(metrics, columns=columns).set_index("Metric")
#### Individual metrics


def jain_index(y_pred: np.ndarray, y_true: np.ndarray) -> float:
    """The Jain index (Fairness index)

    The Jain index is an index proposed for resources allocation that measures
    the "equality" of user allocation [1]. For our purposes, from the point of
    view of fairness, it measures the equality of the error distributed in the
    model outcomes. Empirically, we could say that a model with a Jain index
    of 1 is a model that distributes the error equally among all the samples.
    Please, use this metric with caution, as it is not a metric that has been
    proposed for fairness in machine learning models, but for resources
    allocation.

    Interpretation
    --------------
    From the point of view of fairness, it measures the equality of the error
    distributed among the samples. A fairer model will have a Jain index
    closer to 1.

    Parameters
    ----------
    y_pred : array-like of shape (n_samples,)
        The predicted target values.
    y_true : array-like of shape (n_samples,)
        The true target values.

    Returns
    -------
    float
        The Jain index of the absolute errors. 1.0 is returned when the index
        is undefined (all errors are zero, or the result is NaN).

    References
    ----------
    .. [1] Jain, R. (1984). A Quantitative Measure of Fairness and
        Discrimination for Resource Allocation in Shared Computer Systems.
        Eastern Research Laboratory, Digital Equipment Corporation.

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import jain_index
    >>> y_true = np.array([1, 2, 3, 4, 5])
    >>> y_pred = np.array([1, 2, 3, 4, 4])
    >>> jain_index(y_pred, y_true)
    0.2
    """
    # coerce so that plain sequences work as well as arrays
    error = np.abs(np.asarray(y_true) - np.asarray(y_pred))
    denominator = len(error) * (error**2).sum()
    # All-zero errors would give 0/0: answer directly instead of relying on
    # NaN (and the RuntimeWarning that comes with it).
    if denominator == 0:
        return 1.0
    jain = (error.sum() ** 2) / denominator
    # NaN can still arise from NaN/inf inputs; keep the historical fallback
    if np.isnan(jain):
        return 1.0
    return jain