Source code for holisticai.bias.metrics._classification

# Base Imports
import warnings

import numpy as np
import pandas as pd

# utils
from holisticai.utils._validation import (
    _array_like_to_numpy,
    _check_binary,
    _classification_checks,
    _matrix_like_to_numpy,
    _regression_checks,
)

# Efficacy metrics
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score
from sklearn.neighbors import NearestNeighbors


def _group_success_rate(g, y):
    """Group success rate.

    This function computes the success rate for a given subgroup.

    Parameters
    ----------
    g : array-like
        subgroup vector (binary)
    y : array-like
        predictions vector (binary)

    Returns
    -------
    float
        group success rate

    """
    return y[g == 1].sum() / g.sum()  # success rate group_a


def statistical_parity(group_a, group_b, y_pred):
    """Statistical parity.

    Computes the difference between the success rate of group_a and the
    success rate of group_b.

    Interpretation
    --------------
    A value of 0 is desired. Negative values are unfair towards group_a.
    Positive values are unfair towards group_b. The range (-0.1, 0.1) is
    considered acceptable.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (binary)

    Returns
    -------
    float
        Statistical parity

    Notes
    -----
    :math:`sr_a - sr_b`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import statistical_parity
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([1, 1, 1, 0, 1, 1, 0, 0, 0, 0])
    >>> statistical_parity(group_a, group_b, y_pred)
    0.4166666666666667
    """
    # check and coerce the inputs
    group_a, group_b, y_pred, _ = _classification_checks(group_a, group_b, y_pred, y_true=None)

    # success rate of each group, in (a, b) order
    rate_a = _group_success_rate(group_a, y_pred)
    rate_b = _group_success_rate(group_b, y_pred)
    parity = rate_a - rate_b
    return parity
def success_rate(group_a, group_b, y_pred):
    """Success rate.

    Calculates the raw success rate of each group. The inputs are used as
    given: no validation or coercion step is performed in this function.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (binary)

    Returns
    -------
    dict
        Dictionary with two keys, ``sr_a`` and ``sr_b`` (success rate of
        group a and of group b).
    """
    rates = {}
    for key, membership in (("sr_a", group_a), ("sr_b", group_b)):
        rates[key] = _group_success_rate(membership, y_pred)
    return rates
def disparate_impact(group_a, group_b, y_pred):
    """Disparate impact.

    Computes the ratio between the success rate of group_a and the success
    rate of group_b.

    Interpretation
    --------------
    A value of 1 is desired. Values below 1 are unfair towards group_a.
    Values above 1 are unfair towards group_b. The range (0.8, 1.2) is
    considered acceptable.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (binary)

    Returns
    -------
    float
        Disparate impact

    Notes
    -----
    :math:`sr_a/sr_b`

    References
    ----------
    .. [1] `M B Zafar, I Valera, M G Rodriguez, K P. Gummadi (2017).
        Fairness Constraints: Mechanisms for Fair Classification, MPI-SWS
        <https://arxiv.org/pdf/1507.05259.pdf>`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import disparate_impact
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([1, 1, 1, 0, 1, 1, 0, 0, 0, 0])
    >>> disparate_impact(group_a, group_b, y_pred)
    2.25
    """
    # check and coerce the inputs
    group_a, group_b, y_pred, _ = _classification_checks(group_a, group_b, y_pred, y_true=None)

    numerator = _group_success_rate(group_a, y_pred)
    denominator = _group_success_rate(group_b, y_pred)
    impact = numerator / denominator
    return impact
def four_fifths(group_a, group_b, y_pred):
    """Four fifths.

    Computes the four fifths rule: the ratio of the success rates of
    group_a and group_b is taken both ways and the minimum is returned.

    Interpretation
    --------------
    A value of 1 is desired. Values below 1 are unfair. The range (0.8, 1)
    is considered acceptable.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (binary)

    Returns
    -------
    float
        Four fifths

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import four_fifths
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([1, 1, 1, 0, 1, 1, 0, 0, 0, 0])
    >>> four_fifths(group_a, group_b, y_pred)
    0.4444444444444444
    """
    # check and coerce the inputs
    group_a, group_b, y_pred, _ = _classification_checks(group_a, group_b, y_pred, y_true=None)

    rate_a = _group_success_rate(group_a, y_pred)
    rate_b = _group_success_rate(group_b, y_pred)

    # the ratio is evaluated in both directions and the smaller one is kept
    a_over_b = rate_a / rate_b
    b_over_a = rate_b / rate_a
    return min(a_over_b, b_over_a)
def cohen_d(group_a, group_b, y_pred):
    r"""Cohen D.

    Computes the Cohen D statistic (normalised statistical parity) between
    group_a and group_b.

    Interpretation
    --------------
    A value of 0 is desired. Negative values are unfair towards group_a.
    Positive values are unfair towards group_b. Reference values: 0.2 is
    considered a small effect size, 0.5 is considered medium, 0.8 is
    considered large.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (binary)

    Returns
    -------
    float
        Cohen D

    Notes
    -----
    :math:`\frac{sr_a-sr_b}{\texttt{std_pool}}`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import cohen_d
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([1, 1, 0, 0, 1, 1, 0, 1, 1, 1])
    >>> cohen_d(group_a, group_b, y_pred)
    -0.7844645405527363
    """
    # NOTE: the docstring is a raw string on purpose. In a normal string the
    # ``\f`` of ``\frac`` and the ``\t`` of ``\texttt`` are read as form-feed
    # and tab escapes, which corrupts the rendered formula.

    # check and coerce the inputs
    group_a, group_b, y_pred, _ = _classification_checks(group_a, group_b, y_pred, y_true=None)

    # success rate of each group
    sr_a = _group_success_rate(group_a, y_pred)
    sr_b = _group_success_rate(group_b, y_pred)

    # standard deviation of each group's binary outcome
    std_b = np.sqrt(sr_b * (1 - sr_b))
    std_a = np.sqrt(sr_a * (1 - sr_a))

    # group sizes
    n_a = group_a.sum()
    n_b = group_b.sum()

    # pooled standard deviation, weighting each variance by (n - 1)
    std_pool = np.sqrt(((n_b - 1) * std_b**2 + (n_a - 1) * std_a**2) / (n_a + n_b - 2))

    return (sr_a - sr_b) / std_pool
def z_test_diff(group_a, group_b, y_pred):
    """Z test (difference).

    Computes the Z-test statistic for the difference in success rates.
    Also known as the 2-SD statistic.

    Interpretation
    --------------
    A value of 0 is desired. This test considers the data unfair if the
    computed value is greater than 2 or smaller than -2, indicating a
    statistically significant difference in success rates.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (binary)

    Returns
    -------
    float
        Z test (difference version)

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import z_test_diff
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([1, 1, 1, 0, 1, 1, 0, 0, 0, 0])
    >>> round(z_test_diff(group_a, group_b, y_pred), 6)
    1.290994

    References
    ----------
    .. [1] `Morris (2001). Sample size requirements for adverse impact analysis
        <https://www.semanticscholar.org/paper/Sample-Size-Required-for-Adverse-Impact-Analysis-Morris/877f7acd7c646a21f4947166a07f41664dcabe95>`
    """
    # check and coerce the inputs
    group_a, group_b, y_pred, _ = _classification_checks(group_a, group_b, y_pred, y_true=None)

    rate_a = _group_success_rate(group_a, y_pred)
    rate_b = _group_success_rate(group_b, y_pred)

    size_a = group_a.sum()
    size_b = group_b.sum()

    # success rate over both groups together, weighted by group size
    pooled_rate = (rate_a * size_a + rate_b * size_b) / (size_a + size_b)
    size_total = size_a + size_b

    # share of group_a among the members of both groups
    share_a = size_a / size_total

    std_err = np.sqrt((pooled_rate * (1 - pooled_rate)) / (size_total * share_a * (1 - share_a)))
    return (rate_a - rate_b) / std_err
def z_test_ratio(group_a, group_b, y_pred):
    """Z test (ratio).

    Computes the Z-test statistic for the ratio in success rates.
    Also known as the 2-SD statistic.

    Interpretation
    --------------
    A value of 0 is desired. This test considers the data unfair if the
    computed value is greater than 2 or smaller than -2, indicating a
    statistically significant ratio in success rates.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (binary)

    Returns
    -------
    float
        Z-test (ratio version)

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import z_test_ratio
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([1, 1, 1, 0, 1, 1, 0, 0, 0, 0])
    >>> round(z_test_ratio(group_a, group_b, y_pred), 6)
    1.256288

    References
    ----------
    .. [1] `Morris (2001). Sample size requirements for adverse impact analysis
        <https://www.semanticscholar.org/paper/Sample-Size-Required-for-Adverse-Impact-Analysis-Morris/877f7acd7c646a21f4947166a07f41664dcabe95>`
    """
    # check and coerce the inputs
    group_a, group_b, y_pred, _ = _classification_checks(group_a, group_b, y_pred, y_true=None)

    rate_a = _group_success_rate(group_a, y_pred)
    rate_b = _group_success_rate(group_b, y_pred)

    size_a = group_a.sum()
    size_b = group_b.sum()

    # success rate over both groups together, weighted by group size
    pooled_rate = (rate_a * size_a + rate_b * size_b) / (size_a + size_b)
    size_total = size_a + size_b

    # share of group_a among the members of both groups
    share_a = size_a / size_total

    log_ratio = np.log(rate_a / rate_b)
    std_err = np.sqrt((1 - pooled_rate) / (pooled_rate * size_total * share_a * (1 - share_a)))
    return log_ratio / std_err
def _correlation_diff(group_a, group_b, y_pred, y_true):
    """Correlation difference.

    Computes the difference between group_a and group_b in the Pearson
    correlation of predicted and true labels.

    Interpretation
    --------------
    A value of 0 is desired. This metric ranges between -2 and 2, with
    negative values indicating bias against group_a, and positive values
    indicating bias against group_b.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (binary)
    y_true : array-like
        Target vector (binary)

    Returns
    -------
    float
        Correlation difference

    Notes
    -----
    :math:`CV_a - CV_b`

    Examples
    --------
    >>> import numpy as np
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([1, 1, 0, 0, 1, 1, 0, 1, 1, 1])
    >>> y_true = np.array([1, 1, 0, 0, 1, 0, 1, 0, 0, 1])
    >>> _correlation_diff(group_a, group_b, y_pred, y_true)
    1.4472135954999579
    """
    # check and coerce the inputs
    group_a, group_b, y_pred, y_true = _classification_checks(group_a, group_b, y_pred, y_true)

    # Pearson correlation of predictions and targets inside each group;
    # the off-diagonal entry of the 2x2 correlation matrix is the coefficient
    corr_matrix_a = np.corrcoef(y_pred[group_a == 1], y_true[group_a == 1])
    corr_a = corr_matrix_a[1, 0]
    corr_matrix_b = np.corrcoef(y_pred[group_b == 1], y_true[group_b == 1])
    corr_b = corr_matrix_b[1, 0]

    return corr_a - corr_b
def equal_opportunity_diff(group_a, group_b, y_pred, y_true):
    """Equality of opportunity difference.

    Computes the difference between the true positive rate of group_a and
    the true positive rate of group_b.

    Interpretation
    --------------
    A value of 0 is desired. This metric ranges between -1 and 1, with
    negative values indicating bias against group_a, and positive values
    indicating bias against group_b.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (binary)
    y_true : array-like
        Target vector (binary)

    Returns
    -------
    float
        Equal opportunity difference

    Notes
    -----
    :math:`tpr_a - tpr_b`

    The per-group confusion matrices are built over the fixed label set
    ``[0, 1]``, so they are 2x2 even when a group contains a single class.

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import equal_opportunity_diff
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([1, 1, 0, 0, 1, 1, 0, 1, 1, 1])
    >>> y_true = np.array([1, 1, 0, 0, 1, 0, 1, 0, 0, 1])
    >>> equal_opportunity_diff(group_a, group_b, y_pred, y_true)
    0.33333333333333337
    """
    # check and coerce the inputs
    group_a, group_b, y_pred, y_true = _classification_checks(group_a, group_b, y_pred, y_true)

    # Pin the label set: without it, a group whose targets and predictions
    # hold a single class yields a 1x1 matrix and the [1, 1] lookup raises
    # IndexError.
    # NOTE(review): with normalize="true", scikit-learn is expected to report
    # 0 for a class that has no true samples - confirm for the pinned version.
    labels = [0, 1]
    in_a = group_a == 1
    in_b = group_b == 1

    # true positive rate = row "true 1", column "predicted 1"
    tpr_a = confusion_matrix(y_true[in_a], y_pred[in_a], labels=labels, normalize="true")[1, 1]
    tpr_b = confusion_matrix(y_true[in_b], y_pred[in_b], labels=labels, normalize="true")[1, 1]

    return tpr_a - tpr_b
def false_positive_rate_diff(group_a, group_b, y_pred, y_true):
    """False positive rate difference.

    Computes the difference between the false positive rate of group_a and
    the false positive rate of group_b.

    Interpretation
    --------------
    A value of 0 is desired. This metric ranges between -1 and 1, with
    negative values indicating bias against group_a, and positive values
    indicating bias against group_b.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (binary)
    y_true : array-like
        Target vector (binary)

    Returns
    -------
    float
        False positive rate difference

    Notes
    -----
    :math:`fpr_a - fpr_b`

    The per-group confusion matrices are built over the fixed label set
    ``[0, 1]``, so they are 2x2 even when a group contains a single class.

    Examples
    --------
    >>> import numpy as np
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([1, 1, 0, 0, 1, 1, 0, 1, 1, 1])
    >>> y_true = np.array([1, 1, 0, 0, 1, 0, 1, 0, 0, 1])
    >>> false_positive_rate_diff(group_a, group_b, y_pred, y_true)
    -1.0
    """
    # check and coerce the inputs
    group_a, group_b, y_pred, y_true = _classification_checks(group_a, group_b, y_pred, y_true)

    # Pin the label set: without it, a group whose targets and predictions
    # hold a single class yields a 1x1 matrix and the [0, 1] lookup raises
    # IndexError.
    # NOTE(review): with normalize="true", scikit-learn is expected to report
    # 0 for a class that has no true samples - confirm for the pinned version.
    labels = [0, 1]
    in_a = group_a == 1
    in_b = group_b == 1

    # false positive rate = row "true 0", column "predicted 1"
    fpr_a = confusion_matrix(y_true[in_a], y_pred[in_a], labels=labels, normalize="true")[0, 1]
    fpr_b = confusion_matrix(y_true[in_b], y_pred[in_b], labels=labels, normalize="true")[0, 1]

    return fpr_a - fpr_b
def false_negative_rate_diff(group_a, group_b, y_pred, y_true):
    """False negative rate difference.

    Computes the difference between the false negative rate of group_a and
    the false negative rate of group_b.

    Interpretation
    --------------
    A value of 0 is desired. This metric ranges between -1 and 1, with
    negative values indicating bias against group_b, and positive values
    indicating bias against group_a.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (binary)
    y_true : array-like
        Target vector (binary)

    Returns
    -------
    float
        False negative rate difference

    Notes
    -----
    :math:`fnr_a - fnr_b`

    The per-group confusion matrices are built over the fixed label set
    ``[0, 1]``, so they are 2x2 even when a group contains a single class.

    Examples
    --------
    >>> import numpy as np
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([1, 1, 0, 0, 1, 1, 0, 1, 1, 1])
    >>> y_true = np.array([1, 1, 0, 0, 1, 0, 1, 0, 0, 1])
    >>> false_negative_rate_diff(group_a, group_b, y_pred, y_true)
    -0.3333333333333333
    """
    # check and coerce the inputs
    group_a, group_b, y_pred, y_true = _classification_checks(group_a, group_b, y_pred, y_true)

    # Pin the label set: without it, a group whose targets and predictions
    # hold a single class yields a 1x1 matrix and the [1, 0] lookup raises
    # IndexError.
    # NOTE(review): with normalize="true", scikit-learn is expected to report
    # 0 for a class that has no true samples - confirm for the pinned version.
    labels = [0, 1]
    in_a = group_a == 1
    in_b = group_b == 1

    # false negative rate = row "true 1", column "predicted 0"
    fnr_a = confusion_matrix(y_true[in_a], y_pred[in_a], labels=labels, normalize="true")[1, 0]
    fnr_b = confusion_matrix(y_true[in_b], y_pred[in_b], labels=labels, normalize="true")[1, 0]

    return fnr_a - fnr_b
def true_negative_rate_diff(group_a, group_b, y_pred, y_true):
    """True negative rate difference.

    Computes the difference between the true negative rate of group_a and
    the true negative rate of group_b.

    Interpretation
    --------------
    A value of 0 is desired. This metric ranges between -1 and 1, with
    negative values indicating bias against group_a, and positive values
    indicating bias against group_b.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (binary)
    y_true : array-like
        Target vector (binary)

    Returns
    -------
    float
        True negative rate difference

    Notes
    -----
    :math:`tnr_a - tnr_b`

    The per-group confusion matrices are built over the fixed label set
    ``[0, 1]``, so they are 2x2 even when a group contains a single class.

    Examples
    --------
    >>> import numpy as np
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([1, 1, 0, 0, 1, 1, 0, 1, 1, 1])
    >>> y_true = np.array([1, 1, 0, 0, 1, 0, 1, 0, 0, 1])
    >>> true_negative_rate_diff(group_a, group_b, y_pred, y_true)
    1.0
    """
    # check and coerce the inputs
    group_a, group_b, y_pred, y_true = _classification_checks(group_a, group_b, y_pred, y_true)

    # Pin the label set: without it, a group whose targets and predictions
    # hold a single class yields a 1x1 matrix, and [0, 0] then silently
    # returns that only cell even when it belongs to class 1.
    # NOTE(review): with normalize="true", scikit-learn is expected to report
    # 0 for a class that has no true samples - confirm for the pinned version.
    labels = [0, 1]
    in_a = group_a == 1
    in_b = group_b == 1

    # true negative rate = row "true 0", column "predicted 0"
    tnr_a = confusion_matrix(y_true[in_a], y_pred[in_a], labels=labels, normalize="true")[0, 0]
    tnr_b = confusion_matrix(y_true[in_b], y_pred[in_b], labels=labels, normalize="true")[0, 0]

    return tnr_a - tnr_b
def average_odds_diff(group_a, group_b, y_pred, y_true):
    """Average odds difference.

    Computes the difference in average odds between group_a and group_b,
    i.e. the mean of the true positive rate difference and the false
    positive rate difference.

    Interpretation
    --------------
    A value of 0 is desired. This metric ranges between -1 and 1, with
    negative values indicating bias against group_a, and positive values
    indicating bias against group_b. The range (-0.1, 0.1) is considered
    acceptable.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (binary)
    y_true : array-like
        Target vector (binary)

    Returns
    -------
    float
        AOD score

    Notes
    -----
    :math:`0.5 * (fpr_a-fpr_b + tpr_a-tpr_b)`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import average_odds_diff
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([1, 1, 0, 0, 1, 1, 0, 1, 1, 1])
    >>> y_true = np.array([1, 1, 0, 0, 1, 0, 1, 0, 0, 1])
    >>> average_odds_diff(group_a, group_b, y_pred, y_true)
    -0.3333333333333333
    """
    # check and coerce the inputs
    group_a, group_b, y_pred, y_true = _classification_checks(group_a, group_b, y_pred, y_true)

    # the two component metrics receive the already-checked arrays
    tpr_gap = equal_opportunity_diff(group_a, group_b, y_pred, y_true)
    fpr_gap = false_positive_rate_diff(group_a, group_b, y_pred, y_true)

    return 0.5 * (tpr_gap + fpr_gap)
def accuracy_diff(group_a, group_b, y_score, y_true):
    """Accuracy difference.

    Computes the difference between the prediction accuracy on group_a and
    the prediction accuracy on group_b.

    Interpretation
    --------------
    A value of 0 is desired. This metric ranges between -1 and 1, with
    negative values indicating bias against group_a, and positive values
    indicating bias against group_b.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_score : array-like
        Predictions compared against ``y_true``. The values are handed to
        ``accuracy_score`` unchanged, and the example below passes binary
        predictions.
    y_true : array-like
        Target vector (binary)

    Returns
    -------
    float
        acc_diff : acc_a - acc_b

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import accuracy_diff
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([1, 1, 0, 0, 1, 1, 0, 1, 1, 1])
    >>> y_true = np.array([1, 1, 0, 0, 1, 0, 1, 0, 0, 1])
    >>> accuracy_diff(group_a, group_b, y_pred, y_true)
    0.6666666666666667
    """
    # check and coerce the inputs
    group_a, group_b, y_score, y_true, _ = _regression_checks(group_a, group_b, y_score, y_true, None)
    _check_binary(y_true, "y_true")

    # split targets and predictions by group
    in_a = group_a == 1
    targets_a, preds_a = y_true[in_a], y_score[in_a]
    in_b = group_b == 1
    targets_b, preds_b = y_true[in_b], y_score[in_b]

    # accuracy of each group, then their difference
    acc_a = accuracy_score(targets_a, preds_a)
    acc_b = accuracy_score(targets_b, preds_b)
    return acc_a - acc_b
def abroca(group_a, group_b, y_score, y_true):
    """ABROCA (area between ROC curves).

    Returns the ROC AUC computed on group_a minus the ROC AUC computed on
    group_b.

    Interpretation
    --------------
    A value of 0 is desired. This metric ranges between -1 and 1, with
    negative values indicating bias against group_a, and positive values
    indicating bias against group_b.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_score : array-like
        Probability estimates (regression)
    y_true : array-like
        Target vector (binary)

    Returns
    -------
    float
        ABROCA : roc_auc_a - roc_auc_b

    Examples
    --------
    The targets below are drawn at random without a seed, so the printed
    value differs between runs.

    >>> import numpy as np
    >>> from holisticai.bias.metrics import abroca
    >>> group_a = np.array([1] * 50 + [0] * 50)
    >>> group_b = np.array([0] * 50 + [1] * 50)
    >>> y_score = np.concatenate((np.linspace(0, 1, 50), np.linspace(0, 1, 50) ** 2))
    >>> y_true = y_score + np.random.random(y_score.shape) > 0.5
    >>> abroca(group_a, group_b, y_score, y_true)  # doctest: +SKIP
    0.11806478405315601
    """
    # check and coerce the inputs
    group_a, group_b, y_score, y_true, _ = _regression_checks(group_a, group_b, y_score, y_true, None)
    _check_binary(y_true, "y_true")

    # split targets and scores by group
    in_a = group_a == 1
    targets_a, scores_a = y_true[in_a], y_score[in_a]
    in_b = group_b == 1
    targets_b, scores_b = y_true[in_b], y_score[in_b]

    # ROC AUC of each group, then their difference
    auc_a = roc_auc_score(targets_a, scores_a)
    auc_b = roc_auc_score(targets_b, scores_b)
    return auc_a - auc_b
def classification_bias_metrics(
    group_a=None, group_b=None, y_pred=None, y_true=None, y_score=None, X=None, metric_type="group", **kargs
):
    """Classification bias metrics batch computation.

    Computes the classification bias metrics selected by ``metric_type``
    and returns them as a pandas DataFrame, together with a fair reference
    value for comparison. Only the metric families needed for the requested
    ``metric_type`` are evaluated.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary). Required unless ``metric_type``
        is 'individual'.
    group_b : array-like
        Group membership vector (binary). Required unless ``metric_type``
        is 'individual'.
    y_pred : array-like
        Predictions vector (binary). Required unless ``metric_type`` is
        'individual'; individual metrics are skipped when it is missing.
    y_true : array-like, optional
        Target vector (binary). Equal opportunity metrics are skipped when
        it is missing.
    y_score : array-like, optional
        Probability estimates (regression). ABROCA is computed only when
        both ``y_score`` and ``y_true`` are given.
    X : array-like, optional
        Feature matrix, used by the consistency score.
    metric_type : str, optional
        Specifies which metrics are computed: 'group', 'individual',
        'equal_outcome' or 'equal_opportunity'. The deprecated value 'both'
        behaves like 'group' and emits a ``DeprecationWarning``.
    **kargs : dict
        Additional keyword arguments for individual metrics, named
        ``<metric function name>__<argument>``. They are only read when
        ``metric_type`` is 'individual'.

    Returns
    -------
    pandas DataFrame
        Indexed by ``Metric`` with the columns ``Value`` and ``Reference``.

    Raises
    ------
    ValueError
        If ``metric_type`` is not a supported value, if a group metric type
        is requested without ``group_a``, ``group_b`` and ``y_pred``, or if
        a key of ``kargs`` is not of the form ``<metric>__<argument>``.
    """
    # Validate the request before doing any work so that a typo in
    # metric_type is reported immediately.
    group_types = ("group", "both", "equal_outcome", "equal_opportunity")
    if metric_type != "individual" and metric_type not in group_types:
        msg = "metric_type is not one of : group, individual, equal_outcome, equal_opportunity"
        raise ValueError(msg)

    ref_vals = {
        "Statistical Parity": 0,
        "Disparate Impact": 1,
        "Four Fifths Rule": 1,
        "Cohen D": 0,
        "Equality of Opportunity Difference": 0,
        "False Positive Rate Difference": 0,
        "Average Odds Difference": 0,
        "Accuracy Difference": 0,
        "ABROCA": 0,
        "2SD Rule": 0,
        "Theil Index": 0,
        "Generalized Entropy Index": 0,
        "Consistency Score": 1,
        "Coefficient of Variation": 0,
    }
    columns = ["Metric", "Value", "Reference"]

    if metric_type == "individual":
        from collections import defaultdict

        # route "<metric>__<argument>" keyword arguments to their metric
        metric_kargs = defaultdict(dict)
        for key, value in kargs.items():
            parts = key.split("__")
            if len(parts) != 2:
                msg = f"keyword argument {key!r} must have the form '<metric>__<argument>'"
                raise ValueError(msg)
            metric, arg = parts
            metric_kargs[metric][arg] = value

        indv_metrics = []
        if y_pred is not None:
            if y_true is not None:
                individual_metrics_format_1 = {
                    "Theil Index": theil_index,
                    "Generalized Entropy Index": generalized_entropy_index,
                    "Coefficient of Variation": coefficient_of_variation,
                }
                indv_metrics += [
                    [pf, fn(y_pred, y_true, **metric_kargs[fn.__name__]), ref_vals[pf]]
                    for pf, fn in individual_metrics_format_1.items()
                ]
            if X is not None:
                individual_metrics_format_2 = {
                    "Consistency Score": consistency_score,
                }
                indv_metrics += [
                    [pf, fn(X, y_pred, **metric_kargs[fn.__name__]), ref_vals[pf]]
                    for pf, fn in individual_metrics_format_2.items()
                ]
        return pd.DataFrame(indv_metrics, columns=columns).set_index("Metric")

    # Every remaining metric type is group based and needs these inputs;
    # report that explicitly instead of failing later on an unbound local.
    if any(p is None for p in (group_a, group_b, y_pred)):
        msg = "group_a, group_b and y_pred are required when metric_type is not 'individual'"
        raise ValueError(msg)

    if metric_type == "both":
        # TODO: remove both for next version
        warnings.warn(
            "`both` option will be depreciated in the next versions, use group",
            DeprecationWarning,
            stacklevel=2,
        )

    out_metrics = []
    if metric_type != "equal_opportunity":
        equal_outcome_metrics = {
            "Statistical Parity": statistical_parity,
            "Disparate Impact": disparate_impact,
            "Four Fifths Rule": four_fifths,
            "Cohen D": cohen_d,
            "2SD Rule": z_test_diff,
        }
        out_metrics = [[pf, fn(group_a, group_b, y_pred), ref_vals[pf]] for pf, fn in equal_outcome_metrics.items()]

    opp_metrics = []
    if metric_type != "equal_outcome" and y_true is not None:
        equal_opportunity_metrics = {
            "Equality of Opportunity Difference": equal_opportunity_diff,
            "False Positive Rate Difference": false_positive_rate_diff,
            "Average Odds Difference": average_odds_diff,
            "Accuracy Difference": accuracy_diff,
        }
        opp_metrics += [
            [pf, fn(group_a, group_b, y_pred, y_true), ref_vals[pf]] for pf, fn in equal_opportunity_metrics.items()
        ]
        # score based metrics need the targets as well as the scores
        if y_score is not None:
            soft_metrics = {
                "ABROCA": abroca,
            }
            opp_metrics += [
                [pf, fn(group_a, group_b, y_score, y_true), ref_vals[pf]] for pf, fn in soft_metrics.items()
            ]

    # at most one list is empty here: 'equal_outcome' and
    # 'equal_opportunity' each fill one of them, 'group'/'both' fill both
    return pd.DataFrame(out_metrics + opp_metrics, columns=columns).set_index("Metric")
### Individual Metrics


def benefit_function(y_pred, y_true):
    """Benefit function.

    Computes the benefit used by the generalized entropy index:
    ``y_pred - y_true + 1``.

    Parameters
    ----------
    y_pred : array-like
        Predicted target values
    y_true : array-like
        True target values

    Returns
    -------
    np.ndarray
        Benefit function
    """
    error = y_pred - y_true
    return error + 1


def theil_index(y_pred: np.ndarray, y_true: np.ndarray) -> float:
    """The Theil index.

    The Theil index is a measure of inequality that is commonly used in
    economics to measure the inequality of a distribution, such as the
    distribution of income or wealth. It is the special case of the
    generalized entropy index with ``alpha=1``.

    Interpretation
    --------------
    A high value implies high inequality while a value of 0 indicates
    perfect equality.

    Parameters
    ----------
    y_pred : array-like of shape (n_samples,)
        The predicted target values.
    y_true : array-like of shape (n_samples,)
        The true target values.

    Returns
    -------
    float
        The Theil index of the input arrays.

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import theil_index
    >>> y_true = np.array([1, 0, 1, 0, 1])
    >>> y_pred = np.array([1, 0, 1, 0, 1])
    >>> theil_index(y_pred, y_true)
    0.0
    """
    theil = generalized_entropy_index(y_pred, y_true, alpha=1)
    return theil


def generalized_entropy_index(y_pred: np.ndarray, y_true: np.ndarray, alpha: int = 2) -> float:
    """Generalized entropy index.

    Generalized entropy index is a measure of inequality that is proposed
    as a unified individual and group fairness measure [1]. It measures the
    extent to which the distribution of the benefits deviates from a
    hypothetical distribution in which everyone has an equal share.

    Interpretation
    --------------
    A high value implies high inequality while a value of 0 indicates
    perfect equality.

    Parameters
    ----------
    y_pred : array-like of shape (n_samples,)
        The predicted target values.
    y_true : array-like of shape (n_samples,)
        The true target values (checked with ``_check_binary``).
    alpha : int, optional, default=2
        Parameter that regulates the weight given to distances between
        values at different parts of the distribution. The values 0 and 1
        are handled by dedicated formulas.

    Returns
    -------
    float
        The generalized entropy index of the input arrays.

    References
    ----------
    .. [1] Speicher, T., Heidari, H., Grgic-Hlaca, N., Gummadi, K. P., &
        Weller, A. (2018, April). A Unified Approach to Quantifying
        Algorithmic Unfairness: Measuring Individual & Group Unfairness via
        Inequality Indices. In Proceedings of the 24th ACM SIGKDD
        International Conference on Knowledge Discovery & Data Mining
        (pp. 2239-2248).

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import generalized_entropy_index
    >>> y_true = np.array([1, 0, 1, 0, 1])
    >>> y_pred = np.array([1, 0, 1, 0, 1])
    >>> generalized_entropy_index(y_pred, y_true, alpha=2)
    0.0
    """
    y_pred = _array_like_to_numpy(y_pred)
    y_true = _array_like_to_numpy(y_true)
    _check_binary(y_true)

    benefits = benefit_function(y_pred, y_true)
    mean_benefit = np.mean(benefits)
    relative = benefits / mean_benefit

    if alpha == 1:
        # the benefit is moved inside the log as an exponent so that
        # benefits equal to 0 are allowed
        index = np.mean(np.log(relative**benefits) / mean_benefit)
    elif alpha == 0:
        # NOTE(review): the division by the mean benefit is kept exactly as
        # found - confirm it against the intended definition for alpha=0.
        index = -np.mean(np.log(relative) / mean_benefit)
    else:
        index = (1 / (alpha * (alpha - 1))) * np.mean(relative**alpha - 1)
    return index


def coefficient_of_variation(y_pred: np.ndarray, y_true: np.ndarray) -> float:
    """Coefficient of variation.

    Calculates two times the square root of the generalized entropy index
    with ``alpha=2``.

    Interpretation
    --------------
    A high value implies high inequality while a value of 0 indicates
    perfect equality.

    Parameters
    ----------
    y_pred : array-like
        Predicted target values.
    y_true : array-like
        True target values (checked with ``_check_binary``).

    Returns
    -------
    float
        The coefficient of variation.
    """
    y_pred = _array_like_to_numpy(y_pred)
    y_true = _array_like_to_numpy(y_true)
    _check_binary(y_true)

    entropy_alpha_2 = generalized_entropy_index(y_pred, y_true, alpha=2)
    return 2 * np.sqrt(entropy_alpha_2)


def consistency_score(X: np.ndarray, y_pred: np.ndarray, n_neighbors: int = 5) -> float:
    """Consistency score.

    Compares the prediction of every sample of X with the mean prediction
    of its k nearest neighbours (found in X itself) and returns one minus
    the mean absolute difference.

    Parameters
    ----------
    X : array-like
        The input features.
    y_pred : array-like
        The predicted labels (checked with ``_check_binary``).
    n_neighbors : int, optional
        Number of neighbors for the knn computation. Default is 5.

    Returns
    -------
    float
        The consistency score.
    """
    X = _matrix_like_to_numpy(X)
    y_pred = _array_like_to_numpy(y_pred)
    _check_binary(y_pred)

    # learn a KNN on the features and look up the neighbours of every sample
    knn = NearestNeighbors(n_neighbors=n_neighbors, algorithm="ball_tree").fit(X)
    _, indices = knn.kneighbors(X)

    # indices has one row of neighbour positions per sample, so a row-wise
    # mean gives the average neighbour prediction of each sample
    neighbour_means = y_pred[indices].mean(axis=1)
    return 1 - np.mean(np.abs(y_pred - neighbour_means))