# Source code for holisticai.bias.metrics._regression

# Base Imports
import warnings

import numpy as np
import pandas as pd

# utils
from holisticai.utils._formatting import slice_arrays_by_quantile
from holisticai.utils._validation import _check_non_empty, _regression_checks


def _calc_success_rate(group_membership: np.ndarray, threshold: float = 0.5) -> float:
    """Return the fraction of scores strictly above ``threshold``.

    Parameters
    ----------
    group_membership : numpy.ndarray
        Scores belonging to the members of one group.
    threshold : float, optional
        Cut-off a score must exceed to count as a success, default=0.5.

    Returns
    -------
    float
        Mean of the boolean mask ``group_membership > threshold``.
    """
    # NOTE: the previous signature used ``threshold=float``, which made the
    # *type object* the default value instead of annotating the parameter.
    return (group_membership > threshold).mean()


def success_rate_regression(group_a, group_b, y_pred, threshold=0.50):
    """Success rate (Regression version)

    Computes, for each group, the share of its predictions that lie strictly
    above a threshold.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (numerical)
    threshold : float, str, optional
        Value a prediction must exceed to count as a success, default=0.5.
        The strings 'median' and 'mean' select the median or the mean of
        ``y_pred`` as the threshold.

    Returns
    -------
    dict
        Dictionary with two keys, sr_a and sr_b (success rate for group a and b)

    Raises
    ------
    ValueError
        If ``threshold`` is a string other than 'median' or 'mean'.
    """
    if isinstance(threshold, str) and threshold not in ("median", "mean"):
        msg = "Threshold not recognised"
        raise ValueError(msg)

    # resolve a named threshold from the full prediction vector
    if threshold == "median":
        threshold = np.median(y_pred)
    elif threshold == "mean":
        threshold = np.mean(y_pred)

    # boolean-mask indexing below only works on numpy arrays
    mask_a = np.array(group_a) == 1
    mask_b = np.array(group_b) == 1
    scores = np.array(y_pred)

    return {
        "sr_a": _calc_success_rate(scores[mask_a], threshold),
        "sr_b": _calc_success_rate(scores[mask_b], threshold),
    }



[docs]
def disparate_impact_regression(group_a, group_b, y_pred, q=0.8):
    r"""Disparate Impact quantile (Regression version)

    Ratio of the success rate of group_a to the success rate of group_b,
    where success means the predicted score exceeds a given quantile of
    ``y_pred`` (default = 0.8).

    When ``q`` is a vector, one result is returned per quantile in ``q``.

    Interpretation
    --------------
    A value of 1 is desired. Values below 1 are unfair towards group_a.
    Values above 1 are unfair towards group_b. The range (0.8,1.2)
    is considered acceptable.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)
    q : float, array-like, optional
        quantile of predictions considered, default=0.8

    Returns
    -------
    float
        Disparate Impact (top %)

    Notes
    -----
    :math:`\frac{sr_a}{sr_b}`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import disparate_impact_regression
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([0.8, 0.9, 0.2, 0.1, 0.7, 0.9, 0.8, 0.6, 0.3, 0.5])
    >>> disparate_impact_regression(group_a, group_b, y_pred, q=0.7)
    1.5
    """
    # validate inputs and coerce them to the expected array form
    group_a, group_b, y_pred, _, q = _regression_checks(group_a, group_b, y_pred, None, q)

    # group sizes over the whole population: denominators of the success rates
    total_a = group_a.sum()
    total_b = group_b.sum()

    _, top_a_per_q, top_b_per_q = slice_arrays_by_quantile(q, y_pred, [y_pred, group_a, group_b])

    ratios = np.zeros(len(top_a_per_q))
    for idx, (top_a, top_b) in enumerate(zip(top_a_per_q, top_b_per_q)):
        _check_non_empty(top_a, name="group_a", quantile=q[idx])
        _check_non_empty(top_b, name="group_b", quantile=q[idx])

        rate_a = top_a.sum() / total_a
        rate_b = top_b.sum() / total_b
        ratios[idx] = rate_a / rate_b

    # collapse a length-1 result to a scalar, keep a vector otherwise
    return np.squeeze(ratios)[()]




[docs]
def statistical_parity_regression(group_a, group_b, y_pred, q=0.5):
    """Statistical Parity quantile (Regression version)

    Difference between the success rate of group_a and the success rate of
    group_b, where success means that the predicted score exceeds a given
    quantile of ``y_pred``.

    When ``q`` is a vector, one result is returned per quantile in ``q``.

    Interpretation
    --------------
    A value of 0 is desired. Values below 0 are unfair towards group_a.
    Values above 0 are unfair towards group_b.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)
    q : float, array-like, optional
        quantile of predictions considered, default=0.5

    Returns
    -------
    float
        Statistical Parity (top %) : SR_a - SR_b

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import statistical_parity_regression
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([0.8, 0.9, 0.2, 0.1, 0.7, 0.9, 0.8, 0.6, 0.3, 0.5])
    >>> statistical_parity_regression(group_a, group_b, y_pred, q=0.7)
    0.16666666666666669
    """
    # validate inputs and coerce them to the expected array form
    group_a, group_b, y_pred, _, q = _regression_checks(group_a, group_b, y_pred, None, q)

    # group sizes over the whole population: denominators of the success rates
    total_a = group_a.sum()
    total_b = group_b.sum()

    _, top_a_per_q, top_b_per_q = slice_arrays_by_quantile(q, y_pred, [y_pred, group_a, group_b])

    gaps = np.zeros(len(top_a_per_q))
    for idx, (top_a, top_b) in enumerate(zip(top_a_per_q, top_b_per_q)):
        _check_non_empty(top_a, name="group_a", quantile=q[idx])
        _check_non_empty(top_b, name="group_b", quantile=q[idx])

        rate_a = top_a.sum() / total_a
        rate_b = top_b.sum() / total_b
        gaps[idx] = rate_a - rate_b

    # collapse a length-1 result to a scalar, keep a vector otherwise
    return np.squeeze(gaps)[()]




[docs]
def no_disparate_impact_level(group_a, group_b, y_pred):
    """No disparate impact level

    Computes the maximum score such that thresholding at that score
    does not allow adverse impact.

    Interpretation
    --------------
    The search walks 100 quantile levels of ``y_pred`` from 1 down to 0 and
    stops at the first score for which the ratio of the pass rates of
    group_a and group_b lies strictly between 0.8 and 1.2.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)

    Returns
    -------
    float
        No disparate impact level

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import no_disparate_impact_level
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([0.8, 0.9, 0.2, 0.1, 0.7, 0.9, 0.8, 0.6, 0.3, 0.5])
    >>> no_disparate_impact_level(group_a, group_b, y_pred)
    0.7
    """
    # validate inputs and coerce them to the expected array form
    group_a, group_b, y_pred, _, _ = _regression_checks(group_a, group_b, y_pred, None, None)

    # acceptance band for the ratio of pass rates
    lower_bound = 0.8
    upper_bound = 1.2

    size_a = group_a.sum()
    size_b = group_b.sum()

    # scan candidate thresholds from the highest quantile downwards
    for level in np.linspace(1.0, 0.0, 100):
        pred = np.quantile(y_pred, level)
        passed = y_pred >= pred
        rate_a = sum(group_a * passed) / size_a
        rate_b = sum(group_b * passed) / size_b
        # first score whose pass-rate ratio sits inside the band wins
        if rate_b > 0 and lower_bound < rate_a / rate_b < upper_bound:
            break
    return pred




[docs]
def avg_score_diff(group_a, group_b, y_pred, q=0):
    r"""Average Score Difference

    Difference between the average predicted score of group_a and the
    average predicted score of group_b.

    When ``q`` is a vector, one result is returned per quantile in ``q``.

    Interpretation
    --------------
    A value of 0 is desired. Negative values indicate the group_a
    has lower average score, so bias against group_a. Positive values
    indicate group_b has lower average score, so bias against group_b.
    Scale is relative to task.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)
    q : float, array-like, optional
        quantile of predictions considered, default=0.

    Returns
    -------
    float
        Average Score Spread

    Notes
    -----
    :math:`\texttt{AVgroup_a - AVgroup_b}`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import avg_score_diff
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([0.8, 0.9, 0.2, 0.1, 0.7, 0.9, 0.8, 0.6, 0.3, 0.5])
    >>> avg_score_diff(group_a, group_b, y_pred)
    -0.13333333333333341
    """
    # validate inputs and coerce them to the expected array form
    group_a, group_b, y_pred, _, q = _regression_checks(group_a, group_b, y_pred, None, q)

    scores_per_q, a_per_q, b_per_q = slice_arrays_by_quantile(q, y_pred, [y_pred, group_a, group_b])

    diffs = np.zeros(len(a_per_q))
    for idx, (in_a, in_b) in enumerate(zip(a_per_q, b_per_q)):
        _check_non_empty(in_a, name="group_a", quantile=q[idx])
        _check_non_empty(in_b, name="group_b", quantile=q[idx])

        scores = scores_per_q[idx]
        mean_a = scores[in_a == 1].mean()
        mean_b = scores[in_b == 1].mean()
        diffs[idx] = mean_a - mean_b

    # collapse a length-1 result to a scalar, keep a vector otherwise
    return np.squeeze(diffs)[()]



def avg_score_ratio(group_a, group_b, y_pred, q=0):
    r"""Average Score Ratio

    Ratio of the average predicted score of group_a to the average
    predicted score of group_b.

    When ``q`` is a vector, one result is returned per quantile in ``q``.

    Interpretation
    --------------
    A value of 1 is desired. Values below 1 indicate the group_a
    has lower average score, so bias against group_a. Values above 1
    indicate group_b has lower average score, so bias against group_b.
    (0.8, 1.25) range is considered fair.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)
    q : float, array-like, optional
        quantile of predictions considered, default=0.

    Returns
    -------
    float
        Average Score Ratio

    Notes
    -----
    :math:`\texttt{AVgroup_a / AVgroup_b}`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import avg_score_ratio
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([0.8, 0.9, 0.2, 0.1, 0.7, 0.9, 0.8, 0.6, 0.3, 0.5])
    >>> avg_score_ratio(group_a, group_b, y_pred)
    0.7894736842
    """
    # validate inputs and coerce them to the expected array form
    group_a, group_b, y_pred, _, q = _regression_checks(group_a, group_b, y_pred, None, q)

    scores_per_q, a_per_q, b_per_q = slice_arrays_by_quantile(q, y_pred, [y_pred, group_a, group_b])

    ratios = np.zeros(len(a_per_q))
    for idx, (in_a, in_b) in enumerate(zip(a_per_q, b_per_q)):
        _check_non_empty(in_a, name="group_a", quantile=q[idx])
        _check_non_empty(in_b, name="group_b", quantile=q[idx])

        scores = scores_per_q[idx]
        mean_a = scores[in_a == 1].mean()
        mean_b = scores[in_b == 1].mean()
        ratios[idx] = mean_a / mean_b

    # collapse a length-1 result to a scalar, keep a vector otherwise
    return np.squeeze(ratios)[()]



[docs]
def zscore_diff(group_a, group_b, y_pred, q=0):
    r"""ZScore Difference

    Spread in Zscores between group_a and group_b: the difference of the
    group means divided by a pooled standard deviation. The Zscore is a
    normalised version of Disparate Impact.

    When ``q`` is a vector, one result is returned per quantile in ``q``.

    Interpretation
    --------------
    A value of 0 is desired. The Zscore will approximate the number
    of standard deviations away from the mean. In particular values that
    exceed 2 are statistically significant with 95% probability.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)
    q : float, array-like, optional
        quantile of predictions considered, default=0.

    Returns
    -------
    float
        ZScore Difference

    Notes
    -----
    :math:`\frac{\texttt{AVgroup_a} - \texttt{AVgroup_b}}{\texttt{STD_pool}}`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import zscore_diff
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> y_pred = np.array([0.8, 0.9, 0.2, 0.1, 0.2, 0.9, 0.3, 0.6, 0.3, 0.5])
    >>> zscore_diff(group_a, group_b, y_pred)
    0.1166919931983158
    """
    # validate inputs and coerce them to the expected array form
    group_a, group_b, y_pred, _, q = _regression_checks(group_a, group_b, y_pred, None, q)

    scores_per_q, a_per_q, b_per_q = slice_arrays_by_quantile(q, y_pred, [y_pred, group_a, group_b])

    zscores = np.zeros(len(a_per_q))
    for idx, (in_a, in_b) in enumerate(zip(a_per_q, b_per_q)):
        _check_non_empty(in_a, name="group_a", quantile=q[idx])
        _check_non_empty(in_b, name="group_b", quantile=q[idx])

        scores = scores_per_q[idx]
        scores_a = scores[in_a == 1]
        scores_b = scores[in_b == 1]

        # group sizes within this quantile slice
        size_a = in_a.sum()
        size_b = in_b.sum()

        # pooled standard deviation: size-weighted combination of the two
        # group variances (each taken from ``.std()`` with default arguments)
        weighted_var = (size_a - 1) * scores_a.std() ** 2.0 + (size_b - 1) * scores_b.std() ** 2.0
        pooled_std = np.sqrt(weighted_var / (size_b + size_a - 2))

        zscores[idx] = (scores_a.mean() - scores_b.mean()) / pooled_std

    # collapse a length-1 result to a scalar, keep a vector otherwise
    return np.squeeze(zscores)[()]




[docs]
def statistical_parity_auc(group_a, group_b, y_pred):
    """Statistical parity (AUC)

    Area under the curve of absolute statistical parity versus threshold,
    evaluated on 150 evenly spaced quantile levels of ``y_pred``.

    Interpretation
    --------------
    A value of 0 is desired. Values below 0.075 are considered
    acceptable.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)

    Returns
    -------
    float
        statistical parity (AUC)

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import statistical_parity_auc
    >>> group_a = np.array([1] * 50 + [0] * 50)
    >>> group_b = np.array([0] * 50 + [1] * 50)
    >>> y_pred = np.concatenate((np.linspace(-1, 1, 50), np.linspace(-1, 1, 50) ** 3))
    >>> statistical_parity_auc(group_a, group_b, y_pred)
    0.12106666666666668
    """
    # validate inputs and coerce them to the expected array form
    group_a, group_b, y_pred, _, _ = _regression_checks(group_a, group_b, y_pred, None, None)

    # one score cut-off per quantile level, from the top quantile down to 0
    n_levels = 150
    cutoffs = np.quantile(y_pred, np.linspace(1, 0, n_levels))

    # passed[i, j] is True when sample i reaches cut-off j
    passed = y_pred.reshape(-1, 1) >= cutoffs.reshape(1, -1)
    rate_a = passed[group_a == 1].sum(axis=0) / group_a.sum()
    rate_b = passed[group_b == 1].sum(axis=0) / group_b.sum()
    parity_gap = np.abs(rate_a - rate_b)

    # rectangle rule: every level contributes an equal width of 1 / n_levels
    widths = np.full(n_levels, 1 / n_levels)
    return np.sum(parity_gap * widths)



def _weighed_statistical_parity_auc(group_a, group_b, y_pred):
    """Weighed Statistical parity (AUC)

    Area under the curve of absolute statistical parity versus threshold,
    weighed by the 2t distribution: the weights fall linearly from 2 at the
    first (highest) quantile level to 0 at the last one.

    Interpretation
    --------------
    A value of 0 is desired. Values below 0.1 are considered
    acceptable.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)

    Returns
    -------
    float
        Weighed statistical parity (AUC)
    """
    # validate inputs and coerce them to the expected array form
    group_a, group_b, y_pred, _, _ = _regression_checks(group_a, group_b, y_pred, None, None)

    # one score cut-off per quantile level, from the top quantile down to 0
    n_levels = 150
    cutoffs = np.quantile(y_pred, np.linspace(1, 0, n_levels))

    # passed[i, j] is True when sample i reaches cut-off j
    passed = y_pred.reshape(-1, 1) >= cutoffs.reshape(1, -1)
    rate_a = passed[group_a == 1].sum(axis=0) / group_a.sum()
    rate_b = passed[group_b == 1].sum(axis=0) / group_b.sum()
    parity_gap = np.abs(rate_a - rate_b)

    # linear weights, aligned with the descending quantile levels
    weights = np.linspace(2, 0, n_levels)
    return np.sum(parity_gap * weights / n_levels)



[docs]
def max_statistical_parity(group_a, group_b, y_pred):
    """Max absolute statistical parity

    Maximum, over 150 evenly spaced quantile thresholds of ``y_pred``, of
    the absolute statistical parity between group_a and group_b.

    Interpretation
    --------------
    A value of 0 is desired. Values below 0.1 in absolute value are
    considered acceptable.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)

    Returns
    -------
    float
        max absolute statistical parity

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import max_statistical_parity
    >>> group_a = np.array([1] * 50 + [0] * 50)
    >>> group_b = np.array([0] * 50 + [1] * 50)
    >>> y_pred = np.concatenate((np.linspace(-1, 1, 50), np.linspace(-1, 1, 50) ** 3))
    >>> max_statistical_parity(group_a, group_b, y_pred)
    0.20000000000000007
    """
    # validate inputs and coerce them to the expected array form
    group_a, group_b, y_pred, _, _ = _regression_checks(group_a, group_b, y_pred, None, None)

    # one score cut-off per quantile level, from the top quantile down to 0
    cutoffs = np.quantile(y_pred, np.linspace(1, 0, 150))

    # passed[i, j] is True when sample i reaches cut-off j
    passed = y_pred.reshape(-1, 1) >= cutoffs.reshape(1, -1)
    rate_a = passed[group_a == 1].sum(axis=0) / group_a.sum()
    rate_b = passed[group_b == 1].sum(axis=0) / group_b.sum()

    # worst-case gap across all thresholds
    return np.max(np.abs(rate_a - rate_b))




[docs]
def correlation_diff(group_a, group_b, y_pred, y_true, q=0):
    """Correlation difference

    Difference between the Pearson correlation of predictions and targets
    within group_a and the same correlation within group_b.

    When ``q`` is a vector, one result is returned per quantile in ``q``.

    Interpretation
    --------------
    A value of 0 is desired. This metric ranges between -2 and 2,
    with -1 indicating strong bias against group_a, and +1
    indicating strong bias against group_b.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)
    y_true : array-like
        Target vector (regression)
    q : float, array-like, optional
        quantile of predictions considered, default=0.

    Returns
    -------
    float
        correlation difference

    Notes
    -----
    :math:`CV_a - CV_b`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import correlation_diff
    >>> group_a = np.array([1] * 50 + [0] * 50)
    >>> group_b = np.array([0] * 50 + [1] * 50)
    >>> y_pred = np.concatenate((np.linspace(-1, 1, 50), np.linspace(-1, 1, 50) ** 3))
    >>> y_true = np.concatenate((np.linspace(-1, 1, 50), np.linspace(-1, 1, 50) ** 2))
    >>> correlation_diff(group_a, group_b, y_pred, y_true, q=0)
    1.0000000000000002
    """
    # check and coerce inputs; y_true goes through the same checks as in
    # rmse_ratio / mae_ratio so it is validated before being mask-indexed
    group_a, group_b, y_pred, y_true, q = _regression_checks(group_a, group_b, y_pred, y_true, q)

    y_pred_list, y_true_list, group_a_list, group_b_list = slice_arrays_by_quantile(
        q, y_pred, [y_pred, y_true, group_a, group_b]
    )

    n_q = len(group_a_list)
    corr_diff = np.zeros(n_q)

    for i in range(n_q):
        members_a, members_b = group_a_list[i], group_b_list[i]
        _check_non_empty(members_a, name="group_a", quantile=q[i])
        _check_non_empty(members_b, name="group_b", quantile=q[i])

        preds = y_pred_list[i]
        targets = y_true_list[i]

        # off-diagonal entry of the 2x2 correlation matrix is the Pearson r
        cv_a = np.corrcoef(preds[members_a == 1], targets[members_a == 1])[1, 0]
        cv_b = np.corrcoef(preds[members_b == 1], targets[members_b == 1])[1, 0]

        corr_diff[i] = cv_a - cv_b

    # collapse a length-1 result to a scalar, keep a vector otherwise
    return np.squeeze(corr_diff)[()]




[docs]
def rmse_ratio(group_a, group_b, y_pred, y_true, q=0):
    r"""RMSE ratio

    Ratio of the root mean squared error of group_a to that of group_b.

    When ``q`` is a vector, one result is returned per quantile in ``q``.

    Interpretation
    --------------
    A value of 1 is desired. Lower values show bias against group_a.
    Higher values show bias against group_b.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)
    y_true : numpy array
        Target vector (regression)
    q : float, array-like, optional
        quantile of predictions considered, default=0.

    Returns
    -------
    float
        RMSE ratio

    Notes
    -----
    :math:`\frac{RMSE_a}{RMSE_b}`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import rmse_ratio
    >>> group_a = np.array([1] * 50 + [0] * 50)
    >>> group_b = np.array([0] * 50 + [1] * 50)
    >>> y_pred = np.concatenate((np.linspace(-1, 1, 50), np.linspace(-1, 1, 50)))
    >>> y_true = np.concatenate(
    ...     (np.linspace(-1, 1, 50) ** 2, np.linspace(-1, 1, 50) ** 3)
    ... )
    >>> rmse_ratio(group_a, group_b, y_pred, y_true)
    2.7471209467641367
    """
    # validate inputs and coerce them to the expected array form
    group_a, group_b, y_pred, y_true, q = _regression_checks(group_a, group_b, y_pred, y_true, q)

    preds_per_q, targets_per_q, a_per_q, b_per_q = slice_arrays_by_quantile(
        q, y_pred, [y_pred, y_true, group_a, group_b]
    )

    ratios = np.zeros(len(a_per_q))
    for idx, (in_a, in_b) in enumerate(zip(a_per_q, b_per_q)):
        _check_non_empty(in_a, name="group_a", quantile=q[idx])
        _check_non_empty(in_b, name="group_b", quantile=q[idx])

        preds = preds_per_q[idx]
        targets = targets_per_q[idx]

        # per-group squared residuals -> root of their mean
        sq_err_a = (targets[in_a == 1] - preds[in_a == 1]) ** 2.0
        sq_err_b = (targets[in_b == 1] - preds[in_b == 1]) ** 2.0
        ratios[idx] = np.sqrt(sq_err_a.mean()) / np.sqrt(sq_err_b.mean())

    # collapse a length-1 result to a scalar, keep a vector otherwise
    return np.squeeze(ratios)[()]




[docs]
def mae_ratio(group_a, group_b, y_pred, y_true, q=0):
    r"""MAE ratio

    Ratio of the mean absolute error of group_a to that of group_b.

    When ``q`` is a vector, one result is returned per quantile in ``q``.

    Interpretation
    --------------
    A value of 1 is desired. Lower values show bias against group_a.
    Higher values show bias against group_b.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)
    y_true : numpy array
        Target vector (regression)
    q : float, array-like, optional
        quantile of predictions considered, default=0.

    Returns
    -------
    float
        MAE ratio

    Notes
    -----
    :math:`\frac{MAE_a}{MAE_b}`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import mae_ratio
    >>> group_a = np.array([1] * 50 + [0] * 50)
    >>> group_b = np.array([0] * 50 + [1] * 50)
    >>> y_pred = np.concatenate((np.linspace(-1, 1, 50), np.linspace(-1, 1, 50)))
    >>> y_true = np.concatenate(
    ...     (np.linspace(-1, 1, 50) ** 2, np.linspace(-1, 1, 50) ** 3)
    ... )
    >>> mae_ratio(group_a, group_b, y_pred, y_true)
    2.084201388888889
    """
    # validate inputs and coerce them to the expected array form
    group_a, group_b, y_pred, y_true, q = _regression_checks(group_a, group_b, y_pred, y_true, q)

    preds_per_q, targets_per_q, a_per_q, b_per_q = slice_arrays_by_quantile(
        q, y_pred, [y_pred, y_true, group_a, group_b]
    )

    ratios = np.zeros(len(a_per_q))
    for idx, (in_a, in_b) in enumerate(zip(a_per_q, b_per_q)):
        _check_non_empty(in_a, name="group_a", quantile=q[idx])
        _check_non_empty(in_b, name="group_b", quantile=q[idx])

        preds = preds_per_q[idx]
        targets = targets_per_q[idx]

        # per-group absolute residuals -> their mean
        abs_err_a = np.abs(targets[in_a == 1] - preds[in_a == 1])
        abs_err_b = np.abs(targets[in_b == 1] - preds[in_b == 1])
        ratios[idx] = abs_err_a.mean() / abs_err_b.mean()

    # collapse a length-1 result to a scalar, keep a vector otherwise
    return np.squeeze(ratios)[()]




[docs]
def regression_bias_metrics(group_a, group_b, y_pred, y_true=None, metric_type="group"):
    """Regression bias metrics batch computation

    Computes the regression bias metrics selected by ``metric_type`` and
    returns them as a pandas dataframe.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    y_pred : array-like
        Predictions vector (regression)
    y_true : numpy array, optional
        Target vector (regression)
    metric_type : str, optional
        Specifies which metrics we compute : individual, group, equal_outcome,
        equal_opportunity, both (deprecated alias of group). With ``group`` or
        ``both`` the equal opportunity metrics are included only when
        ``y_true`` is provided.

    Returns
    -------
    pandas DataFrame
        Metrics | Values | Reference

    Raises
    ------
    ValueError
        If ``metric_type`` is not a recognised option, if the inputs required
        by the requested metrics are missing (``y_pred`` and ``y_true`` for
        individual metrics; ``group_a``, ``group_b`` and ``y_pred`` for group
        metrics; ``y_true`` for equal opportunity metrics).
    """

    individual_metrics = {
        "Jain Index": jain_index,
    }

    equal_outcome_metrics = {
        "Disparate Impact Q90": disparate_impact_regression,
        "Disparate Impact Q80": disparate_impact_regression,
        "Disparate Impact Q50": disparate_impact_regression,
        "Statistical Parity Q50": statistical_parity_regression,
        "No Disparate Impact Level": no_disparate_impact_level,
        "Average Score Difference": avg_score_diff,
        "Average Score Ratio": avg_score_ratio,
        "Z Score Difference": zscore_diff,
        "Max Statistical Parity": max_statistical_parity,
        "Statistical Parity AUC": statistical_parity_auc,
    }

    equal_opportunity_metrics = {
        "RMSE Ratio": rmse_ratio,
        "RMSE Ratio Q80": rmse_ratio,
        "MAE Ratio": mae_ratio,
        "MAE Ratio Q80": mae_ratio,
        "Correlation Difference": correlation_diff,
    }

    ref_vals = {
        "Disparate Impact Q90": 1,
        "Disparate Impact Q80": 1,
        "Disparate Impact Q50": 1,
        "Statistical Parity Q50": 0,
        "No Disparate Impact Level": "-",
        "Average Score Difference": 0,
        "Average Score Ratio": 1,
        "Z Score Difference": 0,
        "Max Statistical Parity": 0,
        "Statistical Parity AUC": 0,
        "RMSE Ratio": 1,
        "RMSE Ratio Q80": 1,
        "MAE Ratio": 1,
        "MAE Ratio Q80": 1,
        "Correlation Difference": 0,
        "Jain Index": 1,
    }

    # extra keyword arguments per metric; the quantile must match the label
    hypers = {
        "Disparate Impact Q90": {"q": 0.9},
        "Disparate Impact Q80": {"q": 0.8},
        "Disparate Impact Q50": {"q": 0.5},
        "Statistical Parity Q50": {"q": 0.5},
        "No Disparate Impact Level": {},
        "Average Score Difference": {},
        "Average Score Ratio": {},
        "Z Score Difference": {},
        "Max Statistical Parity": {},
        "Statistical Parity AUC": {},
        "RMSE Ratio": {},
        "RMSE Ratio Q80": {"q": 0.8},
        "MAE Ratio": {},
        "MAE Ratio Q80": {"q": 0.8},
        "Correlation Difference": {},
    }

    # reject unknown options before doing any work
    valid_types = ("individual", "group", "both", "equal_outcome", "equal_opportunity")
    if metric_type not in valid_types:
        msg = "metric_type is not one of : individual, group, both, equal_outcome, equal_opportunity"
        raise ValueError(msg)

    # squeeze only real inputs: np.squeeze(None) would yield a non-None array
    if y_pred is not None:
        y_pred = np.squeeze(y_pred)
    if y_true is not None:
        y_true = np.squeeze(y_true)

    def _to_frame(rows):
        # shared output layout for every metric family
        return pd.DataFrame(rows, columns=["Metric", "Value", "Reference"]).set_index("Metric")

    if metric_type == "individual":
        if y_pred is None or y_true is None:
            # in case of missing y_pred or y_true
            msg = "y_pred and y_true must be provided for individual metrics"
            raise ValueError(msg)
        indv_metrics = [[pf, fn(y_pred, y_true), ref_vals[pf]] for pf, fn in individual_metrics.items()]
        return _to_frame(indv_metrics)

    # every remaining option is group based
    if any(p is None for p in (group_a, group_b, y_pred)):
        msg = "group_a, group_b and y_pred must be provided for group metrics"
        raise ValueError(msg)

    if metric_type == "both":
        # TODO: remove both for next version
        warnings.warn(
            "`both` option will be depreciated in the next versions, use group", DeprecationWarning, stacklevel=2
        )

    metrics = []

    if metric_type != "equal_opportunity":
        metrics += [
            [pf, fn(group_a, group_b, y_pred, **hypers[pf]), ref_vals[pf]] for pf, fn in equal_outcome_metrics.items()
        ]

    if metric_type != "equal_outcome":
        if y_true is not None:
            metrics += [
                [pf, fn(group_a, group_b, y_pred, y_true, **hypers[pf]), ref_vals[pf]]
                for pf, fn in equal_opportunity_metrics.items()
            ]
        elif metric_type == "equal_opportunity":
            # these metrics compare predictions against targets
            msg = "y_true must be provided for equal_opportunity metrics"
            raise ValueError(msg)

    return _to_frame(metrics)



#### Individual metrics


def jain_index(y_pred: np.ndarray, y_true: np.ndarray) -> float:
    """The Jain index (Fairness index)

    The Jain index is an index proposed for resources allocation that measures the "equality" of user allocation [1].
    For our purposes, from the point of view of fairness, it measures the equality of the error distributed in the
    model outcomes. Empirically, we could say that a model with a Jain index of 1 is a model that distributes the error
    equally among all the samples.

    Please, use this metric with caution, as it is not a metric that has been proposed for fairness in machine learning
    models, but for resources allocation.

    Interpretation
    --------------
    From the point of view of fairness, it measures the equality of the error distributed among the samples. A fairer
    model will have a Jain index closer to 1.

    Parameters
    ----------
    y_pred : array-like of shape (n_samples,)
        The predicted target values.
    y_true : array-like of shape (n_samples,)
        The true target values.

    Returns
    -------
    float
        The Jain index of the absolute errors. 1.0 is returned when the index
        is undefined (no samples, all errors equal to zero, or a NaN result).

    References
    ----------
    .. [1] Jain, R. (1984). A Quantitative Measure of Fairness and Discrimination for Resource Allocation in Shared
    Computer Systems. Eastern Research Laboratory, Digital Equipment Corporation.

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import jain_index
    >>> y_true = np.array([1, 2, 3, 4, 5])
    >>> y_pred = np.array([1, 2, 3, 4, 4])
    >>> jain_index(y_pred, y_true)
    0.2
    """
    # work in floating point: accepts plain sequences and avoids the
    # wrap-around of unsigned / small integer dtypes in the subtraction
    # and in the squares
    error = np.abs(np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float))

    denominator = len(error) * (error**2).sum()
    if denominator == 0:
        # all errors are zero (or there are no samples): the error is
        # trivially shared equally, and 0 / 0 would only emit a warning
        return 1.0

    jain = (error.sum() ** 2) / denominator
    if np.isnan(jain):
        return 1.0
    return float(jain)