# Source code for holisticai.bias.metrics._recommender

# Base imports
import numpy as np
import pandas as pd

# utils
from holisticai.utils import mat_to_binary, normalize_tensor

# Recommender Efficacy Metrics
from holisticai.utils._recommender_tools import (
    avg_f1,
    avg_precision,
    avg_recall,
    entropy,
    recommender_mae,
    recommender_rmse,
)
from holisticai.utils._validation import _recommender_checks

# sklearn imports
from sklearn.metrics import mean_absolute_error


def aggregate_diversity(mat_pred, top=None, thresh=0.5, normalize=False):
    r"""Aggregate Diversity

    Binarises the score matrix into "recommended / not recommended" flags
    (either the ``top`` items per user or the items above ``thresh``) and
    returns the share of items that are recommended to at least one user.

    Interpretation
    --------------
    A value of 1 is desired: a high proportion of the catalogue being shown
    avoids the 'rich get richer' effect.

    Parameters
    ----------
    mat_pred : matrix-like
        Matrix with shape (num_users, num_items). A recommender score
        (binary or soft pred) for each user,item interaction.
    top : int, optional
        If not None, the number of items recommended to each user.
    thresh : float, optional
        Threshold indicating value at which a given item is shown to user
        (if top is None).
    normalize : bool, optional
        If True, normalises the data matrix to [0,1] range.

    Returns
    -------
    float
        Aggregate Diversity

    Notes
    -----
    :math:`\frac{|Items\; shown|}{|Items|}`

    References
    ----------
    .. [1] `H Abdollahpouri and M Mansoury and R Burke and B Mobasher and
        E Malthouse (2021). User-centered Evaluation of Popularity Bias in
        Recommender Systems, ACM.
        <https://doi.org/10.1145%2F3450613.3456821>`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import aggregate_diversity
    >>> mat_pred = np.array([
    ...     [0.9, 0.8, 0.4, 0.2], [0.7, 0.9, 0.1, 0.7], [0.3, 0.2, 0.3, 0.3],
    ...     [0.2, 0.1, 0.7, 0.8], [0.8, 0.7, 0.9, 0.1], [1.0, 0.9, 0.3, 0.6],
    ...     [0.8, 0.9, 0.1, 0.1], [0.2, 0.3, 0.1, 0.5], [0.1, 0.2, 0.7, 0.7],
    ...     [0.2, 0.7, 0.1, 0.2],
    ... ])
    >>> aggregate_diversity(mat_pred, top=None, thresh=0.8, normalize=True)
    0.75
    """
    # Validate and coerce the inputs; only the prediction-related outputs are used.
    _, _, scores, _, top, thresh, normalize = _recommender_checks(
        group_a=None,
        group_b=None,
        mat_pred=mat_pred,
        mat_true=None,
        top=top,
        thresh=thresh,
        normalize=normalize,
    )

    # Optionally rescale the scores before binarising.
    if normalize:
        scores = normalize_tensor(scores)

    # 0/1 flags: was the item shown to the user?
    shown = mat_to_binary(scores, top=top, thresh=thresh)

    # Column sums give, per item, the number of users it was shown to.
    times_shown = shown.sum(axis=0)
    num_items = len(times_shown)
    num_items_shown = (times_shown >= 1).sum()

    # Fraction of the catalogue that reached at least one user.
    return num_items_shown / num_items
def gini_index(mat_pred, top=None, thresh=0.5, normalize=False):
    """GINI index

    Measures the inequality across the frequency distribution of the
    recommended items.

    Interpretation
    --------------
    An algorithm that recommends each item the same number of times
    (uniform distribution) has a Gini index of 0; extreme inequality
    (a single item receiving every recommendation) gives a Gini index of 1.

    Parameters
    ----------
    mat_pred : matrix-like
        Matrix with shape (num_users, num_items). A recommender score
        (binary or soft pred) for each user,item interaction.
    top : int, optional
        If not None, the number of items that are shown to each user.
    thresh : float, optional
        Threshold indicating value at which a given item is shown to user
        (if top is None).
    normalize : bool, optional
        If True, normalises the data matrix to [0,1] range.

    Returns
    -------
    float
        GINI

    References
    ----------
    .. [1] `M Mansoury, H Abdollahpouri, M Pechenizkiy, B Mobasher, R Burke
        (2020). FairMatch: A Graph-based Approach for Improving Aggregate
        Diversity in Recommender Systems,
        <https://doi.org/10.48550/arXiv.2005.01148>`
    .. [2] `Farzad Eskandanian, Bamshad Mobasher (2020). Using Stable
        Matching to Optimize the Balance between Accuracy and Diversity in
        Recommendation <https://doi.org/10.48550/arXiv.2006.03715>`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import gini_index
    >>> mat_pred = np.array([
    ...     [0.9, 0.8, 0.4, 0.2], [0.7, 0.9, 0.1, 0.7], [0.3, 0.2, 0.3, 0.3],
    ...     [0.2, 0.1, 0.7, 0.8], [0.8, 0.7, 0.9, 0.1], [1.0, 0.9, 0.3, 0.6],
    ...     [0.8, 0.9, 0.1, 0.1], [0.2, 0.3, 0.1, 0.5], [0.1, 0.2, 0.7, 0.7],
    ...     [0.2, 0.7, 0.1, 0.2],
    ... ])
    >>> gini_index(mat_pred, top=2, thresh=None, normalize=False)
    0.1333333333333333
    """
    # Validate and coerce the inputs.
    _, _, scores, _, top, thresh, normalize = _recommender_checks(
        group_a=None,
        group_b=None,
        mat_pred=mat_pred,
        mat_true=None,
        top=top,
        thresh=thresh,
        normalize=normalize,
    )

    # Optionally rescale the scores before binarising.
    if normalize:
        scores = normalize_tensor(scores)

    # 0/1 flags: was the item shown to the user?
    shown = mat_to_binary(scores, top=top, thresh=thresh)

    # Share of all recommendations received by each item, in ascending order.
    counts = shown.sum(axis=0)
    sorted_shares = np.sort(counts / counts.sum())

    # Rank weights (2k - n + 1 for k = 0..n-1): negative for the least
    # recommended items, positive for the most recommended ones.
    num_items = scores.shape[1]
    rank_weights = 2 * np.arange(num_items) - num_items + 1

    return (rank_weights * sorted_shares).sum() / (num_items - 1)
def exposure_entropy(mat_pred, top=None, thresh=0.5, normalize=False):
    r"""Exposure Entropy

    Measures the entropy of the item exposure distribution, i.e. of the
    share of all recommendations that each item receives.

    Interpretation
    --------------
    A low entropy (close to 0) indicates high certainty as to which item
    will be shown. Higher entropies therefore indicate a more homogeneous
    distribution. Scale is relative to number of items.

    Parameters
    ----------
    mat_pred : matrix-like
        Matrix with shape (num_users, num_items). A recommender score
        (binary or soft pred) for each user,item interaction.
    top : int, optional
        If not None, the number of items that are shown to each user.
    thresh : float, optional
        Threshold indicating value at which a given item is shown to user
        (if top is None).
    normalize : bool, optional
        If True, normalises the data matrix to [0,1] range.

    Returns
    -------
    float
        Exposure Entropy

    Notes
    -----
    :math:`-\sum_{k}{ p_k} \ln(p_k)`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import exposure_entropy
    >>> mat_pred = np.array([
    ...     [0.9, 0.8, 0.4, 0.2], [0.7, 0.9, 0.1, 0.7], [0.3, 0.2, 0.3, 0.3],
    ...     [0.2, 0.1, 0.7, 0.8], [0.8, 0.7, 0.9, 0.1], [1.0, 0.9, 0.3, 0.6],
    ...     [0.8, 0.9, 0.1, 0.1], [0.2, 0.3, 0.1, 0.5], [0.1, 0.2, 0.7, 0.7],
    ...     [0.2, 0.7, 0.1, 0.2],
    ... ])
    >>> exposure_entropy(mat_pred, top=None, thresh=0.3, normalize=True)
    1.3762266043445464
    """
    # Validate and coerce the inputs.
    _, _, scores, _, top, thresh, normalize = _recommender_checks(
        group_a=None,
        group_b=None,
        mat_pred=mat_pred,
        mat_true=None,
        top=top,
        thresh=thresh,
        normalize=normalize,
    )

    # Optionally rescale the scores before binarising.
    if normalize:
        scores = normalize_tensor(scores)

    # Per-item exposure counts, turned into a distribution over items.
    shown = mat_to_binary(scores, top=top, thresh=thresh)
    exposure_counts = shown.sum(axis=0)
    exposure_dist = exposure_counts / exposure_counts.sum()

    return entropy(exposure_dist)
def avg_recommendation_popularity(mat_pred, top=None, thresh=0.5, normalize=False):
    """Average Recommendation Popularity

    Computes the average recommendation popularity of items over users.
    An item's popularity is the number of users it is recommended to; for
    each user the popularity of their recommended items is averaged, and
    the result is then averaged over users.

    Interpretation
    --------------
    A low value is desired and suggests that items have been recommended
    equally across the population.

    Parameters
    ----------
    mat_pred : matrix-like
        Matrix with shape (num_users, num_items). A recommender score
        (binary or soft pred) for each user,item interaction.
    top : int, optional
        If not None, the number of items that are shown to each user.
    thresh : float, optional
        Threshold indicating value at which a given item is shown to user
        (if top is None).
    normalize : bool, optional
        If True, normalises the data matrix to [0,1] range.

    Returns
    -------
    float
        Average Recommendation Popularity

    References
    ----------
    .. [1] `M Mansoury, H Abdollahpouri, M Pechenizkiy, B Mobasher, R Burke,
        E Malthouse (2020). User-centered Evaluation of Popularity Bias in
        Recommender Systems, <https://arxiv.org/pdf/2103.06364.pdf>`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import avg_recommendation_popularity
    >>> mat_pred = np.array([
    ...     [0.9, 0.8, 0.4, 0.2], [0.7, 0.9, 0.1, 0.7], [0.3, 0.2, 0.3, 0.3],
    ...     [0.2, 0.1, 0.7, 0.8], [0.8, 0.7, 0.9, 0.1], [1.0, 0.9, 0.3, 0.6],
    ...     [0.8, 0.9, 0.1, 0.1], [0.2, 0.3, 0.1, 0.5], [0.1, 0.2, 0.7, 0.7],
    ...     [0.2, 0.7, 0.1, 0.2],
    ... ])
    >>> avg_recommendation_popularity(mat_pred, top=None, thresh=0.5, normalize=False)
    5.037037037037036
    """
    # Validate and coerce the inputs.
    _, _, scores, _, top, thresh, normalize = _recommender_checks(
        group_a=None,
        group_b=None,
        mat_pred=mat_pred,
        mat_true=None,
        top=top,
        thresh=thresh,
        normalize=normalize,
    )

    # Optionally rescale the scores before binarising.
    if normalize:
        scores = normalize_tensor(scores)

    # 0/1 flags: was the item shown to the user?
    shown = mat_to_binary(scores, top=top, thresh=thresh)

    # Popularity of each item = number of users it is shown to.
    item_popularity = shown.sum(axis=0)

    # Per user: summed popularity of their items divided by how many they got.
    popularity_sum_per_user = (shown * item_popularity).sum(axis=1)
    recs_per_user = shown.sum(axis=1)
    per_user_avg = popularity_sum_per_user / recs_per_user

    # nanmean ignores NaN entries (e.g. the 0/0 of a user with no recommendations).
    return np.nanmean(per_user_avg)
def mad_score(group_a, group_b, mat_pred, normalize=False):
    r"""Mean Absolute Deviation

    Difference between the average score of the users in group_a and the
    average score of the users in group_b.

    Interpretation
    --------------
    A large value of MAD indicates differential treatment of group a and
    group b. A positive value indicates that group a received higher
    scores on average, while a negative value indicates higher ratings
    for group b.

    Parameters
    ----------
    group_a : array-like
        Group membership vector.
    group_b : array-like
        Group membership vector.
    mat_pred : matrix-like
        Matrix with shape (num_users, num_items). A recommender score
        (binary or soft pred) for each user,item interaction.
    normalize : bool, optional
        If True, normalises the data matrix to [0,1] range.

    Returns
    -------
    float
        MAD Score

    Notes
    -----
    :math:`\texttt{avg_group_a - avg_group_b}`

    References
    ----------
    .. [1] `Ziwei Zhu, Xia Hu, and James Caverlee (2018). Fairness-Aware
        Tensor-Based Recommendation,
        <https://dl.acm.org/doi/pdf/10.1145/3269206.3271795>`
    .. [2] `Y Deldjoo, V W Anelli, H Zamani, A Bellogin, TDi, T D Noia
        (2021). A Flexible Framework for Evaluating User and Item Fairness
        in Recommender Systems,
        <https://link.springer.com/article/10.1007/s11257-020-09285-1>`
    .. [3] `Y Deldjooa, A Bellogin, T D Noiaa (2021). Explaining recommender
        systems fairness and accuracy through the lens of data
        characteristics,
        <https://www.sciencedirect.com/science/article/pii/S0306457321001503>`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import mad_score
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> mat_pred = np.array([
    ...     [0.9, 0.8, 0.4, 0.2], [0.7, 0.9, 0.1, 0.7], [0.3, 0.2, 0.3, 0.3],
    ...     [0.2, 0.1, 0.7, 0.8], [0.8, 0.7, 0.9, 0.1], [1.0, 0.9, 0.3, 0.6],
    ...     [0.8, 0.9, 0.1, 0.1], [0.2, 0.3, 0.1, 0.5], [0.1, 0.2, 0.7, 0.7],
    ...     [0.2, 0.7, 0.1, 0.2],
    ... ])
    >>> mad_score(group_a, group_b, mat_pred, normalize=False)
    0.00833333333333336
    """
    # Validate and coerce the inputs (top / thresh do not apply to this metric).
    group_a, group_b, scores, _, _, _, normalize = _recommender_checks(
        group_a=group_a,
        group_b=group_b,
        mat_pred=mat_pred,
        mat_true=None,
        top=None,
        thresh=None,
        normalize=normalize,
    )

    # Optionally rescale the scores.
    if normalize:
        scores = normalize_tensor(scores)

    # Rows (users) belonging to each group.
    scores_a = scores[group_a == 1]
    scores_b = scores[group_b == 1]

    # Mean over all entries of each group's rows, ignoring NaNs.
    return np.nanmean(scores_a) - np.nanmean(scores_b)
def exposure_l1(group_a, group_b, mat_pred, top=None, thresh=0.5, normalize=False):
    """Exposure Total Variation

    Computes the total variation norm between the group_a item exposure
    distribution and the group_b item exposure distribution.

    Interpretation
    --------------
    A total variation divergence of 0 is desired, which occurs when the
    distributions are equal. The maximum value is 1, indicating the
    distributions are very far apart.

    Parameters
    ----------
    group_a : array-like
        Group membership vector.
    group_b : array-like
        Group membership vector.
    mat_pred : matrix-like
        Matrix with shape (num_users, num_items). A recommender score
        (binary or soft pred) for each user,item interaction.
    top : int, optional
        If not None, the number of items that are shown to each user.
    thresh : float, optional
        Threshold indicating value at which a given item is shown to user
        (if top is None).
    normalize : bool, optional
        If True, normalises the data matrix to [0,1] range.

    Returns
    -------
    float
        Exposure Total Variation

    References
    ----------
    .. [1] `T Giannakas, P Sermpezis, A Giovanidis, T Spyropoulos,
        G Arvanitakis (2021). Fairness in Network-Friendly Recommendations,
        <https://arxiv.org/pdf/2104.00959.pdf>`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import exposure_l1
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> mat_pred = np.array([
    ...     [0.9, 0.8, 0.4, 0.2], [0.7, 0.9, 0.1, 0.7], [0.3, 0.2, 0.3, 0.3],
    ...     [0.2, 0.1, 0.7, 0.8], [0.8, 0.7, 0.9, 0.1], [1.0, 0.9, 0.3, 0.6],
    ...     [0.8, 0.9, 0.1, 0.1], [0.2, 0.3, 0.1, 0.5], [0.1, 0.2, 0.7, 0.7],
    ...     [0.2, 0.7, 0.1, 0.2],
    ... ])
    >>> exposure_l1(group_a, group_b, mat_pred, top=1, thresh=None, normalize=False)
    0.25
    """
    # Validate and coerce the inputs.
    group_a, group_b, scores, _, top, thresh, normalize = _recommender_checks(
        group_a=group_a,
        group_b=group_b,
        mat_pred=mat_pred,
        mat_true=None,
        top=top,
        thresh=thresh,
        normalize=normalize,
    )

    # Optionally rescale the scores before binarising.
    if normalize:
        scores = normalize_tensor(scores)

    # Binarise the full matrix first, then take each group's rows.
    shown = mat_to_binary(scores, top=top, thresh=thresh)
    shown_a = shown[group_a == 1]
    shown_b = shown[group_b == 1]

    # Item exposure distribution within group_a.
    counts_a = shown_a.sum(axis=0)
    dist_a = counts_a / counts_a.sum()

    # Item exposure distribution within group_b.
    counts_b = shown_b.sum(axis=0)
    dist_b = counts_b / counts_b.sum()

    # mean(|a - b|) * n is the L1 distance; half of it is the total variation.
    return 0.5 * mean_absolute_error(dist_a, dist_b) * len(dist_a)
def exposure_kl(group_a, group_b, mat_pred, top=None, thresh=0.5, normalize=False):
    """Exposure KL Divergence

    Computes the KL divergence from the group_a item exposure distribution
    to the group_b item exposure distribution.

    Interpretation
    --------------
    A KL divergence of 0 is desired, which occurs when the distributions
    are equal. Higher values of the KL divergence indicate difference in
    exposure distributions of group a and group b.

    Parameters
    ----------
    group_a : array-like
        Group membership vector.
    group_b : array-like
        Group membership vector.
    mat_pred : matrix-like
        Matrix with shape (num_users, num_items). A recommender score
        (binary or soft pred) for each user,item interaction.
    top : int, optional
        If not None, the number of items that are shown to each user.
    thresh : float, optional
        Threshold indicating value at which a given item is shown to user
        (if top is None).
    normalize : bool, optional
        If True, normalises the data matrix to [0,1] range.

    Returns
    -------
    float
        Exposure KL Divergence

    References
    ----------
    .. [1] `A Dash, A Chakraborty, S Ghosh, A Mukherjee, K P. Gummadi
        (2021). When the Umpire is also a Player: Bias in Private Label
        Product Recommendations on E-commerce Marketplaces,
        <https://arxiv.org/pdf/2102.00141.pdf>`
    .. [2] `T Giannakas, P Sermpezis, A Giovanidis, T Spyropoulos,
        G Arvanitakis (2021). Fairness in Network-Friendly Recommendations,
        <https://arxiv.org/pdf/2104.00959.pdf>`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import exposure_kl
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> mat_pred = np.array([
    ...     [0.9, 0.8, 0.4, 0.2], [0.7, 0.9, 0.1, 0.7], [0.3, 0.2, 0.3, 0.3],
    ...     [0.2, 0.1, 0.7, 0.8], [0.8, 0.7, 0.9, 0.1], [1.0, 0.9, 0.3, 0.6],
    ...     [0.8, 0.9, 0.1, 0.1], [0.2, 0.3, 0.1, 0.5], [0.1, 0.2, 0.7, 0.7],
    ...     [0.2, 0.7, 0.1, 0.2],
    ... ])
    >>> exposure_kl(group_a, group_b, mat_pred, top=1, thresh=None, normalize=False)
    0.23217831296817806
    """
    # Validate and coerce the inputs.
    group_a, group_b, scores, _, top, thresh, normalize = _recommender_checks(
        group_a=group_a,
        group_b=group_b,
        mat_pred=mat_pred,
        mat_true=None,
        top=top,
        thresh=thresh,
        normalize=normalize,
    )

    # Optionally rescale the scores before binarising.
    if normalize:
        scores = normalize_tensor(scores)

    # Take each group's rows first; each sub-matrix is binarised separately below.
    scores_a = scores[group_a == 1]
    scores_b = scores[group_b == 1]

    # Item exposure distribution within group_a.
    shown_a = mat_to_binary(scores_a, top=top, thresh=thresh)
    counts_a = shown_a.sum(axis=0)
    dist_a = counts_a / counts_a.sum()

    # Item exposure distribution within group_b.
    shown_b = mat_to_binary(scores_b, top=top, thresh=thresh)
    counts_b = shown_b.sum(axis=0)
    dist_b = counts_b / counts_b.sum()

    # Two-argument entropy: divergence of dist_a relative to dist_b.
    return entropy(dist_a, dist_b)
def _recommender_metric_ratio(metric, group_a, group_b, mat_pred, mat_true, top=None, thresh=0.5, normalize=False):
    """Metric ratio for recommender systems

    Evaluates ``metric`` separately on the rows of group_a and on the rows
    of group_b and returns the quotient of the two values.

    Parameters
    ----------
    metric : function
        Metric to compute. Called as ``metric(pred, true)`` when both
        ``top`` and ``thresh`` are None after input checks, otherwise as
        ``metric(pred, true, top, thresh)``.
    group_a : array-like
        Group membership vector.
    group_b : array-like
        Group membership vector.
    mat_pred : matrix-like
        Matrix with shape (num_users, num_items). A recommender score
        (binary or soft pred) for each user,item interaction.
    mat_true : matrix-like
        Matrix with shape (num_users, num_items). A target score
        (binary or soft pred) for each user,item pair.
    top : int, optional
        If not None, the number of items that are shown to each user.
    thresh : float, optional
        Threshold indicating value at which a given item is shown to user
        (if top is None).
    normalize : bool, optional
        If True, normalises the data matrix to [0,1] range.

    Returns
    -------
    float
        Ratio of metrics : Metric(group_a) / Metric(group_b)
    """
    # Validate and coerce the inputs.
    group_a, group_b, mat_pred, mat_true, top, thresh, normalize = _recommender_checks(
        group_a=group_a,
        group_b=group_b,
        mat_pred=mat_pred,
        mat_true=mat_true,
        top=top,
        thresh=thresh,
        normalize=normalize,
    )

    # Predictions and targets are stacked and normalised together in one call.
    if normalize:
        mat_pred, mat_true = normalize_tensor(np.stack((mat_pred, mat_true)))

    # Row selectors for each group.
    rows_a = group_a == 1
    rows_b = group_b == 1

    # Metrics without a binarisation step take no extra arguments.
    if top is None and thresh is None:
        extra_args = ()
    else:
        extra_args = (top, thresh)

    value_a = metric(mat_pred[rows_a], mat_true[rows_a], *extra_args)
    value_b = metric(mat_pred[rows_b], mat_true[rows_b], *extra_args)

    return value_a / value_b
def avg_precision_ratio(group_a, group_b, mat_pred, mat_true, top=None, thresh=0.5, normalize=False):
    r"""Average precision ratio

    Computes the ratio of average precision (over users) on the minority
    group (group_a) and the majority group (group_b).

    Interpretation
    --------------
    A value of 1 is desired. Lower values show bias against minority group.
    Higher values show bias against majority group.

    Parameters
    ----------
    group_a : array-like
        Group membership vector.
    group_b : array-like
        Group membership vector.
    mat_pred : matrix-like
        Matrix with shape (num_users, num_items). A recommender score
        (binary or soft pred) for each user,item interaction.
    mat_true : matrix-like
        Matrix with shape (num_users, num_items). A target score
        (binary or soft pred) for each user,item pair.
    top : int, optional
        If not None, the number of items that are shown to each user.
    thresh : float, optional
        Threshold indicating value at which a given item is shown to user
        (if top is None).
    normalize : bool, optional
        If True, normalises the data matrix to [0,1] range.

    Returns
    -------
    float
        Ratio of average precisions

    Notes
    -----
    :math:`\frac{\texttt{AVg_precision_min}}{\texttt{AVg_precision_maj}}`

    References
    ----------
    .. [1] `Yunqi Li, Hanxiong Chen, Zuohui Fu, Yingqiang Ge, Yongfeng Zhang
        (2021). User-oriented Fairness in Recommendation.
        <https://arxiv.org/pdf/2104.10671.pdf>`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import avg_precision_ratio
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> mat_pred = np.array([
    ...     [0.9, 0.8, 0.4, 0.2], [0.7, 0.9, 0.1, 0.7], [0.3, 0.2, 0.3, 0.3],
    ...     [0.2, 0.1, 0.7, 0.8], [0.8, 0.7, 0.9, 0.1], [1.0, 0.9, 0.3, 0.6],
    ...     [0.8, 0.9, 0.1, 0.1], [0.2, 0.3, 0.1, 0.5], [0.1, 0.2, 0.7, 0.7],
    ...     [0.2, 0.7, 0.1, 0.2],
    ... ])
    >>> mat_true = np.array([
    ...     [0.7, 0.8, 0.4, 0.2], [0.9, 0.9, 0.1, 0.2], [0.3, 0.8, 0.2, 0.6],
    ...     [0.2, 0.1, 0.7, 0.8], [0.6, 0.7, 0.9, 0.1], [1.0, 0.9, 0.3, 0.6],
    ...     [0.8, 0.1, 0.1, 0.1], [0.2, 0.3, 0.1, 0.5], [0.1, 0.2, 0.7, 0.7],
    ...     [0.2, 0.1, 0.1, 0.8],
    ... ])
    >>> avg_precision_ratio(
    ...     group_a, group_b, mat_pred, mat_true, top=None, thresh=0.2, normalize=False
    ... )
    1.161290322580645
    """
    # Delegate to the shared ratio helper with the precision metric plugged in.
    return _recommender_metric_ratio(
        metric=avg_precision,
        group_a=group_a,
        group_b=group_b,
        mat_pred=mat_pred,
        mat_true=mat_true,
        top=top,
        thresh=thresh,
        normalize=normalize,
    )
def avg_recall_ratio(group_a, group_b, mat_pred, mat_true, top=None, thresh=0.5, normalize=False):
    r"""Average recall ratio

    Computes the ratio of average recall (over users) on the minority
    group (group_a) and the majority group (group_b).

    Parameters
    ----------
    group_a : array-like
        Group membership vector.
    group_b : array-like
        Group membership vector.
    mat_pred : matrix-like
        Matrix with shape (num_users, num_items). A recommender score
        (binary or soft pred) for each user,item interaction.
    mat_true : matrix-like
        Matrix with shape (num_users, num_items). A target score
        (binary or soft pred) for each user,item pair.
    top : int, optional
        If not None, the number of items that are shown to each user.
    thresh : float, optional
        Threshold indicating value at which a given item is shown to user
        (if top is None).
    normalize : bool, optional
        If True, normalises the data matrix to [0,1] range.

    Returns
    -------
    float
        Ratio of average recalls

    Notes
    -----
    :math:`\frac{\texttt{AVg_recall_min}}{\texttt{AVg_recall_maj}}`

    References
    ----------
    .. [1] `Yunqi Li, Hanxiong Chen, Zuohui Fu, Yingqiang Ge, Yongfeng Zhang
        (2021). User-oriented Fairness in Recommendation.
        <https://arxiv.org/pdf/2104.10671.pdf>`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import avg_recall_ratio
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> mat_pred = np.array([
    ...     [0.9, 0.8, 0.4, 0.2], [0.7, 0.9, 0.1, 0.7], [0.3, 0.2, 0.3, 0.3],
    ...     [0.2, 0.1, 0.7, 0.8], [0.8, 0.7, 0.9, 0.1], [1.0, 0.9, 0.3, 0.6],
    ...     [0.8, 0.9, 0.1, 0.1], [0.2, 0.3, 0.1, 0.5], [0.1, 0.2, 0.7, 0.7],
    ...     [0.2, 0.7, 0.1, 0.2],
    ... ])
    >>> mat_true = np.array([
    ...     [0.7, 0.8, 0.4, 0.2], [0.9, 0.9, 0.1, 0.2], [0.3, 0.8, 0.2, 0.6],
    ...     [0.2, 0.1, 0.7, 0.8], [0.6, 0.7, 0.9, 0.1], [1.0, 0.9, 0.3, 0.6],
    ...     [0.8, 0.1, 0.1, 0.1], [0.2, 0.3, 0.1, 0.5], [0.1, 0.2, 0.7, 0.7],
    ...     [0.2, 0.1, 0.1, 0.8],
    ... ])
    >>> avg_recall_ratio(
    ...     group_a, group_b, mat_pred, mat_true, top=2, thresh=0.5, normalize=False
    ... )
    1.0
    """
    # Delegate to the shared ratio helper with the recall metric plugged in.
    return _recommender_metric_ratio(
        metric=avg_recall,
        group_a=group_a,
        group_b=group_b,
        mat_pred=mat_pred,
        mat_true=mat_true,
        top=top,
        thresh=thresh,
        normalize=normalize,
    )
def avg_f1_ratio(group_a, group_b, mat_pred, mat_true, top=None, thresh=0.5, normalize=False):
    r"""Average f1 ratio

    Computes the ratio of average f1 (over users) on the minority group
    (group_a) and the majority group (group_b).

    Interpretation
    --------------
    A value of 1 is desired. Lower values show bias against minority group.
    Higher values show bias against majority group.

    Parameters
    ----------
    group_a : array-like
        Group membership vector.
    group_b : array-like
        Group membership vector.
    mat_pred : matrix-like
        Matrix with shape (num_users, num_items). A recommender score
        (binary or soft pred) for each user,item interaction.
    mat_true : matrix-like
        Matrix with shape (num_users, num_items). A target score
        (binary or soft pred) for each user,item pair.
    top : int, optional
        If not None, the number of items that are shown to each user.
    thresh : float, optional
        Threshold indicating value at which a given item is shown to user
        (if top is None).
    normalize : bool, optional
        If True, normalises the data matrix to [0,1] range.

    Returns
    -------
    float
        Ratio of average f1

    Notes
    -----
    :math:`\frac{\texttt{AVg_f1_min}}{\texttt{AVg_f1_maj}}`

    References
    ----------
    .. [1] `Yunqi Li, Hanxiong Chen, Zuohui Fu, Yingqiang Ge, Yongfeng Zhang
        (2021). User-oriented Fairness in Recommendation.
        <https://arxiv.org/pdf/2104.10671.pdf>`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import avg_f1_ratio
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> mat_pred = np.array([
    ...     [0.9, 0.8, 0.4, 0.2], [0.7, 0.9, 0.1, 0.7], [0.3, 0.2, 0.3, 0.3],
    ...     [0.2, 0.1, 0.7, 0.8], [0.8, 0.7, 0.9, 0.1], [1.0, 0.9, 0.3, 0.6],
    ...     [0.8, 0.9, 0.1, 0.1], [0.2, 0.3, 0.1, 0.5], [0.1, 0.2, 0.7, 0.7],
    ...     [0.2, 0.7, 0.1, 0.2],
    ... ])
    >>> mat_true = np.array([
    ...     [0.7, 0.8, 0.4, 0.2], [0.9, 0.9, 0.1, 0.2], [0.3, 0.8, 0.2, 0.6],
    ...     [0.2, 0.1, 0.7, 0.8], [0.6, 0.7, 0.9, 0.1], [1.0, 0.9, 0.3, 0.6],
    ...     [0.8, 0.1, 0.1, 0.1], [0.2, 0.3, 0.1, 0.5], [0.1, 0.2, 0.7, 0.7],
    ...     [0.2, 0.1, 0.1, 0.8],
    ... ])
    >>> avg_f1_ratio(
    ...     group_a, group_b, mat_pred, mat_true, top=None, thresh=0.5, normalize=False
    ... )
    0.9285714285714286
    """
    # Delegate to the shared ratio helper with the f1 metric plugged in.
    return _recommender_metric_ratio(
        metric=avg_f1,
        group_a=group_a,
        group_b=group_b,
        mat_pred=mat_pred,
        mat_true=mat_true,
        top=top,
        thresh=thresh,
        normalize=normalize,
    )
def recommender_rmse_ratio(group_a, group_b, mat_pred, mat_true, normalize=False):
    r"""Recommender RMSE ratio

    Computes the ratio of the rmse between predictions and target scores
    for group_a to the same quantity for group_b.

    Interpretation
    --------------
    A value of 1 is desired. The result is ``rmse(group_a) / rmse(group_b)``,
    so a value below 1 means the prediction error is smaller for group_a
    than for group_b, and a value above 1 means the prediction error is
    larger for group_a than for group_b.

    Parameters
    ----------
    group_a : array-like
        Group membership vector.
    group_b : array-like
        Group membership vector.
    mat_pred : matrix-like
        Matrix with shape (num_users, num_items). A recommender score
        (binary or soft pred) for each user,item interaction.
    mat_true : matrix-like
        Matrix with shape (num_users, num_items). A target score
        (binary or soft pred) for each user,item pair.
    normalize : bool, optional
        If True, normalises the data matrix to [0,1] range.

    Returns
    -------
    float
        Recommender RMSE ratio

    Notes
    -----
    :math:`\frac{\texttt{AVg_rmse_min}}{\texttt{AVg_rmse_maj}}`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import recommender_rmse_ratio
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> mat_pred = np.array([
    ...     [0.9, 0.8, 0.4, 0.2], [0.7, 0.9, 0.1, 0.7], [0.3, 0.2, 0.3, 0.3],
    ...     [0.2, 0.1, 0.7, 0.8], [0.8, 0.7, 0.9, 0.1], [1.0, 0.9, 0.3, 0.6],
    ...     [0.8, 0.9, 0.1, 0.1], [0.2, 0.3, 0.1, 0.5], [0.1, 0.2, 0.7, 0.7],
    ...     [0.2, 0.7, 0.1, 0.2],
    ... ])
    >>> mat_true = np.array([
    ...     [0.7, 0.8, 0.4, 0.2], [0.9, 0.9, 0.1, 0.2], [0.3, 0.8, 0.2, 0.6],
    ...     [0.2, 0.1, 0.7, 0.8], [0.6, 0.7, 0.9, 0.1], [1.0, 0.9, 0.3, 0.6],
    ...     [0.8, 0.1, 0.1, 0.1], [0.2, 0.3, 0.1, 0.5], [0.1, 0.2, 0.7, 0.7],
    ...     [0.2, 0.1, 0.1, 0.8],
    ... ])
    >>> recommender_rmse_ratio(group_a, group_b, mat_pred, mat_true)
    1.149630441384884
    """
    # RMSE takes no top/thresh arguments, so both are passed as None; the
    # ratio helper then calls the metric with (pred, true) only.
    return _recommender_metric_ratio(
        recommender_rmse,
        group_a,
        group_b,
        mat_pred,
        mat_true,
        top=None,
        thresh=None,
        normalize=normalize,
    )
def recommender_mae_ratio(group_a, group_b, mat_pred, mat_true, normalize=False):
    r"""Recommender MAE ratio

    Computes the ratio of the mae between predictions and target scores
    for group_a to the same quantity for group_b.

    Interpretation
    --------------
    A value of 1 is desired. The result is ``mae(group_a) / mae(group_b)``,
    so a value below 1 means the prediction error is smaller for group_a
    than for group_b, and a value above 1 means the prediction error is
    larger for group_a than for group_b.

    Parameters
    ----------
    group_a : array-like
        Group membership vector.
    group_b : array-like
        Group membership vector.
    mat_pred : matrix-like
        Matrix with shape (num_users, num_items). A recommender score
        (binary or soft pred) for each user,item interaction.
    mat_true : matrix-like
        Matrix with shape (num_users, num_items). A target score
        (binary or soft pred) for each user,item pair.
    normalize : bool, optional
        If True, normalises the data matrix to [0,1] range.

    Returns
    -------
    float
        Recommender MAE ratio

    Notes
    -----
    :math:`\frac{\texttt{AVg_mae_min}}{\texttt{AVg_mae_maj}}`

    Examples
    --------
    >>> import numpy as np
    >>> from holisticai.bias.metrics import recommender_mae_ratio
    >>> group_a = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> group_b = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> mat_pred = np.array([
    ...     [0.9, 0.8, 0.4, 0.2], [0.7, 0.9, 0.1, 0.7], [0.3, 0.2, 0.3, 0.3],
    ...     [0.2, 0.1, 0.7, 0.8], [0.8, 0.7, 0.9, 0.1], [1.0, 0.9, 0.3, 0.6],
    ...     [0.8, 0.9, 0.1, 0.1], [0.2, 0.3, 0.1, 0.5], [0.1, 0.2, 0.7, 0.7],
    ...     [0.2, 0.7, 0.1, 0.2],
    ... ])
    >>> mat_true = np.array([
    ...     [0.7, 0.8, 0.4, 0.2], [0.9, 0.9, 0.1, 0.2], [0.3, 0.8, 0.2, 0.6],
    ...     [0.2, 0.1, 0.7, 0.8], [0.6, 0.7, 0.9, 0.1], [1.0, 0.9, 0.3, 0.6],
    ...     [0.8, 0.1, 0.1, 0.1], [0.2, 0.3, 0.1, 0.5], [0.1, 0.2, 0.7, 0.7],
    ...     [0.2, 0.1, 0.1, 0.8],
    ... ])
    >>> recommender_mae_ratio(group_a, group_b, mat_pred, mat_true)
    1.2954545454545452
    """
    # MAE takes no top/thresh arguments, so both are passed as None; the
    # ratio helper then calls the metric with (pred, true) only.
    return _recommender_metric_ratio(
        recommender_mae,
        group_a,
        group_b,
        mat_pred,
        mat_true,
        top=None,
        thresh=None,
        normalize=normalize,
    )
def recommender_bias_metrics(
    group_a=None,
    group_b=None,
    mat_pred=None,
    mat_true=None,
    top=None,
    thresh=0.5,
    normalize=False,
    metric_type="equal_outcome",
):
    """Recommender bias metrics batch computation

    Computes the recommender bias metrics belonging to the requested
    ``metric_type`` and returns them as a pandas dataframe. Only the
    metric families that end up in the result are evaluated.

    Parameters
    ----------
    group_a : array-like
        Group membership vector.
    group_b : array-like
        Group membership vector.
    mat_pred : matrix-like
        Matrix with shape (num_users, num_items). A recommender score
        (binary or soft pred) for each user,item interaction.
    mat_true : matrix-like
        Matrix with shape (num_users, num_items). A target score
        (binary or soft pred) for each user,item pair.
    top : int, optional
        If not None, the number of items that are shown to each user.
    thresh : float, optional
        Threshold indicating value at which a given item is shown to user
        (if top is None).
    normalize : bool, optional
        If True, normalises the data matrix to [0,1] range.
    metric_type : str, optional
        Specifies which metrics we compute: 'all', 'item_based',
        'equal_outcome' or 'equal_opportunity'.

        - 'item_based': item exposure metrics, computed from ``mat_pred``.
        - 'equal_outcome': group metrics, computed only when ``group_a``
          is not None.
        - 'equal_opportunity': group ratio metrics against ``mat_true``,
          computed only when ``group_a`` and ``mat_true`` are not None.
        - 'all': the three families above, in that order.

    Returns
    -------
    pandas DataFrame
        Metrics | Values | Reference. Indexed by metric name; empty when
        the inputs required by the requested family were not supplied.

    Raises
    ------
    ValueError
        If ``metric_type`` is not one of the supported values. The check
        happens before any metric is computed.
    """
    # Reject an unknown metric_type up front instead of after doing all the work.
    if metric_type not in ("all", "item_based", "equal_outcome", "equal_opportunity"):
        msg = "metric_type is not one of : all, item_based, equal_outcome, equal_opportunity"
        raise ValueError(msg)

    # Reference (ideal) value reported next to each metric; "-" means none.
    ref_vals = {
        "Aggregate Diversity": 1,
        "GINI index": 0,
        "Exposure Distribution Entropy": "-",
        "Average Recommendation Popularity": "-",
        "Mean Absolute Deviation": 0,
        "Exposure Total Variation": 0,
        "Exposure KL Divergence": 0,
        "Average Precision Ratio": 1,
        "Average Recall Ratio": 1,
        "Average F1 Ratio": 1,
        "Recommender RMSE Ratio": 1,
        "Recommender MAE Ratio": 1,
    }

    want_item = metric_type in ("all", "item_based")
    want_outcome = metric_type in ("all", "equal_outcome")
    want_opportunity = metric_type in ("all", "equal_opportunity")

    rows = []

    # Item-based metrics: need only the prediction matrix.
    if want_item:
        item_perform = {
            "Aggregate Diversity": aggregate_diversity,
            "GINI index": gini_index,
            "Exposure Distribution Entropy": exposure_entropy,
            "Average Recommendation Popularity": avg_recommendation_popularity,
        }
        rows += [[pf, fn(mat_pred, top, thresh, normalize), ref_vals[pf]] for pf, fn in item_perform.items()]

    # Group metrics are skipped silently when no group vector is supplied.
    if group_a is not None:
        if want_outcome:
            group_perform = {
                "Mean Absolute Deviation": mad_score,
            }
            group2_perform = {
                "Exposure Total Variation": exposure_l1,
                "Exposure KL Divergence": exposure_kl,
            }
            rows += [[pf, fn(group_a, group_b, mat_pred, normalize), ref_vals[pf]] for pf, fn in group_perform.items()]
            rows += [
                [pf, fn(group_a, group_b, mat_pred, top, thresh, normalize), ref_vals[pf]]
                for pf, fn in group2_perform.items()
            ]

        # NOTE(review): the ratio metrics need both the group vectors and the
        # targets, so they are guarded by both conditions — confirm this
        # matches the intended nesting of the original checks.
        if want_opportunity and mat_true is not None:
            group_true_perform = {
                "Average Precision Ratio": avg_precision_ratio,
                "Average Recall Ratio": avg_recall_ratio,
                "Average F1 Ratio": avg_f1_ratio,
            }
            group_true_reg_perform = {
                "Recommender RMSE Ratio": recommender_rmse_ratio,
                "Recommender MAE Ratio": recommender_mae_ratio,
            }
            rows += [
                [pf, fn(group_a, group_b, mat_pred, mat_true, top, thresh, normalize), ref_vals[pf]]
                for pf, fn in group_true_perform.items()
            ]
            rows += [
                [pf, fn(group_a, group_b, mat_pred, mat_true, normalize), ref_vals[pf]]
                for pf, fn in group_true_reg_perform.items()
            ]

    return pd.DataFrame(rows, columns=["Metric", "Value", "Reference"]).set_index("Metric")