# Source code for holisticai.bias.plots._recommender

# Base Imports
import numpy as np
import seaborn as sns

# utils
from holisticai.utils import get_colors, mat_to_binary, normalize_tensor
from holisticai.utils._validation import _recommender_checks
from matplotlib import pyplot as plt


def long_tail_plot(
    mat_pred,
    top=None,
    thresh=0.5,
    normalize=False,
    ax=None,
    size=None,
    title=None,
):
    """
    Long Tail Plot.

    Description
    -----------
    Draws a single curve with, for every item, the number of times it
    occurs in the binarised predictions. Items are ordered from the most
    to the least frequently shown.

    Parameters
    ----------
    mat_pred : matrix-like
        Matrix with shape (num_users, num_items). A recommender score
        (binary or soft pred) for each user,item pair.
    top (optional) : int
        If not None, the number of items that are shown to each user.
    thresh (optional) : float
        Threshold indicating value at which a given item is shown to
        user (if top is None).
    normalize (optional) : bool
        If True, normalises the data matrix to [0,1] range.
    ax (optional) : matplotlib axes
        Pre-existing axes for the plot
    size (optional) : (int, int)
        Size of the figure
    title (optional) : str
        Title of the figure

    Returns
    -------
    matplotlib ax
    """
    # Validate / coerce the arguments. No groups and no ground truth are
    # involved in this plot, so those slots are passed as None and ignored.
    _, _, mat_pred, _, top, thresh, normalize = _recommender_checks(
        group_a=None,
        group_b=None,
        mat_pred=mat_pred,
        mat_true=None,
        top=top,
        thresh=thresh,
        normalize=normalize,
    )

    # Optional rescaling, then reduce scores to shown / not shown
    scores = normalize_tensor(mat_pred) if normalize else mat_pred
    shown = mat_to_binary(scores, top=top, thresh=thresh)

    # Per-item totals, most popular item first
    popularity = sorted(shown.sum(axis=0), reverse=True)

    # Theme is applied before any axes are created so new axes pick it up
    sns.set_theme()
    if ax is None:
        _, ax = plt.subplots(figsize=size)

    ax.set_xlabel("Items (sorted by popularity)")
    ax.set_ylabel("Item Count")
    line_color = get_colors(1, extended_colors=False)[0]
    ax.plot(
        range(len(popularity)),
        popularity,
        linewidth=2,
        color=line_color,
    )
    ax.set_title("Long Tail Plot" if title is None else title)
    return ax
def exposure_diff_plot(
    group_a,
    group_b,
    mat_pred,
    top=None,
    thresh=0.5,
    normalize=False,
    ax=None,
    size=None,
    title=None,
):
    """
    Exposure Difference plot.

    Description
    -----------
    Draws, item by item, how much the exposure distribution of group_a
    differs from the exposure distribution of group_b. Items are ordered
    from the largest to the smallest difference.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    mat_pred : matrix-like
        Matrix with shape (num_users, num_items). A recommender score
        (binary or soft pred) for each user,item pair.
    top (optional) : int
        If not None, the number of items that are shown to each user.
    thresh (optional) : float
        Threshold in (0,1) range indicating value at which a given item
        is shown to user (if top is None).
    normalize (optional) : bool
        If True, normalises the data matrix to [0,1] range.
    ax (optional) : matplotlib axes
        Pre-existing axes for the plot
    size (optional) : (int, int)
        Size of the figure
    title (optional) : str
        Title of the figure

    Returns
    -------
    matplotlib ax
    """
    # Validate / coerce the arguments (no ground-truth matrix is needed)
    group_a, group_b, mat_pred, _, top, thresh, normalize = _recommender_checks(
        group_a=group_a,
        group_b=group_b,
        mat_pred=mat_pred,
        mat_true=None,
        top=top,
        thresh=thresh,
        normalize=normalize,
    )

    # Optional rescaling, then reduce scores to shown / not shown
    scores = normalize_tensor(mat_pred) if normalize else mat_pred
    shown = mat_to_binary(scores, top=top, thresh=thresh)

    # Rows (users) belonging to each group
    shown_a = shown[group_a == 1]
    shown_b = shown[group_b == 1]

    # Exposure distribution of group_a: per-item counts scaled by their total
    counts_a = np.nansum(shown_a, axis=0)
    exposure_a = counts_a / counts_a.sum()

    # Exposure distribution of group_b, built the same way
    counts_b = np.nansum(shown_b, axis=0)
    exposure_b = counts_b / counts_b.sum()

    # Per-item gap between the two distributions, largest first
    gap_sorted = sorted(exposure_a - exposure_b, reverse=True)

    # Theme is applied before any axes are created so new axes pick it up
    sns.set_theme()
    if ax is None:
        _, ax = plt.subplots(figsize=size)

    ax.set_xlabel("Items (sorted by exposure difference)")
    ax.set_ylabel("$Exposure_a - Exposure_b$")
    line_color = get_colors(1, extended_colors=False)[0]
    ax.plot(
        range(len(gap_sorted)),
        gap_sorted,
        linewidth=2,
        color=line_color,
    )
    ax.set_title("Exposure Difference Plot" if title is None else title)
    return ax
def exposure_ratio_plot(
    group_a,
    group_b,
    mat_pred,
    top=None,
    thresh=0.5,
    normalize=False,
    ax=None,
    size=None,
    title=None,
):
    """
    Exposure Ratio plot.

    Description
    -----------
    This function plots the ratio in the exposure distributions
    between group_a and group_b, with items ordered from the largest
    to the smallest ratio.

    Parameters
    ----------
    group_a : array-like
        Group membership vector (binary)
    group_b : array-like
        Group membership vector (binary)
    mat_pred : matrix-like
        Matrix with shape (num_users, num_items). A recommender score
        (binary or soft pred) for each user,item pair.
    top (optional) : int
        If not None, the number of items that are shown to each user.
    thresh (optional) : float
        Threshold in (0,1) range indicating value at which a given item
        is shown to user (if top is None).
    normalize (optional) : bool
        If True, normalises the data matrix to [0,1] range.
    ax (optional) : matplotlib axes
        Pre-existing axes for the plot
    size (optional) : (int, int)
        Size of the figure
    title (optional) : str
        Title of the figure

    Returns
    -------
    matplotlib ax

    Notes
    -----
    An item with zero exposure in group_b has no finite ratio: the value
    is ``inf`` when group_a was exposed to it and ``NaN`` when neither
    group was. Those entries are kept and handed to matplotlib unchanged;
    ``inf`` values sort to the front and ``NaN`` values to the end.
    """
    # input checks and coerce (no ground-truth matrix is needed here)
    group_a, group_b, mat_pred, _, top, thresh, normalize = _recommender_checks(
        group_a=group_a,
        group_b=group_b,
        mat_pred=mat_pred,
        mat_true=None,
        top=top,
        thresh=thresh,
        normalize=normalize,
    )

    # normalise
    if normalize:
        mat_pred = normalize_tensor(mat_pred)

    # make binary (ie shown / not shown)
    binary_mat_pred = mat_to_binary(mat_pred, top=top, thresh=thresh)

    # Split by group
    mat_pred_a = binary_mat_pred[group_a == 1]
    mat_pred_b = binary_mat_pred[group_b == 1]

    # Item exposure distribution for group_a
    item_count_a = np.nansum(mat_pred_a, axis=0)
    item_dist_a = item_count_a / item_count_a.sum()

    # Item exposure distribution for group_b
    item_count_b = np.nansum(mat_pred_b, axis=0)
    item_dist_b = item_count_b / item_count_b.sum()

    # Take the ratio. Items never shown to group_b are an expected input
    # condition, so the resulting divide-by-zero warnings are silenced for
    # this one operation only.
    with np.errstate(divide="ignore", invalid="ignore"):
        item_dist_rat = np.asarray(item_dist_a / item_dist_b, dtype=float)

    # Sort by value, descending. Python's sorted() yields an arbitrary order
    # as soon as a NaN is present (every comparison with NaN is False);
    # np.sort always places NaN last, so sorting the negated values and
    # negating back gives a true descending order with NaN at the end.
    item_dist_rat_sorted = -np.sort(-item_dist_rat)

    # setup
    sns.set_theme()
    if ax is None:
        _, ax = plt.subplots(figsize=size)

    # chart
    ax.set_xlabel("Items (sorted by exposure ratio)")
    ax.set_ylabel("$Exposure_a/Exposure_b$")
    hai_color = get_colors(1, extended_colors=False)
    ax.plot(
        range(len(item_dist_rat_sorted)),
        item_dist_rat_sorted,
        linewidth=2,
        color=hai_color[0],
    )
    if title is not None:
        ax.set_title(title)
    else:
        ax.set_title("Exposure Ratio Plot")
    return ax