# Source code for holisticai.bias.plots._report

# base imports
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# utils
from holisticai.utils import get_colors

# Range metrics: tolerance added to and subtracted from each metric's
# reference value to build the shaded "Fair Area" band in the report plots.
# Metrics that share a tolerance are grouped; insertion order is kept.
RANGE_METRICS_REGRESSION = {
    **dict.fromkeys(
        (
            "RMSE Ratio",
            "RMSE Ratio Q80",
            "MAE Ratio",
            "MAE Ratio Q80",
        ),
        0.2,
    ),
    **dict.fromkeys(
        (
            "Correlation Difference",
            "Disparate Impact Q90",
            "Disparate Impact Q80",
            "Disparate Impact Q50",
            "Statistical Parity Q50",
            "Average Score Ratio",
        ),
        0.05,
    ),
    **dict.fromkeys(
        (
            "Average Score Difference",
            "Z Score Difference",
            "Max Statistical Parity",
            "Statistical Parity AUC",
        ),
        0.1,
    ),
}

RANGE_METRICS_CLASSIFICATION = dict.fromkeys(
    (
        "Statistical Parity",
        "Disparate Impact",
        "Four Fifths Rule",
        "Cohen D",
        "2SD Rule",
        "Equality of Opportunity Difference",
        "False Positive Rate Difference",
        "Average Odds Difference",
        "Accuracy Difference",
    ),
    0.05,
)

RANGE_METRICS_CLUSTERING = dict.fromkeys(
    (
        "Cluster Balance",
        "Minimum Cluster Ratio",
        "Cluster Distribution Total Variation",
        "Cluster Distribution KL Div",
        "Social Fairness Ratio",
        "Silhouette Difference",
    ),
    0.05,
)


# Lookup from the ``model_type`` string to the matching tolerance table.
RANGE_METRICS = dict(
    binary_classification=RANGE_METRICS_CLASSIFICATION,
    regression=RANGE_METRICS_REGRESSION,
    clustering=RANGE_METRICS_CLUSTERING,
)



# [docs]
def bias_metrics_report(
    model_type: str,
    table_metrics: pd.DataFrame,
    table_metrics_mitigated: pd.DataFrame = None,
):
    """
    Plot bias report for different model types.

    One bar chart is drawn per metric (four charts per row). Each chart shows
    the metric value(s), a dashed line at the metric's reference value and a
    shaded "Fair Area" band of ``reference +/- RANGE_METRICS[model_type][name]``.

    Parameters
    ----------
    model_type : str
        Type of model: 'binary_classification', 'regression', 'clustering'
    table_metrics : pandas.DataFrame
        Dataframe containing bias metrics, indexed by metric name. Without a
        mitigated table it must have exactly two columns, which are relabelled
        "Baseline" and "Reference" in that order. With a mitigated table its
        "Value" column is used as the baseline.
    table_metrics_mitigated : pandas.DataFrame, optional
        Dataframe with "Value" and "Reference" columns holding the metrics of
        the mitigated model. When given, a "Mitigator" bar is drawn next to the
        "Baseline" bar and the reference values are taken from this table.
        By default None.

    Raises
    ------
    ValueError
        If ``table_metrics`` has no rows.
    KeyError
        If ``model_type`` is not a key of ``RANGE_METRICS``, or a metric name
        (other than "No Disparate Impact Level") has no tolerance defined for
        that model type.
    """
    metric_names = list(table_metrics.index)
    if not metric_names:
        # Fail early with a clear message instead of letting matplotlib
        # reject ``nrows=0`` further down.
        raise ValueError("table_metrics must contain at least one metric")

    if table_metrics_mitigated is None:
        metrics_biased = table_metrics.copy()
        columns = ["Baseline", "Reference"]
        columns_plot = ["Baseline"]
        metrics_biased.columns = columns
        fill_range = [-1, 1]

    else:
        metrics_biased = pd.concat(
            [table_metrics["Value"], table_metrics_mitigated[["Value", "Reference"]]],
            axis=1,
        )
        columns = ["Baseline", "Mitigator", "Reference"]
        columns_plot = ["Baseline", "Mitigator"]
        metrics_biased.columns = columns
        # Wider band so the shaded area spans both bars.
        fill_range = [-0.5, 1.5]

    cols = 4
    # Ceiling division: enough rows to hold every metric.
    rows = (len(metric_names) + cols - 1) // cols

    threshold = 3
    fig_size = (12, 7) if rows >= threshold else (12, 4)

    sns.set_style("darkgrid")
    # squeeze=False keeps ``axes`` two-dimensional even for a single row, so
    # ``axes[row, col]`` is valid for any number of metrics.
    fig, axes = plt.subplots(ncols=cols, nrows=rows, figsize=fig_size, squeeze=False)

    for i, name in enumerate(metric_names):
        metric_data = metrics_biased[metrics_biased.index == name]
        reference = metric_data["Reference"].values[0]
        row, col = divmod(i, cols)
        ax = axes[row, col]

        sns.barplot(
            data=metric_data[columns_plot],
            palette=get_colors(2),
            ax=ax,
        )
        ax.set_title(name)
        ax.axhline(y=reference, color="black", linestyle="--")
        ax.set_ylabel("Score")

        # "No Disparate Impact Level" has no tolerance band.
        if name != "No Disparate Impact Level":
            tolerance = RANGE_METRICS[model_type][name]
            ax.fill_between(
                fill_range,
                reference - tolerance,
                reference + tolerance,
                color="slategray",
                alpha=0.4,
            )

        # A single shared legend, attached to the first chart.
        if i == 0:
            ax.legend(
                [
                    plt.Line2D([0], [0], linestyle="--", color="black", lw=2, label="Reference"),
                    plt.Rectangle(
                        [0, 0],
                        1,
                        1,
                        fc="slategray",
                        alpha=0.4,
                        ec="None",
                        label="Range",
                    ),
                ],
                ["Reference", "Fair Area"],
                loc="upper left",
                bbox_to_anchor=(-1, 1.0),
            )

    # Hide the unused cells of the last row. When the grid is exactly full the
    # slice is empty, so no out-of-range row is ever indexed.
    for unused_ax in axes.flat[len(metric_names):]:
        unused_ax.axis("off")

    # Lay out once, after every chart has been drawn.
    plt.tight_layout()