Source code for holisticai.bias.mitigation.preprocessing.reweighing

from __future__ import annotations

from typing import Optional

import numpy as np
import pandas as pd
from holisticai.utils.transformers.bias import BMPreprocessing as BMPre
from holisticai.utils.transformers.bias import SensitiveGroups


class Reweighing(BMPre):
    """
    Reweighing preprocessing [1]_ weights the examples in each group-label
    combination to ensure fairness before classification.

    Every sample belonging to sensitive group ``g`` with label ``c`` receives
    the weight ``P(label = c) / P(label = c | group = g)``, where both
    probabilities are estimated from counts in the data passed to ``fit``.

    Attributes
    ----------
    sample_weight : np.ndarray
        ``float32`` weights computed by ``fit`` (one entry per element of
        ``y``). Only available after ``fit`` has been called.

    Examples
    --------
    >>> from holisticai.bias.mitigation import Reweighing
    >>> mitigator = Reweighing()
    >>> X_train_t = mitigator.fit_transform(X_train, y_train, group_a, group_b)
    >>> X_test_t = mitigator.transform(X_test)

    References
    ----------
    .. [1] Kamiran, Faisal, and Toon Calders. "Data preprocessing techniques
        for classification without discrimination." Knowledge and information
        systems 33.1 (2012): 1-33.
    """

    def __init__(self):
        # Helper that turns the two membership vectors into one group label
        # per sample and exposes the known group names.
        self._sensgroups = SensitiveGroups()

    def fit(
        self,
        y: np.ndarray,
        group_a: np.ndarray,
        group_b: np.ndarray,
        sample_weight: Optional[np.ndarray] = None,
    ) -> Reweighing:
        """
        Fit the Reweighing model to the data.

        This method calculates the sample weights to ensure that the data is
        fair with respect to the specified sensitive groups before
        classification. The result is stored in ``self.sample_weight`` and
        forwarded through ``_update_estimator_param``.

        Parameters
        ----------
        y : array-like
            Target vector
        group_a : array-like
            Group membership vector (binary)
        group_b : array-like
            Group membership vector (binary)
        sample_weight : array-like, optional
            Samples weights vector. Default is None. It is handed to
            ``_load_data`` together with the other inputs but does not
            influence the computed weights.

        Returns
        -------
        Self
        """
        # `_load_data` is not defined in this class (presumably inherited
        # from the base class); it returns the inputs keyed by name.
        params = self._load_data(y=y, sample_weight=sample_weight, group_a=group_a, group_b=group_b)
        y = params["y"]
        group_a = params["group_a"]
        group_b = params["group_b"]

        group_lbs = self._sensgroups.fit_transform(np.stack([group_a, group_b], axis=1))
        classes = np.unique(y)

        df = pd.DataFrame()
        df["LABEL"] = pd.Series(y)
        df["GROUP_ID"] = group_lbs
        df["COUNT"] = 1

        # Counts per observed (group, label) pair and their marginals.
        df_group_values = df.groupby(["GROUP_ID", "LABEL"])["COUNT"].sum()
        df_values = df_group_values.groupby(level="LABEL").sum()
        df_groups = df_group_values.groupby(level="GROUP_ID").sum()

        # P(label | group) and P(label); their ratio is the reweighing factor.
        df_group_values_prob = df_group_values / df_groups
        df_values_prob = df_values / df_values.sum()
        df_group_values_weights = df_values_prob / df_group_values_prob

        # Samples not matched by any (group, label) pair keep weight 1.
        self.sample_weight = np.ones_like(y, dtype=np.float32)
        for g in self._sensgroups.group_names:
            in_group = df["GROUP_ID"] == g
            for c in classes:
                mask = (in_group & (df["LABEL"] == c)).to_numpy()
                if not mask.any():
                    # This group never occurs with this label, so the pair is
                    # absent from the grouped counts; looking it up would
                    # raise KeyError and there is nothing to assign anyway.
                    continue
                self.sample_weight[mask] = df_group_values_weights.at[g, c]

        self._update_estimator_param("sample_weight", self.sample_weight)
        return self

    def transform(self, X: np.ndarray) -> np.ndarray:
        """
        Return ``X`` unchanged (passthrough).

        Parameters
        ----------
        X : matrix-like
            Input matrix

        Returns
        -------
        X : matrix-like
            The same object that was passed in.
        """
        return X

    def fit_transform(
        self,
        X: np.ndarray,
        y: np.ndarray,
        group_a: np.ndarray,
        group_b: np.ndarray,
        sample_weight: Optional[np.ndarray] = None,
    ) -> np.ndarray:
        """
        Fit the Reweighing model to the data and return ``X``.

        This method calculates the sample weights to ensure that the data is
        fair with respect to the specified sensitive groups before
        classification. The transform step returns the same object that was
        passed in as ``X``.

        Parameters
        ----------
        X : matrix-like
            Input matrix
        y : array-like
            Target vector
        group_a : array-like
            Group membership vector (binary)
        group_b : array-like
            Group membership vector (binary)
        sample_weight : array-like, optional
            Samples weights vector. Default is None.

        Returns
        -------
        X : matrix-like
            The input matrix, unchanged.
        """
        return self.fit(y, group_a, group_b, sample_weight).transform(X)