Source code for holisticai.bias.mitigation.preprocessing.disparate_impact_remover
import numpy as np
from holisticai.bias.mitigation.commons.disparate_impact_remover._numerical_repairer import (
NumericalRepairer,
)
from holisticai.utils.transformers.bias import BMPreprocessing as BMPre
from holisticai.utils.transformers.bias import SensitiveGroups
[docs]
class DisparateImpactRemover(BMPre):
"""
Disparate impact remover [1]_ edits feature values to increase group fairness
while preserving rank-ordering within groups.
Parameters
----------
repair_level : float, optional
The amount of repair to be applied. It should be between 0.0 and 1.0. Default is 1.
Examples
--------
>>> from holisticai.bias.mitigation import DisparateImpactRemover
>>> mitigator = DisparateImpactRemover()
>>> train_data_transformed = mitigator.fit_transform(train_data, group_a, group_b)
>>> test_data_transformed = mitigator.transform(test_data, group_a, group_b)
References
----------
.. [1] Feldman, Michael, et al. "Certifying and removing disparate impact."
proceedings of the 21th ACM SIGKDD international conference on knowledge
discovery and data mining. 2015.
"""
def __init__(self, repair_level=1.0):
self._assert_parameters(repair_level)
self.repair_level = repair_level
self._sensgroups = SensitiveGroups()
def _assert_parameters(self, repair_level):
"""
Assert parameters
Parameters
----------
repair_level : float
The amount of repair to be applied
Raises
------
ValueError
If repair_level is not between 0.0 and 1.0
"""
if not 0.0 <= repair_level <= 1.0:
raise ValueError("'repair_level' must be between 0.0 and 1.0.")
[docs]
def repair_data(self, X, group_a, group_b):
"""
Repair data to a fair representation
Parameters
----------
X : array-like
Input data
group_a : array-like
mask vector
group_b : array-like
mask vector
Returns
-------
array-like
Repaired input data matrix
"""
# Combine the two group masks into a single matrix
sensitive_features = np.c_[group_a, group_b]
# Convert the sensitive feature matrix to a numeric representation
p_attr = self._sensgroups.fit_transform(sensitive_features, convert_numeric=True).to_numpy()
# Combine the sensitive feature matrix with the input data matrix
data = np.c_[p_attr, X].tolist()
# Apply numerical repair to the first column of the combined matrix
repairer = NumericalRepairer(feature_to_repair=0, repair_level=self.repair_level, kdd=False)
new_data_matrix_np = repairer.repair(data)
# Return only the repaired input data matrix (without the sensitive feature column)
return np.array([np.array(row[1:]) for row in new_data_matrix_np])
[docs]
def fit(self):
"""
Fit the model
Returns
-------
Self
"""
return self