# Source code for holisticai.bias.mitigation.inprocessing.grid_search.transformer

from __future__ import annotations

from typing import Literal

import numpy as np
from holisticai.bias.mitigation.inprocessing.commons.classification import _constraints as cc
from holisticai.bias.mitigation.inprocessing.commons.regression import _constraints as rc
from holisticai.bias.mitigation.inprocessing.commons.regression import _losses as rl
from holisticai.bias.mitigation.inprocessing.grid_search.algorithm import GridSearchAlgorithm
from holisticai.utils.transformers.bias import BMInprocessing as BMImp
from sklearn.base import BaseEstimator, clone


class GridSearchReduction(BMImp, BaseEstimator):
    """
    Grid Search Reduction for fair classification or fair regression.

    (1) For classification it reduces fair classification to a sequence of
    cost-sensitive classification problems, returning the deterministic
    classifier with the lowest empirical error subject to fair classification
    constraints among the candidates searched [1]_.

    (2) For regression it uses the same principle to return a deterministic
    regressor with the lowest empirical error subject to the constraint of
    bounded group loss [2]_.

    Parameters
    ----------
    constraints : string
        The disparity constraints expressed as string:

        - "DemographicParity",
        - "EqualizedOdds",
        - "TruePositiveRateParity",
        - "FalsePositiveRateParity",
        - "ErrorRateParity"
        - "BoundedGroupLoss"

    constraint_weight : float
        Specifies the relative weight put on the constraint violation when
        selecting the best model. The weight placed on the error rate will be
        :code:`1-constraint_weight`

    loss : str
        String identifying loss function for constraints. Options include
        "ZeroOne", "Square", and "Absolute." Only read when ``constraints``
        is "BoundedGroupLoss".

    min_val : float
        Loss function parameter for "Square" and "Absolute," typically the
        minimum of the range of y values.

    max_val : float
        Loss function parameter for "Square" and "Absolute," typically the
        maximum of the range of y values.

    grid_size : int
        The number of Lagrange multipliers to generate in the grid

    grid_limit : float
        The largest Lagrange multiplier to generate. The grid will contain
        values distributed between :code:`-grid_limit` and :code:`grid_limit`
        by default

    verbose : int
        If >0, will show progress percentage.

    Examples
    --------
    >>> from holisticai.bias.mitigation import GridSearchReduction
    >>> mitigator = GridSearchReduction(**params).transform_estimator(model)
    >>> mitigator.fit(train_data, y, group_a, group_b)
    >>> test_data_transformed = mitigator.predict(test_data)

    References
    ----------
    .. [1] Agarwal, Alekh, et al. "A reductions approach to fair
        classification." International Conference on Machine Learning.
        PMLR, 2018.
    .. [2] Agarwal, Alekh, Miroslav Dudík, and Zhiwei Steven Wu. "Fair
        regression: Quantitative definitions and reduction-based algorithms."
        International Conference on Machine Learning. PMLR, 2019.
    """

    CONSTRAINTS = Literal[
        "DemographicParity",
        "EqualizedOdds",
        "TruePositiveRateParity",
        "FalsePositiveRateParity",
        "ErrorRateParity",
        "BoundedGroupLoss",
    ]

    def __init__(
        self,
        constraints: str = "EqualizedOdds",
        constraint_weight: float = 0.5,
        loss: str = "ZeroOne",
        min_val: float = 0.5,
        max_val: float = 0.5,
        grid_size: int = 10,
        grid_limit: float = 2,
        verbose: int = 0,
    ):
        self.constraints = constraints
        self.constraint_weight = constraint_weight
        self.loss = loss
        # NOTE(review): min_val and max_val share the default 0.5; callers
        # using the "Square"/"Absolute" losses presumably need to pass the
        # real range of y -- confirm against the loss implementations.
        self.min_val = min_val
        self.max_val = max_val
        self.grid_size = grid_size
        self.grid_limit = grid_limit
        self.verbose = verbose

    def transform_estimator(self, estimator) -> GridSearchReduction:
        """
        Attach the base estimator that ``fit`` will clone and train.

        Parameters
        ----------
        estimator : object
            Estimator to wrap; ``fit`` passes it to ``sklearn.base.clone``.

        Returns
        -------
        Self
        """
        self.estimator = estimator
        return self

    def fit(
        self,
        X: np.ndarray,
        y: np.ndarray,
        group_a: np.ndarray,
        group_b: np.ndarray,
    ) -> GridSearchReduction:
        """
        Fit model using Grid Search Reduction.

        Parameters
        ----------
        X : matrix-like
            Input matrix
        y : array-like
            Target vector
        group_a : array-like
            Group membership vector (binary)
        group_b : array-like
            Group membership vector (binary)

        Returns
        -------
        Self

        Raises
        ------
        AttributeError
            If no estimator was attached with ``transform_estimator``.
        KeyError
            If ``constraints`` (or ``loss``, for "BoundedGroupLoss") is not
            one of the supported names.
        """
        params = self._load_data(X=X, y=y, group_a=group_a, group_b=group_b)
        group_a = params["group_a"]
        group_b = params["group_b"]
        X = params["X"]
        y = params["y"]
        # One column per group membership vector.
        sensitive_features = np.stack([group_a, group_b], axis=1)

        # ``estimator`` is not an ``__init__`` parameter; it only exists after
        # ``transform_estimator`` has been called. Fail with an actionable
        # message (same exception type as the bare attribute access).
        if not hasattr(self, "estimator"):
            raise AttributeError(
                "GridSearchReduction has no estimator; call "
                "transform_estimator(estimator) before fit()."
            )
        self.estimator_ = clone(self.estimator)

        constraints_catalog = {
            "DemographicParity": cc.DemographicParity,
            "EqualizedOdds": cc.EqualizedOdds,
            "TruePositiveRateParity": cc.TruePositiveRateParity,
            "FalsePositiveRateParity": cc.FalsePositiveRateParity,
            "ErrorRateParity": cc.ErrorRateParity,
            "BoundedGroupLoss": rc.BoundedGroupLoss,
        }
        constraint_kargs = self._constraint_parameters()
        try:
            constraint_cls = constraints_catalog[self.constraints]
        except KeyError:
            # Keep KeyError for backward compatibility, but say what is valid.
            raise KeyError(
                f"Unknown constraints {self.constraints!r}; expected one of "
                f"{sorted(constraints_catalog)}."
            ) from None
        self.constraint_ = constraint_cls(**constraint_kargs)

        self.model_ = GridSearchAlgorithm(
            estimator=self.estimator_,
            constraint=self.constraint_,
            constraint_weight=self.constraint_weight,
            grid_size=self.grid_size,
            grid_limit=self.grid_limit,
            verbose=self.verbose,
        )
        self.model_.fit(X, y, sensitive_features=sensitive_features)
        return self

    def _constraint_parameters(self) -> dict:
        """
        Build the keyword arguments for the selected constraint class.

        Only "BoundedGroupLoss" takes arguments: a loss object, which is also
        stored on ``self.loss_``. Every other constraint gets an empty dict.

        Returns
        -------
        dict
            Keyword arguments to pass to the constraint constructor.

        Raises
        ------
        KeyError
            If ``constraints`` is "BoundedGroupLoss" and ``loss`` is not one
            of "ZeroOne", "Square" or "Absolute".
        """
        kargs = {}
        if self.constraints == "BoundedGroupLoss":
            losses = {
                "ZeroOne": rl.ZeroOneLoss,
                "Square": rl.SquareLoss,
                "Absolute": rl.AbsoluteLoss,
            }
            try:
                loss_cls = losses[self.loss]
            except KeyError:
                # Keep KeyError for backward compatibility, but say what is valid.
                raise KeyError(
                    f"Unknown loss {self.loss!r}; expected one of "
                    f"{sorted(losses)}."
                ) from None

            # ZeroOne takes no arguments; the other losses are built from
            # (min_val, max_val).
            if self.loss == "ZeroOne":
                self.loss_ = loss_cls()
            else:
                self.loss_ = loss_cls(self.min_val, self.max_val)
            kargs["loss"] = self.loss_
        return kargs

    def predict(self, X):
        """
        Predict output for the given samples.

        Delegates to the fitted grid-search model, so ``fit`` must have been
        called first.

        Parameters
        ----------
        X : matrix-like
            Input Matrix

        Returns
        -------
        numpy.ndarray
            Predicted output
        """
        return self.model_.predict(X)

    def predict_proba(self, X):
        """
        Probability estimate for the given samples.

        Delegates to the fitted grid-search model, so ``fit`` must have been
        called first.

        Parameters
        ----------
        X : matrix-like
            Input Matrix

        Returns
        -------
        numpy.ndarray
            probability output
        """
        return self.model_.predict_proba(X)