Data Loading and Package Installation#

First, install the holisticai package if you haven’t already:

!pip install holisticai[all]

Then, import the necessary libraries.

[1]:
import warnings

import pandas as pd
from holisticai.bias.metrics import classification_bias_metrics
from holisticai.datasets import load_dataset
from holisticai.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Ignore every warning raised from here on, presumably to keep the cell
# outputs below uncluttered.
# NOTE(review): this is a blanket filter, so it also hides warnings that may
# matter (e.g. deprecation or solver-convergence warnings); narrow it if needed.
warnings.filterwarnings("ignore")

Dataset loading#

[2]:
# Load the 'adult' dataset, using 'sex' as the protected attribute.
dataset = load_dataset("adult", protected_attribute="sex")

# Hold out 20% of the instances for evaluation; the fixed random_state keeps
# the split identical across runs.
train_test = dataset.train_test_split(test_size=0.2, random_state=42)
train, test = train_test["train"], train_test["test"]

# Bare last expression: display the dataset summary.
dataset
[2]:
[Dataset]
Instances: 45222
Features: X , y , p_attrs , group_a , group_b
Metadata: sex: {'group_a': 'Male', 'group_b': 'Female'}

1. Calibrated Equalized Odds#

Traditional Implementation#

[4]:
# Define postprocessing model
# The mitigator created here is fitted on the classifier's training-set
# probabilities and applied to its test-set probabilities in the following cell.
from holisticai.bias.mitigation import CalibratedEqualizedOdds

# NOTE(review): "fnr" presumably selects the false-negative rate as the cost to
# balance between the two groups — confirm against the holisticai documentation.
mitigator = CalibratedEqualizedOdds(cost_constraint="fnr")
mitigator  # bare last expression: display the configured mitigator
[4]:
[CalibratedEqualizedOdds]
CalibratedEqualizedOdds(cost_constraint=fnr, alpha=None)

Type: Bias Mitigation Postprocessing
[5]:

model = LogisticRegression()

# Standardize data and fit model
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
model.fit(X_train, train['y'])

# Fit postprocessing model on the classifier's training-set probabilities
y_proba_train = model.predict_proba(X_train)
mitigator.fit(train['y'], y_proba_train, group_a=train['group_a'], group_b=train['group_b'])

# Predict and mitigate bias (the scaler is only transformed, not refitted, on test data)
X_test = scaler.transform(test['X'])
y_proba_test = model.predict_proba(X_test)
# NOTE(review): the test labels are passed as the first positional argument of
# transform(); confirm against the CalibratedEqualizedOdds.transform signature
# that ground-truth labels are really what it expects here.
y_pred = mitigator.transform(test['y'], y_proba_test, test['group_a'], test['group_b'])["y_pred"]

# Evaluate bias metrics on the mitigated test predictions
metrics = classification_bias_metrics(test['group_a'], test['group_b'], y_pred, test['y'], metric_type='both')
metrics
[5]:
Value Reference
Metric
Statistical Parity 0.106757 0
Disparate Impact 2.374672 1
Four Fifths Rule 0.421111 1
Cohen D 0.302012 0
2SD Rule 13.301377 0
Equality of Opportunity Difference -0.078878 0
False Positive Rate Difference 0.050882 0
Average Odds Difference -0.013998 0
Accuracy Difference -0.147467 0

2. Equalized Odds#

Traditional Implementation#

[6]:
# Define postprocessing model
# The mitigator created here is fitted and applied in the following cell.
from holisticai.bias.mitigation import EqualizedOdds

# NOTE(review): solver='highs' presumably names the linear-programming backend
# and seed=42 presumably fixes any randomized step for reproducibility —
# confirm both against the holisticai documentation.
mitigator = EqualizedOdds(solver='highs', seed=42)
mitigator  # bare last expression: display the configured mitigator
[6]:
[EqualizedOdds]
EqualizedOdds(solver=highs, seed=42)

Type: Bias Mitigation Postprocessing
[7]:

# Create the classifier in this cell so it does not depend on a `model`
# left over from an earlier section of the notebook.
model = LogisticRegression()

# Standardize data and fit model
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
model.fit(X_train, train['y'])
y_pred_train = model.predict(X_train)

# Fit postprocessing model on the classifier's hard training-set predictions
mitigator.fit(train['y'], y_pred_train, group_a=train['group_a'], group_b=train['group_b'])

# Predict and mitigate bias (the scaler is only transformed, not refitted, on test data)
X_test = scaler.transform(test['X'])
y_pred = model.predict(X_test)
y_pred = mitigator.transform(y_pred, test['group_a'], test['group_b'])["y_pred"]

# Evaluate bias metrics on the mitigated test predictions
metrics = classification_bias_metrics(test['group_a'], test['group_b'], y_pred, test['y'], metric_type='both')
metrics
[7]:
Value Reference
Metric
Statistical Parity 0.089406 0
Disparate Impact 1.622222 1
Four Fifths Rule 0.616439 1
Cohen D 0.222988 0
2SD Rule 9.864874 0
Equality of Opportunity Difference 0.004214 0
False Positive Rate Difference -0.003179 0
Average Odds Difference 0.000518 0
Accuracy Difference -0.063250 0

3. LP Debiaser#

Traditional Implementation#

[3]:
# Define postprocessing model
# The mitigator created here is fitted and applied in the following cell.
from holisticai.bias.mitigation import LPDebiaserBinary

# Default configuration; the repr displayed below reports
# constraint=EqualizedOdds for it.
mitigator = LPDebiaserBinary()
mitigator  # bare last expression: display the configured mitigator
[3]:
[LPDebiaserBinary]
LPDebiaserBinary(constraint=EqualizedOdds)

Type: Bias Mitigation Postprocessing
[4]:
# Baseline classifier trained on standardized training features
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
model = LogisticRegression()
model.fit(X_train, train['y'])

# Fit the postprocessor on the training-set class probabilities
y_proba = model.predict_proba(X_train)
mitigator.fit(
    y=train['y'],
    y_proba=y_proba,
    group_a=train['group_a'],
    group_b=train['group_b'],
)

# Score the test set with the already-fitted scaler and model, then debias
X_test = scaler.transform(test['X'])
y_proba_test = model.predict_proba(X_test)
mitigated = mitigator.transform(
    y_proba=y_proba_test,
    group_a=test['group_a'],
    group_b=test['group_b'],
)
y_pred = mitigated["y_pred"]

# Bias metrics of the mitigated predictions against the test labels
metrics = classification_bias_metrics(
    test['group_a'],
    test['group_b'],
    y_pred,
    test['y'],
    metric_type='both',
)
metrics
[4]:
Value Reference
Metric
Statistical Parity 0.110736 0
Disparate Impact 1.359246 1
Four Fifths Rule 0.735702 1
Cohen D 0.229077 0
2SD Rule 10.131199 0
Equality of Opportunity Difference -0.006369 0
False Positive Rate Difference 0.000221 0
Average Odds Difference -0.003074 0
Accuracy Difference 0.011336 0

4. ML Debiaser#

Traditional Implementation#

[10]:
# Define postprocessing model
# The mitigator created here is fitted and applied in the following cell.
from holisticai.bias.mitigation import MLDebiaser

# Default configuration; the repr displayed below reports gamma=1.0, eps=0,
# eta=0.5 and sgd_steps=10000 among its settings.
mitigator = MLDebiaser()
mitigator  # bare last expression: display the configured mitigator
[10]:
[MLDebiaser]
MLDebiaser(gamma=1.0, eps=0, eta=0.5, sgd_steps=10000, ...)

Type: Bias Mitigation Postprocessing
[11]:
# Baseline classifier trained on standardized training features
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
model = LogisticRegression()
model.fit(X_train, train['y'])

# Fit the postprocessor on the training-set class probabilities
# (this mitigator's fit() is called without the labels)
y_proba_train = model.predict_proba(X_train)
mitigator.fit(y_proba_train, group_a=train['group_a'], group_b=train['group_b'])

# Score the test set with the already-fitted scaler and model, then debias
X_test = scaler.transform(test['X'])
y_proba_test = model.predict_proba(X_test)
mitigated = mitigator.transform(y_proba_test, test['group_a'], test['group_b'])
y_pred = mitigated["y_pred"]

# Bias metrics of the mitigated predictions against the test labels
metrics = classification_bias_metrics(
    test['group_a'],
    test['group_b'],
    y_pred,
    test['y'],
    metric_type='both',
)
metrics
[elapsed time: 00:00:00 | iter:2/2]
[11]:
Value Reference
Metric
Statistical Parity 0.078856 0
Disparate Impact 1.503264 1
Four Fifths Rule 0.665219 1
Cohen D 0.194378 0
2SD Rule 8.610334 0
Equality of Opportunity Difference -0.160993 0
False Positive Rate Difference 0.002258 0
Average Odds Difference -0.079367 0
Accuracy Difference -0.084606 0

5. Reject Option#

Traditional Implementation#

[12]:
# Define postprocessing model
# The mitigator created here is fitted and applied in the following cell.
from holisticai.bias.mitigation import RejectOptionClassification

# NOTE(review): metric_name presumably selects the fairness metric that the
# threshold search optimizes — confirm against the holisticai documentation.
mitigator = RejectOptionClassification(metric_name="Statistical parity difference")
mitigator  # bare last expression: display the configured mitigator
[12]:
[RejectOptionClassification]
RejectOptionClassification(low_class_thresh=0.01, high_class_thresh=0.99, num_class_thresh=100, num_ROC_margin=50, ...)

Type: Bias Mitigation Postprocessing
[13]:

model = LogisticRegression()

# Standardize data and fit model
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
model.fit(X_train, train['y'])
y_proba_train = model.predict_proba(X_train)

# Fit postprocessing model on the classifier's training-set probabilities
mitigator.fit(train['y'], y_proba_train, group_a=train['group_a'], group_b=train['group_b'])

# Predict and mitigate bias: this mitigator's transform() receives both the
# hard predictions and the class probabilities for the test set
X_test = scaler.transform(test['X'])
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)
y_pred = mitigator.transform(y_pred, y_proba, test['group_a'], test['group_b'])["y_pred"]

# Evaluate bias metrics on the mitigated test predictions
metrics = classification_bias_metrics(test['group_a'], test['group_b'], y_pred, test['y'], metric_type='both')
metrics
[13]:
Value Reference
Metric
Statistical Parity 0.038357 0
Disparate Impact 2.475245 1
Four Fifths Rule 0.404000 1
Cohen D 0.173384 0
2SD Rule 7.686846 0
Equality of Opportunity Difference -0.026171 0
False Positive Rate Difference 0.006603 0
Average Odds Difference -0.009784 0
Accuracy Difference -0.161902 0