Data Loading and Packages Installation#
First, install the holisticai package if you haven’t already:
%pip install "holisticai[all]"
Then, import the necessary libraries.
[1]:
import warnings
import pandas as pd
from holisticai.bias.metrics import classification_bias_metrics
from holisticai.datasets import load_dataset
from holisticai.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
warnings.filterwarnings("ignore")
Dataset loading#
[2]:
# Load the 'adult' dataset, using `sex` as the protected attribute.
dataset = load_dataset('adult', protected_attribute='sex')

# Hold out 20% of the instances for testing; the fixed random_state keeps
# the split identical across runs.
train_test = dataset.train_test_split(test_size=0.2, random_state=42)
train, test = train_test['train'], train_test['test']

# Trailing expression: display the dataset summary.
dataset
[2]:
[Dataset]
Instances: 45222
Features: X , y , p_attrs , group_a , group_b
Metadata: sex: {'group_a': 'Male', 'group_b': 'Female'}
1. Calibrated Equalized Odds#
Traditional Implementation#
[4]:
# Instantiate the Calibrated Equalized Odds post-processing mitigator.
# cost_constraint="fnr" is passed through to the constructor (presumably it
# selects the false-negative-rate cost -- confirm against the holisticai docs).
from holisticai.bias.mitigation import CalibratedEqualizedOdds
mitigator = CalibratedEqualizedOdds(cost_constraint="fnr")
# Trailing expression: display the mitigator's summary.
mitigator
[4]:
[CalibratedEqualizedOdds]
Type: Bias Mitigation Postprocessing
[5]:
# --- Baseline classifier ---------------------------------------------------
# Standardize the training features, then fit a logistic regression on them.
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
model = LogisticRegression()
model.fit(X_train, train['y'])

# --- Fit the mitigator -----------------------------------------------------
# The mitigator is fitted on the classifier's training-set probabilities
# together with the true training labels and the group membership vectors.
proba_train = model.predict_proba(X_train)
mitigator.fit(
    train['y'],
    proba_train,
    group_a=train['group_a'],
    group_b=train['group_b'],
)

# --- Mitigated test predictions --------------------------------------------
# Reuse the scaler fitted on the training split (transform only, no refit).
X_test = scaler.transform(test['X'])
proba_test = model.predict_proba(X_test)
# NOTE(review): the first positional argument is the *test labels*. Confirm
# that CalibratedEqualizedOdds.transform really expects ground truth here;
# labels are normally unavailable at prediction time.
mitigated = mitigator.transform(test['y'], proba_test, test['group_a'], test['group_b'])
y_pred = mitigated["y_pred"]

# --- Bias metrics ----------------------------------------------------------
metrics = classification_bias_metrics(
    test['group_a'], test['group_b'], y_pred, test['y'], metric_type='both'
)
metrics
[5]:
| Metric | Value | Reference |
|---|---|---|
| Statistical Parity | 0.106757 | 0 |
| Disparate Impact | 2.374672 | 1 |
| Four Fifths Rule | 0.421111 | 1 |
| Cohen D | 0.302012 | 0 |
| 2SD Rule | 13.301377 | 0 |
| Equality of Opportunity Difference | -0.078878 | 0 |
| False Positive Rate Difference | 0.050882 | 0 |
| Average Odds Difference | -0.013998 | 0 |
| Accuracy Difference | -0.147467 | 0 |
2. Equalized Odds#
Traditional Implementation#
[6]:
# Instantiate the Equalized Odds post-processing mitigator.
# solver='highs' and seed=42 are passed through to the constructor
# (presumably the optimisation backend and a fixed seed for repeatable
# results -- confirm against the holisticai docs).
from holisticai.bias.mitigation import EqualizedOdds
mitigator = EqualizedOdds(solver='highs', seed=42)
# Trailing expression: display the mitigator's summary.
mitigator
[6]:
[EqualizedOdds]
Type: Bias Mitigation Postprocessing
[7]:
# Create a fresh classifier for this section. Previously this cell refit the
# `model` object left over from the Calibrated Equalized Odds section, so it
# failed (NameError) when run on its own and was inconsistent with the other
# sections, which all instantiate their own estimator.
model = LogisticRegression()
# Standardize data and fit model
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
model.fit(X_train, train['y'])
# This mitigator is fitted on hard class predictions (`predict`), not on
# probabilities.
y_pred = model.predict(X_train)
# Fit postprocessing model
mitigator.fit(train['y'], y_pred, group_a=train['group_a'], group_b=train['group_b'])
# Predict and mitigate bias; the scaler fitted on the training split is reused
# (transform only) so no test statistics leak into preprocessing.
X_test = scaler.transform(test['X'])
y_pred = model.predict(X_test)
y_pred = mitigator.transform(y_pred, test['group_a'], test['group_b'])["y_pred"]
# Evaluate bias metrics on the mitigated test predictions
metrics = classification_bias_metrics(test['group_a'], test['group_b'], y_pred, test['y'], metric_type='both')
metrics
[7]:
| Metric | Value | Reference |
|---|---|---|
| Statistical Parity | 0.089406 | 0 |
| Disparate Impact | 1.622222 | 1 |
| Four Fifths Rule | 0.616439 | 1 |
| Cohen D | 0.222988 | 0 |
| 2SD Rule | 9.864874 | 0 |
| Equality of Opportunity Difference | 0.004214 | 0 |
| False Positive Rate Difference | -0.003179 | 0 |
| Average Odds Difference | 0.000518 | 0 |
| Accuracy Difference | -0.063250 | 0 |
3. LP Debiaser#
Traditional Implementation#
[3]:
# Instantiate the LP Debiaser (binary) post-processing mitigator with its
# default constructor arguments.
from holisticai.bias.mitigation import LPDebiaserBinary
mitigator = LPDebiaserBinary()
# Trailing expression: display the mitigator's summary.
mitigator
[3]:
[LPDebiaserBinary]
Type: Bias Mitigation Postprocessing
[4]:
# --- Baseline classifier ---------------------------------------------------
# Standardize the training features, then fit a logistic regression on them.
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
model = LogisticRegression()
model.fit(X_train, train['y'])

# --- Fit the mitigator -----------------------------------------------------
# Fitted on training-set probabilities plus true labels and group membership.
y_proba = model.predict_proba(X_train)
mitigator.fit(
    y=train['y'],
    y_proba=y_proba,
    group_a=train['group_a'],
    group_b=train['group_b'],
)

# --- Mitigated test predictions --------------------------------------------
# Reuse the scaler fitted on the training split (transform only, no refit).
X_test = scaler.transform(test['X'])
proba_test = model.predict_proba(X_test)
mitigated = mitigator.transform(
    y_proba=proba_test,
    group_a=test['group_a'],
    group_b=test['group_b'],
)
y_pred = mitigated["y_pred"]

# --- Bias metrics ----------------------------------------------------------
metrics = classification_bias_metrics(
    test['group_a'], test['group_b'], y_pred, test['y'], metric_type='both'
)
metrics
[4]:
| Metric | Value | Reference |
|---|---|---|
| Statistical Parity | 0.110736 | 0 |
| Disparate Impact | 1.359246 | 1 |
| Four Fifths Rule | 0.735702 | 1 |
| Cohen D | 0.229077 | 0 |
| 2SD Rule | 10.131199 | 0 |
| Equality of Opportunity Difference | -0.006369 | 0 |
| False Positive Rate Difference | 0.000221 | 0 |
| Average Odds Difference | -0.003074 | 0 |
| Accuracy Difference | 0.011336 | 0 |
4. ML Debiaser#
Traditional Implementation#
[10]:
# Instantiate the ML Debiaser post-processing mitigator with its default
# constructor arguments.
from holisticai.bias.mitigation import MLDebiaser
mitigator = MLDebiaser()
# Trailing expression: display the mitigator's summary.
mitigator
[10]:
[MLDebiaser]
Type: Bias Mitigation Postprocessing
[11]:
# --- Baseline classifier ---------------------------------------------------
# Standardize the training features, then fit a logistic regression on them.
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
model = LogisticRegression()
model.fit(X_train, train['y'])

# --- Fit the mitigator -----------------------------------------------------
# Unlike the other mitigators in this notebook, this fit call takes only the
# predicted probabilities and the group membership vectors (no true labels).
proba_train = model.predict_proba(X_train)
mitigator.fit(proba_train, group_a=train['group_a'], group_b=train['group_b'])

# --- Mitigated test predictions --------------------------------------------
# Reuse the scaler fitted on the training split (transform only, no refit).
X_test = scaler.transform(test['X'])
proba_test = model.predict_proba(X_test)
mitigated = mitigator.transform(proba_test, test['group_a'], test['group_b'])
y_pred = mitigated["y_pred"]

# --- Bias metrics ----------------------------------------------------------
metrics = classification_bias_metrics(
    test['group_a'], test['group_b'], y_pred, test['y'], metric_type='both'
)
metrics
[elapsed time: 00:00:00 | iter:2/2]
[11]:
| Metric | Value | Reference |
|---|---|---|
| Statistical Parity | 0.078856 | 0 |
| Disparate Impact | 1.503264 | 1 |
| Four Fifths Rule | 0.665219 | 1 |
| Cohen D | 0.194378 | 0 |
| 2SD Rule | 8.610334 | 0 |
| Equality of Opportunity Difference | -0.160993 | 0 |
| False Positive Rate Difference | 0.002258 | 0 |
| Average Odds Difference | -0.079367 | 0 |
| Accuracy Difference | -0.084606 | 0 |
5. Reject Option#
Traditional Implementation#
[12]:
# Instantiate the Reject Option Classification post-processing mitigator.
# metric_name is passed through verbatim (presumably it selects the fairness
# metric the method targets -- confirm against the holisticai docs).
from holisticai.bias.mitigation import RejectOptionClassification
mitigator = RejectOptionClassification(metric_name="Statistical parity difference")
# Trailing expression: display the mitigator's summary.
mitigator
[12]:
[RejectOptionClassification]
Type: Bias Mitigation Postprocessing
[13]:
# --- Baseline classifier ---------------------------------------------------
# Standardize the training features, then fit a logistic regression on them.
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
model = LogisticRegression()
model.fit(X_train, train['y'])

# --- Fit the mitigator -----------------------------------------------------
# Fitted on training-set probabilities plus true labels and group membership.
proba_train = model.predict_proba(X_train)
mitigator.fit(
    train['y'],
    proba_train,
    group_a=train['group_a'],
    group_b=train['group_b'],
)

# --- Mitigated test predictions --------------------------------------------
# Reuse the scaler fitted on the training split (transform only, no refit).
# This transform call takes both the hard labels and the probabilities.
X_test = scaler.transform(test['X'])
labels_test = model.predict(X_test)
y_proba = model.predict_proba(X_test)
mitigated = mitigator.transform(labels_test, y_proba, test['group_a'], test['group_b'])
y_pred = mitigated["y_pred"]

# --- Bias metrics ----------------------------------------------------------
metrics = classification_bias_metrics(
    test['group_a'], test['group_b'], y_pred, test['y'], metric_type='both'
)
metrics
[13]:
| Metric | Value | Reference |
|---|---|---|
| Statistical Parity | 0.038357 | 0 |
| Disparate Impact | 2.475245 | 1 |
| Four Fifths Rule | 0.404000 | 1 |
| Cohen D | 0.173384 | 0 |
| 2SD Rule | 7.686846 | 0 |
| Equality of Opportunity Difference | -0.026171 | 0 |
| False Positive Rate Difference | 0.006603 | 0 |
| Average Odds Difference | -0.009784 | 0 |
| Accuracy Difference | -0.161902 | 0 |