Ethical AI & Bias Detection: Building Fair and Responsible AI Systems
As AI systems increasingly impact critical decisions such as hiring, lending, and criminal justice, ensuring fairness and detecting bias becomes essential. This guide covers practical techniques for building ethical AI systems.
Understanding Bias in AI
Types of Bias
"""
Common types of bias in AI systems:
1. Data Bias: Training data doesn't represent population
2. Algorithmic Bias: Model architecture favors certain groups
3. Measurement Bias: Metrics don't capture true outcomes
4. Aggregation Bias: Treating diverse groups as homogeneous
5. Evaluation Bias: Test set doesn't reflect real-world distribution
"""
# Example: Detecting data bias
import pandas as pd
import numpy as np
def analyze_data_distribution(df, protected_attribute, target):
"""Analyze data distribution across protected attributes."""
distribution = df.groupby(protected_attribute)[target].agg([
'count', 'mean', 'std'
])
print(f"Distribution of {target} by {protected_attribute}:")
print(distribution)
# Calculate representation ratio
total = len(df)
for group in df[protected_attribute].unique():
count = len(df[df[protected_attribute] == group])
percentage = (count / total) * 100
print(f"{group}: {percentage:.1f}%")
return distribution
# Usage
df = pd.DataFrame({
'gender': ['M', 'F', 'M', 'F', 'M', 'F'] * 100,
'hired': [1, 0, 1, 0, 1, 1] * 100
})
analyze_data_distribution(df, 'gender', 'hired')
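Raw percentages show representation, but it can also help to test whether the outcome is statistically associated with the protected attribute. A minimal sketch using SciPy's chi-squared test of independence (the 0.05 cutoff below is a conventional choice, not a rule):

from scipy.stats import chi2_contingency

def test_outcome_association(df, protected_attribute, target):
    """Chi-squared test of independence between a protected attribute
    and the outcome; a small p-value suggests they are associated."""
    contingency = pd.crosstab(df[protected_attribute], df[target])
    chi2, p_value, dof, expected = chi2_contingency(contingency)
    print(f"Chi-squared: {chi2:.4f}, p-value: {p_value:.4f}")
    if p_value < 0.05:
        print("Outcome is statistically associated with the protected attribute")
    return p_value

# Usage (df from the example above)
test_outcome_association(df, 'gender', 'hired')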
Fairness Metrics
Demographic Parity
def demographic_parity(y_pred, protected_attr):
    """
    Check whether the positive prediction rate is equal across groups.
    Demographic parity: P(Y_pred=1|A=0) = P(Y_pred=1|A=1)
    """
    groups = np.unique(protected_attr)
    positive_rates = {}
    for group in groups:
        mask = protected_attr == group
        positive_rates[group] = np.mean(y_pred[mask] == 1)
    print("Positive prediction rates by group:")
    for group, rate in positive_rates.items():
        print(f"Group {group}: {rate:.4f}")
    # Disparate impact ratio (min/max); values below 0.8 fail the
    # common "four-fifths rule" heuristic
    rates = list(positive_rates.values())
    disparity = min(rates) / max(rates) if max(rates) > 0 else 0.0
    print(f"Disparity ratio: {disparity:.4f}")
    return positive_rates
# Usage
y_pred = np.array([1, 0, 1, 1, 0, 1] * 100)
protected = np.array([0, 1, 0, 1, 0, 1] * 100)
demographic_parity(y_pred, protected)
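With small groups, an observed gap can be plain noise. As a rough significance check, a two-proportion z-test can be run on the positive rates of two groups; this sketch assumes statsmodels is available (it is not used elsewhere in this guide):

from statsmodels.stats.proportion import proportions_ztest

def test_rate_difference(y_pred, protected_attr, group_a=0, group_b=1):
    """Two-proportion z-test on positive prediction rates for two groups."""
    mask_a = protected_attr == group_a
    mask_b = protected_attr == group_b
    counts = np.array([np.sum(y_pred[mask_a] == 1), np.sum(y_pred[mask_b] == 1)])
    nobs = np.array([mask_a.sum(), mask_b.sum()])
    stat, p_value = proportions_ztest(counts, nobs)
    print(f"z = {stat:.4f}, p-value = {p_value:.4f}")
    return p_value

# Usage
test_rate_difference(y_pred, protected)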
Equalized Odds
from sklearn.metrics import confusion_matrix
def equalized_odds(y_true, y_pred, protected_attr):
"""
Check if true positive rate and false positive rate are equal across groups.
Equalized odds: TPR(A=0) = TPR(A=1) and FPR(A=0) = FPR(A=1)
"""
groups = np.unique(protected_attr)
metrics = {}
for group in groups:
mask = protected_attr == group
y_true_group = y_true[mask]
y_pred_group = y_pred[mask]
tn, fp, fn, tp = confusion_matrix(y_true_group, y_pred_group, labels=[0, 1]).ravel()  # fixed label order guards against single-class groups
tpr = tp / (tp + fn) if (tp + fn) > 0 else 0
fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
metrics[group] = {'TPR': tpr, 'FPR': fpr}
print("Equalized Odds Metrics:")
for group, values in metrics.items():
print(f"Group {group}: TPR={values['TPR']:.4f}, FPR={values['FPR']:.4f}")
return metrics
# Usage
y_true = np.array([1, 0, 1, 1, 0, 1] * 100)
y_pred = np.array([1, 0, 1, 0, 0, 1] * 100)
protected = np.array([0, 1, 0, 1, 0, 1] * 100)
equalized_odds(y_true, y_pred, protected)
Calibration
def calibration_analysis(y_true, y_pred_proba, protected_attr, n_bins=10):
"""
Check if predicted probabilities match actual outcomes across groups.
"""
groups = np.unique(protected_attr)
for group in groups:
mask = protected_attr == group
y_true_group = y_true[mask]
y_pred_group = y_pred_proba[mask]
# Bin predictions
bins = np.linspace(0, 1, n_bins + 1)
bin_indices = np.clip(np.digitize(y_pred_group, bins), 1, n_bins)  # keep prob == 1.0 in the top bin
print(f"\nCalibration for Group {group}:")
print("Predicted Prob | Actual Positive Rate")
for bin_idx in range(1, n_bins + 1):
mask_bin = bin_indices == bin_idx
if mask_bin.sum() > 0:
pred_prob = bins[bin_idx - 1]
actual_rate = y_true_group[mask_bin].mean()
print(f"{pred_prob:.2f}-{bins[bin_idx]:.2f} | {actual_rate:.4f}")
# Usage
y_true = np.array([1, 0, 1, 1, 0, 1] * 100)
y_pred_proba = np.random.rand(600)
protected = np.array([0, 1, 0, 1, 0, 1] * 100)
calibration_analysis(y_true, y_pred_proba, protected)
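scikit-learn's calibration_curve does this binning for you; a per-group sketch using the same arrays:

from sklearn.calibration import calibration_curve

def calibration_by_group(y_true, y_pred_proba, protected_attr, n_bins=10):
    """Per-group reliability curves via sklearn's calibration_curve."""
    for group in np.unique(protected_attr):
        mask = protected_attr == group
        prob_true, prob_pred = calibration_curve(
            y_true[mask], y_pred_proba[mask], n_bins=n_bins
        )
        print(f"\nGroup {group} (mean predicted -> actual positive rate):")
        for pred, actual in zip(prob_pred, prob_true):
            print(f"{pred:.2f} -> {actual:.4f}")

# Usage
calibration_by_group(y_true, y_pred_proba, protected)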
Bias Detection Tools
Using Fairlearn
from fairlearn.metrics import MetricFrame
from fairlearn.metrics import selection_rate, false_positive_rate
from sklearn.metrics import accuracy_score
def analyze_fairness_with_fairlearn(y_true, y_pred, protected_attr):
"""Comprehensive fairness analysis using Fairlearn."""
# Create metric frame
metrics = {
'accuracy': accuracy_score,
'selection_rate': selection_rate,
'false_positive_rate': false_positive_rate
}
metric_frame = MetricFrame(
metrics=metrics,
y_true=y_true,
y_pred=y_pred,
sensitive_features=protected_attr
)
print("Overall metrics:")
print(metric_frame.overall)
print("\nMetrics by group:")
print(metric_frame.by_group)
print("\nDifference (max - min):")
print(metric_frame.difference())
print("\nRatio (min / max):")
print(metric_frame.ratio())
return metric_frame
# Usage
y_true = np.array([1, 0, 1, 1, 0, 1] * 100)
y_pred = np.array([1, 0, 1, 0, 0, 1] * 100)
protected = np.array([0, 1, 0, 1, 0, 1] * 100)
metric_frame = analyze_fairness_with_fairlearn(y_true, y_pred, protected)
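Fairlearn also ships single-number convenience metrics, which are handy for automated checks and dashboards; a short sketch on the same arrays:

from fairlearn.metrics import demographic_parity_difference, equalized_odds_difference

# Scalar summaries: 0.0 means the metric is equal across groups
dpd = demographic_parity_difference(y_true, y_pred, sensitive_features=protected)
eod = equalized_odds_difference(y_true, y_pred, sensitive_features=protected)
print(f"Demographic parity difference: {dpd:.4f}")
print(f"Equalized odds difference: {eod:.4f}")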
Using AI Fairness 360
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric
def analyze_with_aif360(df, protected_attr, target, privileged_value=1):
    """Analyze fairness using IBM's AI Fairness 360."""
    # Create dataset
    dataset = BinaryLabelDataset(
        df=df,
        label_names=[target],
        protected_attribute_names=[protected_attr]
    )
    # Dataset-level metrics require explicit group definitions
    metric = BinaryLabelDatasetMetric(
        dataset,
        unprivileged_groups=[{protected_attr: 1 - privileged_value}],
        privileged_groups=[{protected_attr: privileged_value}]
    )
    print(f"Disparate Impact Ratio: {metric.disparate_impact():.4f}")
    print(f"Statistical Parity Difference: {metric.statistical_parity_difference():.4f}")
    # Equal opportunity difference compares TPRs, so it needs model
    # predictions and lives on aif360's ClassificationMetric instead
    return metric
# Usage
df = pd.DataFrame({
'gender': [0, 1, 0, 1, 0, 1] * 100,
'hired': [1, 0, 1, 0, 1, 1] * 100
})
metric = analyze_with_aif360(df, 'gender', 'hired')
Bias Mitigation Techniques
Pre-processing: Resampling
from sklearn.utils import resample
def balance_dataset(X, y, protected_attr):
"""Balance dataset to reduce representation bias."""
df = pd.DataFrame(X)
df['y'] = y
df['protected'] = protected_attr
# Separate by protected attribute
groups = df['protected'].unique()
balanced_dfs = []
# Find minimum group size
min_size = min(len(df[df['protected'] == g]) for g in groups)
for group in groups:
group_df = df[df['protected'] == group]
# Resample to minimum size
resampled = resample(
group_df,
n_samples=min_size,
random_state=42
)
balanced_dfs.append(resampled)
balanced_df = pd.concat(balanced_dfs)
print(f"Original distribution:")
print(df['protected'].value_counts())
print(f"\nBalanced distribution:")
print(balanced_df['protected'].value_counts())
return balanced_df
# Usage
X = np.random.rand(1000, 10)
y = np.random.randint(0, 2, 1000)
protected = np.array([0, 1, 0, 1, 0, 1] * 167)[:1000]
balanced_df = balance_dataset(X, y, protected)
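Downsampling discards data. A common alternative is reweighing, which keeps every row but assigns instance weights that break the association between the protected attribute and the label. A sketch using AIF360's Reweighing, reusing the BinaryLabelDataset setup from the AIF360 example above:

from aif360.algorithms.preprocessing import Reweighing

def reweigh_dataset(df, protected_attr, target, privileged_value=1):
    """Compute instance weights that decouple the protected attribute
    from the label; pass them as sample_weight to an sklearn fit()."""
    dataset = BinaryLabelDataset(
        df=df,
        label_names=[target],
        protected_attribute_names=[protected_attr]
    )
    rw = Reweighing(
        unprivileged_groups=[{protected_attr: 1 - privileged_value}],
        privileged_groups=[{protected_attr: privileged_value}]
    )
    transformed = rw.fit_transform(dataset)
    return transformed.instance_weights

# Usage (numeric df from the AIF360 example)
weights = reweigh_dataset(df, 'gender', 'hired')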
In-processing: Fairness Constraints
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
def train_fair_model(X, y, protected_attr):
    """
    Train a baseline model and measure its accuracy gap across groups.
    This is a simplified diagnostic; see the Fairlearn sketch below
    for actual in-processing constraints.
    """
# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Train base model
model = LogisticRegression(random_state=42)
model.fit(X_scaled, y)
# Evaluate fairness
y_pred = model.predict(X_scaled)
# Measure the accuracy gap across groups
groups = np.unique(protected_attr)
group_accuracies = []
for group in groups:
mask = protected_attr == group
acc = (y_pred[mask] == y[mask]).mean()
group_accuracies.append(acc)
fairness_gap = max(group_accuracies) - min(group_accuracies)
print(f"Fairness gap: {fairness_gap:.4f}")
return model, scaler
# Usage
X = np.random.rand(1000, 10)
y = np.random.randint(0, 2, 1000)
protected = np.array([0, 1, 0, 1, 0, 1] * 167)[:1000]
model, scaler = train_fair_model(X, y, protected)
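For real in-processing constraints, Fairlearn's reductions API wraps any sklearn-style estimator and enforces a fairness constraint during training. A minimal sketch with a demographic parity constraint, using the X, y, and protected arrays from above:

from fairlearn.reductions import ExponentiatedGradient, DemographicParity

# Wrap a base estimator with a demographic parity constraint
mitigator = ExponentiatedGradient(
    estimator=LogisticRegression(random_state=42),
    constraints=DemographicParity()
)
mitigator.fit(X, y, sensitive_features=protected)
y_pred_fair = mitigator.predict(X)
print(f"Overall positive rate after mitigation: {y_pred_fair.mean():.4f}")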
Post-processing: Threshold Optimization
def optimize_thresholds_for_fairness(y_true, y_pred_proba, protected_attr):
    """
    Pick a per-group decision threshold so that every group's true
    positive rate lands close to a shared target (equal opportunity).
    """
    groups = np.unique(protected_attr)
    # Target: the overall TPR at the default 0.5 threshold
    overall_pred = (y_pred_proba >= 0.5).astype(int)
    target_tpr = np.sum((overall_pred == 1) & (y_true == 1)) / np.sum(y_true == 1)
    thresholds = {}
    for group in groups:
        mask = protected_attr == group
        y_true_group = y_true[mask]
        y_pred_group = y_pred_proba[mask]
        n_positives = np.sum(y_true_group == 1)
        best_threshold = 0.5
        best_gap = float('inf')
        for threshold in np.arange(0.05, 0.95, 0.05):
            y_pred_binary = (y_pred_group >= threshold).astype(int)
            tpr = (np.sum((y_pred_binary == 1) & (y_true_group == 1)) / n_positives
                   if n_positives > 0 else 0.0)
            # Keep the threshold whose group TPR best matches the target
            if abs(tpr - target_tpr) < best_gap:
                best_gap = abs(tpr - target_tpr)
                best_threshold = threshold
        thresholds[group] = best_threshold
    print("Optimized thresholds by group:")
    for group, threshold in thresholds.items():
        print(f"Group {group}: {threshold:.2f}")
    return thresholds
# Usage
y_true = np.array([1, 0, 1, 1, 0, 1] * 100)
y_pred_proba = np.random.rand(600)
protected = np.array([0, 1, 0, 1, 0, 1] * 100)
thresholds = optimize_thresholds_for_fairness(y_true, y_pred_proba, protected)
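Fairlearn's ThresholdOptimizer implements this kind of per-group thresholding rigorously (the Hardt et al. post-processing method). A sketch that wraps the already-fitted model and scaler from the in-processing example; the sensitive array is regenerated here so its length matches X's 1000 rows:

from fairlearn.postprocessing import ThresholdOptimizer

sensitive = np.array([0, 1, 0, 1, 0, 1] * 167)[:1000]  # matches X's length
X_scaled = scaler.transform(X)  # same preprocessing the model was trained on
postprocessor = ThresholdOptimizer(
    estimator=model,
    constraints="equalized_odds",
    prefit=True,
    predict_method="predict_proba"
)
postprocessor.fit(X_scaled, y, sensitive_features=sensitive)
y_pred_fair = postprocessor.predict(X_scaled, sensitive_features=sensitive, random_state=42)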
Model Interpretability
Feature Importance Analysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
def analyze_feature_importance(X, y, feature_names):
"""Analyze which features drive model predictions."""
model = RandomForestClassifier(random_state=42)
model.fit(X, y)
# Built-in feature importance
importances = model.feature_importances_
# Permutation importance
perm_importance = permutation_importance(model, X, y, random_state=42)
print("Feature Importance:")
for name, importance in zip(feature_names, importances):
print(f"{name}: {importance:.4f}")
print("\nPermutation Importance:")
for name, importance in zip(feature_names, perm_importance.importances_mean):
print(f"{name}: {importance:.4f}")
return importances
# Usage
X = np.random.rand(1000, 5)
y = np.random.randint(0, 2, 1000)
feature_names = ['age', 'income', 'credit_score', 'employment', 'education']
importances = analyze_feature_importance(X, y, feature_names)
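Feature importance alone won't surface proxy variables: features that look innocuous individually but jointly encode the protected attribute. A quick heuristic is to train a classifier to predict the protected attribute from the features; accuracy well above the majority-class base rate suggests proxies exist. A sketch (the protected array here is synthetic, for illustration):

from sklearn.model_selection import cross_val_score

def detect_proxies(X, protected_attr):
    """If features predict the protected attribute better than chance,
    they likely encode it indirectly."""
    clf = RandomForestClassifier(random_state=42)
    scores = cross_val_score(clf, X, protected_attr, cv=5)
    base_rate = max(np.mean(protected_attr == g) for g in np.unique(protected_attr))
    print(f"Protected-attribute prediction accuracy: {scores.mean():.4f}")
    print(f"Majority-class base rate: {base_rate:.4f}")
    return scores.mean()

# Usage
protected = np.random.randint(0, 2, 1000)
detect_proxies(X, protected)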
SHAP Values for Explainability
import shap
def explain_predictions_with_shap(model, X, feature_names):
"""Use SHAP for model-agnostic explanations."""
# Create explainer
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)
# Summary plot
print("SHAP Summary (feature importance):")
print(f"Mean absolute SHAP values:")
if isinstance(shap_values, list):
shap_values = shap_values[1] # For binary classification
mean_abs_shap = np.abs(shap_values).mean(axis=0)
for name, value in zip(feature_names, mean_abs_shap):
print(f"{name}: {value:.4f}")
return shap_values
# Usage
from sklearn.ensemble import RandomForestClassifier
X = np.random.rand(100, 5)
y = np.random.randint(0, 2, 100)
model = RandomForestClassifier(random_state=42)
model.fit(X, y)
feature_names = ['age', 'income', 'credit_score', 'employment', 'education']
shap_values = explain_predictions_with_shap(model, X, feature_names)
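SHAP values can also be split by protected group to check whether the model relies on different features for different people. A sketch (the protected array is synthetic and sized to match the 100-row X above):

def compare_shap_by_group(shap_values, protected_attr, feature_names):
    """Compare mean absolute SHAP value per feature across groups."""
    for group in np.unique(protected_attr):
        mask = protected_attr == group
        group_importance = np.abs(shap_values[mask]).mean(axis=0)
        print(f"\nGroup {group}:")
        for name, value in zip(feature_names, group_importance):
            print(f"{name}: {value:.4f}")

# Usage
protected = np.random.randint(0, 2, 100)
compare_shap_by_group(shap_values, protected, feature_names)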
Common Pitfalls and Best Practices
❌ Bad: Ignoring Bias
# DON'T: Deploy model without fairness analysis
model.fit(X_train, y_train)
model.predict(X_test)
✅ Good: Comprehensive Fairness Analysis
# DO: Analyze fairness before deployment
metric_frame = analyze_fairness_with_fairlearn(y_true, y_pred, protected_attr)
if metric_frame.ratio().min() < 0.8:
print("Warning: Fairness issues detected")
❌ Bad: Single Fairness Metric
# DON'T: Rely on one metric
if demographic_parity_ratio > 0.8:
print("Model is fair")
✅ Good: Multiple Fairness Metrics
# DO: Use multiple metrics
metrics = {
'demographic_parity': demographic_parity(y_pred, protected),
'equalized_odds': equalized_odds(y_true, y_pred, protected),
'calibration': calibration_analysis(y_true, y_pred_proba, protected)
}
Production Checklist
def fairness_audit_checklist(model, X, y, protected_attr, feature_names):
    """Complete fairness audit before deployment."""
    y_pred = model.predict(X)
    y_pred_proba = model.predict_proba(X)[:, 1]
    # Assemble a frame with the columns the distribution check expects
    audit_df = pd.DataFrame({'protected': protected_attr, 'target': y})
    checks = {
        'data_representation': analyze_data_distribution(audit_df, 'protected', 'target'),
        'demographic_parity': demographic_parity(y_pred, protected_attr),
        'equalized_odds': equalized_odds(y, y_pred, protected_attr),
        'calibration': calibration_analysis(y, y_pred_proba, protected_attr),
        'feature_importance': analyze_feature_importance(X, y, feature_names)
    }
    print("✅ Fairness audit complete")
    return checks
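A usage sketch, reusing the fitted random forest, arrays, and feature names from the SHAP example above (all synthetic):

# Usage
protected = np.random.randint(0, 2, 100)  # synthetic, matches X's 100 rows
checks = fairness_audit_checklist(model, X, y, protected, feature_names)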
Summary
Building ethical AI systems requires:
- Understanding bias types and their sources
- Measuring fairness with appropriate metrics
- Detecting bias using tools like Fairlearn and AI Fairness 360
- Mitigating bias through pre-, in-, and post-processing
- Explaining predictions with SHAP and feature importance
- Continuous monitoring in production
- Stakeholder engagement in fairness decisions
Ethical AI isn’t a one-time check; it’s an ongoing commitment to building fair, transparent, and responsible systems.