Achieving equalized odds on synthetic data
NOTE: this notebook has extra requirements, install them with:
pip install "error_parity[dev]"
[1]:
import logging
from itertools import product
import numpy as np
import cvxpy as cp
from scipy.spatial import ConvexHull
from sklearn.metrics import roc_curve
[2]:
from error_parity import __version__
print(f"Notebook ran using `error-parity=={__version__}`")
Notebook ran using `error-parity==0.3.11`
Given some data (X, Y, S)
[3]:
def generate_synthetic_data(n_samples: int, n_groups: int, prevalence: float, seed: int):
"""Helper to generate synthetic features/labels/predictions."""
# Construct numpy rng
rng = np.random.default_rng(seed)
# Different levels of gaussian noise per group (to induce some inequality in error rates)
group_noise = [0.1 + 0.3 * rng.random() / (1+idx) for idx in range(n_groups)]
# Generate predictions
assert 0 < prevalence < 1
y_score = rng.random(size=n_samples)
# Generate labels
# - define which samples belong to each group
# - add different noise levels for each group
group = rng.integers(low=0, high=n_groups, size=n_samples)
y_true = np.zeros(n_samples)
for i in range(n_groups):
group_filter = group == i
y_true_groupwise = ((
y_score[group_filter] +
rng.normal(size=np.sum(group_filter), scale=group_noise[i])
) > (1-prevalence)).astype(int)
y_true[group_filter] = y_true_groupwise
    # Generate features: use the sample index as the only feature.
    # As we already have the y_score values, each sample's index can
    # serve as its feature, so a "classifier" can simply map the index
    # to its pre-generated score.
X = np.arange(len(y_true)).reshape((-1, 1))
return X, y_true, y_score, group
[4]:
# N_GROUPS = 4
N_GROUPS = 3
# N_SAMPLES = 1_000_000
N_SAMPLES = 100_000
SEED = 23
X, y_true, y_score, group = generate_synthetic_data(
n_samples=N_SAMPLES,
n_groups=N_GROUPS,
prevalence=0.25,
seed=SEED)
[5]:
actual_prevalence = np.sum(y_true) / len(y_true)
print(f"Actual global prevalence: {actual_prevalence:.1%}")
Actual global prevalence: 26.6%
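The group-wise noise should also be visible in the data itself: higher-noise groups get lower-quality scores. As a quick illustrative check (not one of the original cells; `roc_auc_score` is standard scikit-learn):

# Illustrative check: sample count, prevalence, and score AUC per group.
from sklearn.metrics import roc_auc_score
for g in range(N_GROUPS):
    mask = group == g
    print(
        f"Group {g}: n={mask.sum():,}, "
        f"prevalence={y_true[mask].mean():.1%}, "
        f"AUC={roc_auc_score(y_true[mask], y_score[mask]):.3f}"
    )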
[6]:
EPSILON_TOLERANCE = 0.05
FALSE_POS_COST = 1
FALSE_NEG_COST = 1
# L_P_NORM = np.inf
L_P_NORM = 2
Given a trained predictor (that outputs real-valued scores)
[7]:
# Example predictor that predicts the synthetically produced scores above
predictor = lambda idx: y_score[idx]
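The only contract `predictor` must fulfill is being a callable that maps features to real-valued scores. For a real model, a thin wrapper does the job; e.g., for a hypothetical fitted scikit-learn classifier `trained_model`:

# Illustrative sketch (`trained_model` is a placeholder, not defined here):
# predictor = lambda X: trained_model.predict_proba(X)[:, 1]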
Construct the fair optimal classifier (derived from the given predictor)
Fairness is measured by the equalized odds constraint (equal FPR and TPR across groups); optionally, this constraint can be relaxed by some small tolerance.
Optimality is measured as minimizing the expected loss, parameterized by the given costs of false positive and false negative errors.
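Concretely, the expected loss of an ROC point (FPR, TPR) under label prevalence p is the standard cost decomposition loss = false_pos_cost · FPR · (1 − p) + false_neg_cost · FNR · p, with FNR = 1 − TPR. A minimal sketch (the library exposes an equivalent via `error_parity.roc_utils.calc_cost_of_point`, used later in this notebook):

# Expected loss of an ROC point; with unit costs this is the error rate.
def expected_loss(fpr, tpr, prevalence, fp_cost=1.0, fn_cost=1.0):
    fnr = 1.0 - tpr
    return fp_cost * fpr * (1 - prevalence) + fn_cost * fnr * prevalence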
[8]:
from error_parity import RelaxedThresholdOptimizer
postproc_clf = RelaxedThresholdOptimizer(
predictor=predictor,
constraint="equalized_odds",
tolerance=EPSILON_TOLERANCE,
false_pos_cost=FALSE_POS_COST,
false_neg_cost=FALSE_NEG_COST,
l_p_norm=L_P_NORM,
max_roc_ticks=None,
seed=SEED,
)
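For reference, `tolerance=0.0` would demand strict equalized odds, while a sufficiently large tolerance recovers the unconstrained, loss-optimal classifier. An illustrative strict counterpart (relying on the constructor defaults for the remaining arguments):

# Illustrative: zero-tolerance variant, useful to quantify the
# accuracy cost of exact equalized odds.
strict_clf = RelaxedThresholdOptimizer(
    predictor=predictor,
    constraint="equalized_odds",
    tolerance=0.0,
    seed=SEED,
)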
[9]:
%%time
import logging
logging.basicConfig(level=logging.INFO, force=True)
postproc_clf.fit(X=X, y=y_true, group=group)
INFO:root:ROC convex hull contains 0.8% of the original points.
INFO:root:ROC convex hull contains 1.1% of the original points.
INFO:root:ROC convex hull contains 1.7% of the original points.
INFO:root:cvxpy solver took 0.000574791s; status is optimal.
INFO:root:Optimal solution value: 0.17475045311134396
INFO:root:Variable Global ROC point: value [0.07436224 0.54775546]
INFO:root:Variable ROC point for group 0: value [0.10639373 0.53750647]
INFO:root:Variable ROC point for group 1: value [0.05919955 0.55402037]
INFO:root:Variable ROC point for group 2: value [0.05891875 0.55319488]
CPU times: user 199 ms, sys: 8.76 ms, total: 208 ms
Wall time: 1.6 s
[9]:
<error_parity.threshold_optimizer.RelaxedThresholdOptimizer at 0x11df428c0>
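The fitted solution exposes its target ROC points, so we can check directly that the group-wise points lie within the ε-tolerance of one another (indeed, groups 0 and 1 above sit at distance ≈ 0.05, exactly at the constraint boundary). An illustrative check:

# Illustrative: pairwise distances between group-wise target ROC points
# should not exceed the epsilon tolerance used above.
from itertools import combinations
for i, j in combinations(range(N_GROUPS), 2):
    dist = np.linalg.norm(
        postproc_clf.groupwise_roc_points[i]
        - postproc_clf.groupwise_roc_points[j],
        ord=L_P_NORM,
    )
    print(f"||ROC_{i} - ROC_{j}||_{L_P_NORM} = {dist:.4f}")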
Plot solution
[10]:
from matplotlib import pyplot as plt
import seaborn as sns
sns.set(style="whitegrid", rc={'grid.linestyle': ':'})
[11]:
from error_parity.plotting import plot_postprocessing_solution
plot_postprocessing_solution(
postprocessed_clf=postproc_clf,
plot_roc_curves=True,
plot_roc_hulls=True,
dpi=200, figsize=(5, 5),
)
plt.show()
Plot realized ROC points
The realized ROC points will converge to the theoretical solution for larger datasets, but some variance is expected for smaller datasets.
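To get a feel for that variance, one can bootstrap the realized ROC point of a group; a minimal illustrative sketch, assuming only numpy (call it on a group's labels and the `y_pred_binary` computed in the next cell):

# Illustrative: bootstrap the sampling spread of a realized ROC point.
def bootstrap_roc_point_std(y_true, y_pred, n_boot=100, seed=42):
    """Std. of the (FPR, TPR) point across bootstrap resamples."""
    rng = np.random.default_rng(seed)
    points = []
    for _ in range(n_boot):
        idx = rng.integers(0, len(y_true), size=len(y_true))
        yt, yp = y_true[idx], y_pred[idx]
        fpr = np.sum((yp == 1) & (yt == 0)) / np.sum(yt == 0)
        tpr = np.sum((yp == 1) & (yt == 1)) / np.sum(yt == 1)
        points.append((fpr, tpr))
    return np.vstack(points).std(axis=0)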
[12]:
# Set group-wise colors and global color
palette = sns.color_palette(n_colors=N_GROUPS + 1)
global_color = palette[0]
all_group_colors = palette[1:]
[13]:
from error_parity.roc_utils import compute_roc_point_from_predictions
plot_postprocessing_solution(
postprocessed_clf=postproc_clf,
plot_roc_curves=True,
plot_roc_hulls=True,
plot_relaxation=True,
dpi=200, figsize=(5, 5),
)
# Compute predictions
y_pred_binary = postproc_clf(X, group=group)
# Plot the group-wise points found
realized_roc_points = list()
for idx in range(N_GROUPS):
# Evaluate triangulation of target point as a randomized clf
group_filter = group == idx
curr_realized_roc_point = compute_roc_point_from_predictions(y_true[group_filter], y_pred_binary[group_filter])
realized_roc_points.append(curr_realized_roc_point)
plt.plot(
curr_realized_roc_point[0], curr_realized_roc_point[1],
color=all_group_colors[idx],
marker="*", markersize=8,
lw=0,
)
realized_roc_points = np.vstack(realized_roc_points)
# Plot actual global classifier performance
global_clf_realized_roc_point = compute_roc_point_from_predictions(y_true, y_pred_binary)
plt.plot(
global_clf_realized_roc_point[0], global_clf_realized_roc_point[1],
color=global_color,
marker="*", markersize=8,
lw=0,
)
plt.xlim(-0.003, 0.3)
plt.ylim(-0.01, 1.0)
plt.show()
Compute distances between theoretical (target) ROC points and realized ROC points
[14]:
# Distances to group-wise targets:
for i, (target_point, actual_point) in enumerate(zip(postproc_clf.groupwise_roc_points, realized_roc_points)):
dist = np.linalg.norm(target_point - actual_point, ord=2)
print(f"Group {i}: l2 distance from target to realized point := {dist:.3%}")
# Distance to global target point:
dist = np.linalg.norm(postproc_clf.global_roc_point - global_clf_realized_roc_point, ord=2)
print(f"Global l2 distance from target to realized point := {dist:.3%}")
Group 0: l2 distance from target to realized point := 0.000%
Group 1: l2 distance from target to realized point := 0.282%
Group 2: l2 distance from target to realized point := 0.063%
Global l2 distance from target to realized point := 0.089%
Compute performance differences
This assumes FALSE_POS_COST == FALSE_NEG_COST == 1.0, in which case the expected loss equals the overall error rate (1 − accuracy).
[15]:
from sklearn.metrics import accuracy_score
from error_parity.roc_utils import calc_cost_of_point
# Empirical
accuracy_val = accuracy_score(y_true, y_pred_binary)
# Theoretical
theoretical_global_cost = calc_cost_of_point(
fpr=postproc_clf.global_roc_point[0],
fnr=1 - postproc_clf.global_roc_point[1],
prevalence=y_true.sum() / len(y_true),
)
print(f"Actual accuracy: \t\t\t{accuracy_val:.3%}")
print(f"Actual error rate (1 - Acc.):\t\t{1 - accuracy_val:.3%}")
print(f"Theoretical cost of solution found:\t{theoretical_global_cost:.3%}")
Actual accuracy: 82.559%
Actual error rate (1 - Acc.): 17.441%
Theoretical cost of solution found: 17.475%
[16]:
print(f"Accuracy for dummy constant classifier: {max(np.mean(y_true==label) for label in {0, 1}):.1%}")
Accuracy for dummy constant classifier: 73.4%
[17]:
from error_parity.evaluation import eval_accuracy_and_equalized_odds
lp_acc, lp_eq_odds = eval_accuracy_and_equalized_odds(y_true, y_pred_binary, group, l_p_norm=L_P_NORM)
print(f"Realized LP accuracy:\t {lp_acc:.1%}")
print(f"Realized LP eq. odds violation: {lp_eq_odds:.1%}\n")
Realized LP accuracy: 82.6%
Realized LP eq. odds violation: 5.1%
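For context, the same evaluation can be run (illustratively) on the unprocessed predictor thresholded at 0.5, to see what post-processing buys in terms of fairness:

# Illustrative baseline: the unprocessed predictor at threshold 0.5.
y_pred_baseline = (y_score >= 0.5).astype(int)
base_acc, base_eq_odds = eval_accuracy_and_equalized_odds(
    y_true, y_pred_baseline, group, l_p_norm=L_P_NORM,
)
print(f"Baseline accuracy:\t\t {base_acc:.1%}")
print(f"Baseline eq. odds violation:\t {base_eq_odds:.1%}")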
Plot postprocessing Pareto frontier
i.e., all attainable optimal trade-offs for this predictor
[18]:
from error_parity.pareto_curve import compute_postprocessing_curve
postproc_results_df = compute_postprocessing_curve(
model=predictor,
fit_data=(X, y_true, group),
eval_data={
"fit": (X, y_true, group),
},
fairness_constraint="equalized_odds",
l_p_norm=L_P_NORM,
bootstrap=True,
seed=SEED,
predict_method="__call__",
)
INFO:root:Using `n_jobs=9` to compute adjustment curve.
INFO:root:Computing postprocessing for the following constraint tolerances: [0. 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 0.1 0.11 0.12 0.13
0.14 0.15 0.16 0.17 0.18 0.19 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 ].
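The result is a pandas DataFrame with one row per tolerance value; the exact column names encode the metric, the bootstrap aggregation, and the `eval_data` key (here "fit"), so it is worth inspecting before plotting (illustrative):

# Illustrative: peek at the per-tolerance results before plotting.
postproc_results_df.head()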
[19]:
from error_parity.plotting import plot_postprocessing_frontier
from matplotlib import pyplot as plt
plot_postprocessing_frontier(
postproc_results_df,
perf_metric="accuracy",
disp_metric=f"equalized_odds_diff_l{postproc_clf.l_p_norm}",
show_data_type="fit",
constant_clf_perf=max((y_true == const_pred).mean() for const_pred in {0, 1}),
model_name=r"$\bigstar$",
)
plt.xlabel(r"accuracy $\rightarrow$")
l_p_norm_str = str(postproc_clf.l_p_norm) if postproc_clf.l_p_norm != np.inf else r"\infty"
plt.ylabel(r"equalized odds $\ell_" + l_p_norm_str + r"$ violation $\leftarrow$")
plt.show()