Skip to content

Jackknife+-after-Bootstrap (JaB+) Conformal Detection

This example demonstrates conformal anomaly detection with bootstrap resampling, using the Jackknife+-after-Bootstrap (JaB+) strategy.

Setup

import numpy as np
from pyod.models.lof import LOF
from sklearn.datasets import load_breast_cancer
from nonconform import ConformalDetector, JackknifeBootstrap
from nonconform.metrics import false_discovery_rate, statistical_power

# Fetch the scikit-learn breast cancer dataset, then unpack the feature
# matrix and the target vector from the returned object in one step.
data = load_breast_cancer()
X, y = data.data, data.target

Basic Usage

# The underlying anomaly model: pyod's LOF with default settings.
base_detector = LOF()

# JaB+ calibration strategy configured with 50 bootstraps.
jab_strategy = JackknifeBootstrap(n_bootstraps=50)

# Tie the model and the strategy together; the seed is pinned to 42.
detector = ConformalDetector(
    seed=42,
    aggregation="median",
    strategy=jab_strategy,
    detector=base_detector,
)

# Fit on X, then run select() on the same data at alpha=0.05 and report
# how many samples were flagged.
detector.fit(X)
discoveries = detector.select(X, alpha=0.05)
print(f"Discoveries with FDR control: {discoveries.sum()}")

Plus Mode for JaB+

# Plus-mode variant of the strategy: 100 bootstraps, median aggregation
# configured on the strategy itself.
jab_plus_strategy = JackknifeBootstrap(
    mode="plus",
    aggregation_method="median",
    n_bootstraps=100,
)

detector_plus = ConformalDetector(
    seed=42,
    aggregation="median",
    strategy=jab_plus_strategy,
    detector=base_detector,
)

# Only the plus-mode detector is fitted here; `detector` was already
# fitted in the Basic Usage section.
detector_plus.fit(X)

# Run select() at the same alpha on both detectors and print the counts
# side by side.
jab_disc = detector.select(X, alpha=0.05)
jab_plus_disc = detector_plus.select(X, alpha=0.05)
print(f"JaB+ discoveries: {jab_disc.sum()}")
print(f"JaB+ (plus) discoveries: {jab_plus_disc.sum()}")

Comparing Different Bootstrap Configurations

# Sweep over several bootstrap counts and record the number of discoveries
# obtained with each configuration.
bootstrap_counts = [50, 100, 200]

results = {}
for n_bootstraps in bootstrap_counts:
    strategy = JackknifeBootstrap(n_bootstraps=n_bootstraps)
    # Use a loop-local name so the fitted `detector` from Basic Usage is not
    # overwritten: later sections call `detector` alongside `discoveries`,
    # and both should refer to the same model.
    sweep_detector = ConformalDetector(
        detector=base_detector,
        strategy=strategy,
        aggregation="median",
        seed=42,
    )
    sweep_detector.fit(X)
    disc = sweep_detector.select(X, alpha=0.05)

    # Keyed by a readable label such as "B=50".
    key = f"B={n_bootstraps}"
    results[key] = disc.sum()
    print(f"{key}: {results[key]} discoveries")

Evaluation Metrics

# Ground truth is available through y (breast cancer labels).
# The dataset encodes malignant as 0 and benign as 1, so flip the labels
# to make 1 mark the anomaly class.
y_anomaly = 1 - y

print("\nEvaluation with FDR Control:")
print(f"Discoveries: {discoveries.sum()}")

# Compute each metric right before it is reported.
empirical_fdr = false_discovery_rate(y=y_anomaly, y_hat=discoveries)
print(f"Empirical FDR: {empirical_fdr:.3f}")
power = statistical_power(y=y_anomaly, y_hat=discoveries)
print(f"Statistical Power: {power:.3f}")

Uncertainty Quantification

# matplotlib is needed for the plots below; nothing earlier on this page
# imports it, so bring `plt` into scope here.
import matplotlib.pyplot as plt

# Get raw scores for uncertainty analysis
raw_scores = detector.score_samples(X)

# One figure holding three side-by-side panels.
plt.figure(figsize=(12, 4))

# Panel 1: histogram of the raw scores
plt.subplot(1, 3, 1)
plt.hist(raw_scores, bins=50, alpha=0.7, color='blue', edgecolor='black')
plt.xlabel('Anomaly Score')
plt.ylabel('Frequency')
plt.title('Bootstrap Anomaly Score Distribution')

# p-values for the same samples, from the same detector as the raw scores
p_values = detector.compute_p_values(X)

# Panel 2: p-value plotted against raw score
plt.subplot(1, 3, 2)
plt.scatter(raw_scores, p_values, alpha=0.5)
plt.xlabel('Anomaly Score')
plt.ylabel('p-value')
plt.title('Score vs P-value Relationship')

# Panel 3: how much the discovery count varies across differently seeded runs
plt.subplot(1, 3, 3)
# Draw the ten per-run seeds from a seeded generator (same [0, 1000) range
# as before) so repeated runs of this example produce the same box plot.
seed_rng = np.random.default_rng(42)
stability_results = []
for run_seed in seed_rng.integers(1000, size=10):
    det = ConformalDetector(
        detector=base_detector,
        strategy=JackknifeBootstrap(n_bootstraps=50),
        aggregation="median",
        seed=int(run_seed),  # plain Python int, as np.random.randint returned
    )
    det.fit(X)
    disc = det.select(X, alpha=0.05)
    stability_results.append(disc.sum())

plt.boxplot(stability_results)
plt.ylabel('Number of Detections')
plt.title('Bootstrap Detection Stability')

plt.tight_layout()
plt.show()

Comparison with Other Strategies

from nonconform import CrossValidation, JackknifeBootstrap, Split

# Strategies to compare, keyed by the label used in the printed report.
strategies = {
    'JaB+': JackknifeBootstrap(n_bootstraps=50),
    'Split': Split(n_calib=0.2),
    'CV': CrossValidation(k=5)
}

comparison_results = {}
for name, strategy in strategies.items():
    # Loop-local name: keeps the module-level `detector` from earlier
    # sections intact instead of rebinding it on every iteration.
    strategy_detector = ConformalDetector(
        detector=base_detector,
        strategy=strategy,
        aggregation="median",
        seed=42,
    )
    strategy_detector.fit(X)
    p_vals = strategy_detector.compute_p_values(X)
    disc = strategy_detector.select(X, alpha=0.05)
    comparison_results[name] = {
        'discoveries': disc.sum(),
        'min_p': p_vals.min(),
        'mean_p': p_vals.mean()
    }

print("\nStrategy Comparison (with FDR control):")
# `stats` rather than `results`, so the `results` dict built in the
# bootstrap-configuration section is not overwritten by this loop.
for name, stats in comparison_results.items():
    print(f"{name}:")
    print(f"  Discoveries: {stats['discoveries']}")
    print(f"  Min p-value: {stats['min_p']:.4f}")
    print(f"  Mean p-value: {stats['mean_p']:.4f}")

Next Steps