Skip to content

FDR Control for Multiple Testing

Use detector.select(...) for default FDR-controlled anomaly decisions, and scipy.stats.false_discovery_control for manual BH analysis on p-values.

Setup

import numpy as np
from pyod.models.lof import LOF
from scipy.stats import false_discovery_control
from oddball import Dataset, load
from nonconform import ConformalDetector, Split
from nonconform.enums import Pruning
from nonconform.metrics import false_discovery_rate, statistical_power

# Load benchmark data
X, X_test, y_test = load(Dataset.SHUTTLE, setup=True, seed=42)

Prerequisites

All code blocks below require running the Setup block above first.

Basic Usage

# Initialize detector
base_detector = LOF()
strategy = Split(n_calib=0.2)

detector = ConformalDetector(
    detector=base_detector,
    strategy=strategy,
    aggregation="median",
    seed=42
)

# Fit and run FDR-controlled selection
detector.fit(X)
discoveries = detector.select(X_test, alpha=0.05)
p_values = detector.last_result.p_values

# Optional manual BH analysis on cached p-values
adjusted_p_values = false_discovery_control(p_values, method="bh")

print(f"Original detections: {(p_values < 0.05).sum()}")
print(f"FDR-controlled discoveries: {discoveries.sum()}")
print(f"Reduction: {(p_values < 0.05).sum() - discoveries.sum()}")

Different FDR Control Methods

# BH method in scipy.stats.false_discovery_control
adjusted_p_vals = false_discovery_control(p_values, method='bh')
discoveries = adjusted_p_vals < 0.05
print(f"BH method: {discoveries.sum()} discoveries")

# Compare with no adjustment
no_adjustment = (p_values < 0.05).sum()
print(f"No adjustment: {no_adjustment} detections")

Weighted Conformal Selection (Covariate Shift)

from nonconform import JackknifeBootstrap, logistic_weight_estimator
from pyod.models.iforest import IForest

# Load a dataset that exhibits covariate shift between calibration and test sets
x_train, x_test, y_test = load(Dataset.SHUTTLE, setup=True, seed=1)

weighted_detector = ConformalDetector(
    detector=IForest(random_state=1),
    strategy=JackknifeBootstrap(n_bootstraps=50),
    aggregation="median",
    weight_estimator=logistic_weight_estimator(),
    seed=1,
)

weighted_detector.fit(x_train)

# Weighted select() dispatches to Weighted Conformal Selection under covariate shift
wcs_mask = weighted_detector.select(
    x_test,
    alpha=0.1,
    pruning=Pruning.DETERMINISTIC,
    seed=1,
)
# detector.last_result bundles the scores/weights produced by select()

print(f"WCS detections: {wcs_mask.sum()} out of {len(wcs_mask)} test points")
print(f"Empirical FDR: {false_discovery_rate(y=y_test, y_hat=wcs_mask):.3f}")

FDR Control at Different Levels

# Try different FDR levels
fdr_levels = [0.01, 0.05, 0.1, 0.2]

print("\nFDR Control at Different Levels:")
print("-" * 40)
print(f"{'FDR Level':<12} {'Discoveries':<12} {'Adjusted α':<12}")
print("-" * 40)

for alpha in fdr_levels:
    adjusted_p_vals = false_discovery_control(p_values, method="bh")
    discoveries = adjusted_p_vals < alpha

    print(f"{alpha:<12} {discoveries.sum():<12} {adjusted_p_vals.min():.6f}")

Evaluating FDR Control Performance

# Use benchmark data with known ground truth (loaded in Setup)
# X contains training data (normal), X_test contains test data, y_test contains labels

# Fit detector and get p-values
detector.fit(X)  # Fit only on normal data
p_values = detector.compute_p_values(X_test)

# Apply different FDR control levels
fdr_levels = [0.05, 0.1, 0.15, 0.2]

print("\nFDR Control Performance Evaluation:")
print("-" * 60)
print(f"{'FDR Level':<12} {'Discoveries':<14} {'Empirical FDR':<14} {'Power':<10}")
print("-" * 60)

for alpha in fdr_levels:
    adjusted_p_vals = false_discovery_control(p_values, method="bh")
    discoveries = adjusted_p_vals < alpha

    empirical_fdr = false_discovery_rate(y_test, discoveries)
    power = statistical_power(y_test, discoveries)

    print(f"{alpha:<12} {discoveries.sum():<14} {empirical_fdr:<14.3f} {power:<10.3f}")

Multiple Detectors with FDR Control

from pyod.models.knn import KNN
from pyod.models.ocsvm import OCSVM

# Multiple detectors
detectors = {
    'LOF': LOF(contamination=0.1),
    'KNN': KNN(contamination=0.1),
    'OCSVM': OCSVM(contamination=0.1)
}

# Get p-values from each detector
all_p_values = {}
strategy = Split(n_calib=0.2)

for name, base_det in detectors.items():
    detector = ConformalDetector(
        detector=base_det,
        strategy=strategy,
        aggregation="median",
        seed=42
    )
    detector.fit(X)
    p_vals = detector.compute_p_values(X_test)
    all_p_values[name] = p_vals

# Apply FDR control to each detector's p-values
print("\nMultiple Detectors with FDR Control:")
print("-" * 60)
print(f"{'Detector':<10} {'Raw Det.':<12} {'FDR Det.':<12} {'Emp. FDR':<12} {'Power':<10}")
print("-" * 60)

for name, p_vals in all_p_values.items():
    # Raw detections
    raw_detections = (p_vals < 0.05).sum()

    # FDR controlled detections
    adj_p_vals = false_discovery_control(p_vals, method='bh')
    fdr_discoveries = adj_p_vals < 0.05

    # Performance metrics using nonconform functions
    empirical_fdr = false_discovery_rate(y_test, fdr_discoveries)
    power = statistical_power(y_test, fdr_discoveries)

    print(f"{name:<10} {raw_detections:<12} {fdr_discoveries.sum():<12} {empirical_fdr:<12.3f} {power:<10.3f}")

Ensemble with FDR Control

# Combine p-values from multiple detectors and apply FDR control
# Using Fisher's method for combining p-values
from scipy.stats import combine_pvalues

# Combine p-values using Fisher's method
p_values_list = list(all_p_values.values())
combined_stats, combined_p_values = combine_pvalues(np.array(p_values_list).T, method='fisher')

# Apply FDR control to combined p-values
adj_combined_p_vals = false_discovery_control(combined_p_values, method='bh')
combined_discoveries = adj_combined_p_vals < 0.05

# Evaluate ensemble performance using nonconform metrics
ensemble_fdr = false_discovery_rate(y_test, combined_discoveries)
ensemble_power = statistical_power(y_test, combined_discoveries)

print(f"\nEnsemble with FDR Control:")
print(f"Discoveries: {combined_discoveries.sum()}")
print(f"Empirical FDR: {ensemble_fdr:.3f}")
print(f"Statistical Power: {ensemble_power:.3f}")

Visualization

import matplotlib.pyplot as plt

# Visualize FDR control effects
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# P-value histogram
axes[0, 0].hist(p_values, bins=50, alpha=0.7, color='blue', edgecolor='black')
axes[0, 0].axvline(x=0.05, color='red', linestyle='--', label='α=0.05')
axes[0, 0].set_xlabel('p-value')
axes[0, 0].set_ylabel('Frequency')
axes[0, 0].set_title('P-value Distribution')
axes[0, 0].legend()

# Adjusted p-value histogram
adjusted_p_vals = false_discovery_control(p_values, method='bh')
axes[0, 1].hist(adjusted_p_vals, bins=50, alpha=0.7, color='orange', edgecolor='black')
axes[0, 1].axvline(x=0.05, color='red', linestyle='--', label='alpha=0.05')
axes[0, 1].set_xlabel('Adjusted p-value')
axes[0, 1].set_ylabel('Frequency')
axes[0, 1].set_title('BH Adjusted P-value Distribution')
axes[0, 1].legend()

# Comparison of detection methods
detection_methods = ['Raw (alpha=0.05)', 'BH FDR Control']
detection_counts = [
    (p_values < 0.05).sum(),
    (false_discovery_control(p_values, method='bh') < 0.05).sum()
]

axes[1, 0].bar(detection_methods, detection_counts, color=['blue', 'orange'])
axes[1, 0].set_ylabel('Number of Detections')
axes[1, 0].set_title('Detection Comparison')
axes[1, 0].tick_params(axis='x', rotation=45)

# FDR control at different levels
fdr_levels = np.arange(0.01, 0.21, 0.01)
discoveries_at_levels = []

for alpha in fdr_levels:
    adj_p_vals = false_discovery_control(p_values, method='bh')
    discoveries_at_levels.append((adj_p_vals < alpha).sum())

axes[1, 1].plot(fdr_levels, discoveries_at_levels, 'o-', linewidth=2)
axes[1, 1].axhline(y=(p_values < 0.05).sum(), color='red', linestyle='--',
                   label='Raw (alpha=0.05)')
axes[1, 1].set_xlabel('FDR Level')
axes[1, 1].set_ylabel('Number of Discoveries')
axes[1, 1].set_title('Discoveries vs FDR Level')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

Power Analysis

# Analyze statistical power at different FDR levels using benchmark data
alpha_levels = [0.01, 0.05, 0.1, 0.2]
power_results = {}

# Use benchmark data loaded in Setup
detector = ConformalDetector(
    detector=LOF(contamination=0.1),
    strategy=Split(n_calib=0.2),
    aggregation="median",
    seed=42
)
detector.fit(X)
p_vals = detector.compute_p_values(X_test)

for alpha in alpha_levels:
    # Apply FDR control at different significance levels
    adj_p_vals = false_discovery_control(p_vals, method='bh')
    discoveries = adj_p_vals < alpha

    # Calculate power using nonconform's statistical_power
    power_results[alpha] = statistical_power(y_test, discoveries)

print("\nPower Analysis:")
print("-" * 30)
print(f"{'Alpha Level':<12} {'Power':<8}")
print("-" * 30)
for alpha, power in power_results.items():
    print(f"{alpha:<12} {power:<8.3f}")

Next Steps