# Classical Conformal Anomaly Detection

This example demonstrates how to use classical conformal prediction for anomaly detection.

## Setup

import numpy as np
from pyod.models.lof import LOF
from scipy.stats import false_discovery_control
from nonconform.estimation import ConformalDetector
from nonconform.strategy import Split
from nonconform.utils.func import Aggregation
from nonconform.utils.data import load, Dataset

# Fetch the BREAST example dataset; it is downloaded automatically and
# cached in memory, then unpacked into train/test features and test labels.
benchmark = Dataset.BREAST
x_train, x_test, y_test = load(benchmark, setup=True)
print(f"Training samples: {len(x_train)}, Test samples: {len(x_test)}")

## Basic Usage

# Local Outlier Factor acts as the underlying (non-conformal) detector
base_detector = LOF()

# Split strategy configured with calib_size=0.2
strategy = Split(calib_size=0.2)

# Wrap the base detector in a conformal detector
detector = ConformalDetector(
    seed=42,
    aggregation=Aggregation.MEDIAN,
    strategy=strategy,
    detector=base_detector,
)

# Train on x_train (normal samples only)
detector.fit(x_train)

# raw=False returns p-values for the test data ...
p_values = detector.predict(x_test, raw=False)

# ... while raw=True returns the raw anomaly scores
scores = detector.predict(x_test, raw=True)

# Flag every test point whose p-value falls below the 5% significance level
alpha = 0.05
anomalies = p_values < alpha
print(f"Number of anomalies detected: {anomalies.sum()}")
print(f"True anomaly rate in test set: {y_test.mean():.2%}")

## FDR Control

# Target False Discovery Rate of 5%
fdr_level = 0.05

# Benjamini-Hochberg ('bh') adjustment of the conformal p-values;
# a test point counts as a discovery when its adjusted p-value is below the level.
adjusted_p_values = false_discovery_control(p_values, method='bh')
discoveries = adjusted_p_values < fdr_level

print(f"Number of discoveries with FDR control: {discoveries.sum()}")
# False discoveries are discoveries labelled 0 in y_test; max(1, ...) guards
# against division by zero when nothing is discovered.
print(f"Empirical FDR: {(discoveries & (y_test == 0)).sum() / max(1, discoveries.sum()):.3f}")

## Advanced Usage with Cross-Validation

from nonconform.strategy import CrossValidation

# Swap the split strategy for cross-validation (k=5) for better calibration
cv_strategy = CrossValidation(k=5)
cv_detector = ConformalDetector(
    seed=42,
    aggregation=Aggregation.MEDIAN,
    strategy=cv_strategy,
    detector=base_detector,
)

# Train on the same data and compute p-values for the same test set
cv_detector.fit(x_train)
cv_p_values = cv_detector.predict(x_test, raw=False)

# For a fair comparison, apply the same Benjamini-Hochberg adjustment to the
# p-values of the split strategy and of the cross-validation strategy.
split_fdr, cv_fdr = (
    false_discovery_control(pv, method='bh') for pv in (p_values, cv_p_values)
)

print(f"Split strategy detections: {(split_fdr < 0.05).sum()}")
print(f"Cross-validation detections: {(cv_fdr < 0.05).sum()}")

## Comparing Different Aggregation Methods

# Aggregation methods to compare, each with the same base detector,
# strategy and seed
aggregation_methods = [Aggregation.MEAN, Aggregation.MEDIAN, Aggregation.MAX]

for agg_method in aggregation_methods:
    # Build and train a fresh conformal detector for this aggregation method
    detector = ConformalDetector(
        seed=42,
        aggregation=agg_method,
        strategy=strategy,
        detector=base_detector,
    )
    detector.fit(x_train)

    # P-values on the test set, followed by Benjamini-Hochberg FDR control
    p_vals = detector.predict(x_test, raw=False)
    fdr_controlled = false_discovery_control(p_vals, method='bh')

    print(f"{agg_method.value} aggregation: {(fdr_controlled < 0.05).sum()} detections")

## Visualization

import matplotlib.pyplot as plt

# One figure with two side-by-side panels for the p-values
fig, (hist_ax, scatter_ax) = plt.subplots(1, 2, figsize=(10, 5))

# Left panel: histogram of p-values with the 0.05 threshold marked
hist_ax.hist(p_values, bins=50, alpha=0.7, color='blue', edgecolor='black')
hist_ax.axvline(x=0.05, color='red', linestyle='--', label='α=0.05')
hist_ax.set_xlabel('p-value')
hist_ax.set_ylabel('Frequency')
hist_ax.set_title('P-value Distribution')
hist_ax.legend()

# Right panel: p-value of each test sample, coloured by whether it is below 0.05
scatter_ax.scatter(
    range(len(p_values)),
    p_values,
    c=p_values < 0.05,
    cmap='coolwarm',
    alpha=0.6,
)
scatter_ax.axhline(y=0.05, color='red', linestyle='--', label='α=0.05')
scatter_ax.set_xlabel('Sample Index')
scatter_ax.set_ylabel('p-value')
scatter_ax.set_title('P-values by Sample')
scatter_ax.legend()

fig.tight_layout()
plt.show()

## Next Steps

- Try weighted conformal detection for handling distribution shift
- Learn about FDR control for multiple testing
- Explore bootstrap-based detection for uncertainty estimation