Skip to content

Examples

This section provides practical, real-world examples of using online-fdr for various applications. Each example includes complete code, explanations, and interpretations to help you apply online FDR control in your domain.

Example Categories

🚀 Basic Usage

Getting started with online FDR control

  • Simple sequential testing workflow
  • Comparing methods on simulated data
  • Parameter tuning and sensitivity analysis
  • Performance evaluation and metrics

🏭 Advanced Scenarios

Real-world applications and complex use cases

  • A/B testing in tech companies
  • Genomic variant discovery
  • Clinical trial interim analyses
  • Financial anomaly detection
  • Web analytics and conversion optimization

📊 Method Comparisons

Systematic comparison of different approaches

  • Online vs batch method performance
  • Power and FDR trade-offs across methods
  • Dependency structure effects
  • Parameter sensitivity studies

Quick Start Examples

1. Basic Online Testing

from online_fdr.investing.addis.addis import Addis

# Set up the ADDIS procedure once; it is then queried one p-value at a time
addis = Addis(alpha=0.05, wealth=0.025, lambda_=0.25, tau=0.5)

# Your p-values from real experiments, in the order they were obtained
p_values = [0.032, 0.001, 0.145, 0.003, 0.234, 0.089, 0.012]

# Feed the p-values through in sequence, remembering (0-based index, p-value)
# for every test the procedure flags
significant_results = []
for index, p_value in enumerate(p_values):
    rejected = addis.test_one(p_value)
    if not rejected:
        continue
    significant_results.append((index, p_value))
    # Report with a 1-based test number for readability
    print(f"✓ Significant: Test {index + 1} with p-value {p_value:.4f}")

n_found = len(significant_results)
n_total = len(p_values)
print(f"\nFound {n_found} significant results out of {n_total} tests")

2. A/B Test Monitoring

from online_fdr.investing.addis.addis import Addis
import numpy as np

def ab_test_with_fdr_control(variants, control_data, alpha=0.05):
    """A/B test multiple variants with online FDR control.

    Each variant is compared against the control with a two-sample t-test and
    the resulting p-value is passed, in iteration order of ``variants``, to a
    single shared ``Addis`` instance.

    Args:
        variants: Mapping of variant name to that variant's observations.
        control_data: Observations for the control group.
        alpha: Level passed to ``Addis``; the initial wealth is ``alpha / 2``.

    Returns:
        Dict keyed by variant name. Each value holds ``'p_value'``,
        ``'statistic'``, ``'significant'`` (the ``test_one`` decision) and
        ``'effect_size'`` (variant mean minus control mean).
    """
    # Imported once per call rather than on every loop iteration.
    from scipy.stats import ttest_ind

    method = Addis(alpha=alpha, wealth=alpha/2, lambda_=0.25, tau=0.5)

    # The control mean does not depend on the variant, so compute it once.
    control_mean = np.mean(control_data)

    results = {}

    for variant_name, variant_data in variants.items():
        # Perform statistical test (e.g., t-test)
        statistic, p_value = ttest_ind(variant_data, control_data)

        # Apply online FDR control
        is_significant = method.test_one(p_value)

        effect_size = np.mean(variant_data) - control_mean

        results[variant_name] = {
            'p_value': p_value,
            'statistic': statistic,
            'significant': is_significant,
            'effect_size': effect_size,
        }

        if is_significant:
            print(f"🎯 {variant_name}: Significant effect detected!")
            print(f"   P-value: {p_value:.4f}, Effect: {effect_size:.3f}")

    return results

# Example usage
np.random.seed(42)
control = np.random.normal(100, 15, 1000)  # Control group

# Mean of each simulated variant (control mean is 100). The variants are
# drawn in this listed order, which keeps the seeded random stream reproducible.
variant_means = {
    'Variant_A': 105,  # Small positive effect
    'Variant_B': 98,   # Small negative effect
    'Variant_C': 110,  # Large positive effect
    'Variant_D': 101,  # Minimal effect
}
variants = {
    name: np.random.normal(mean, 15, 1000)
    for name, mean in variant_means.items()
}

results = ab_test_with_fdr_control(variants, control, alpha=0.1)

3. Gene Expression Analysis

from online_fdr.investing.addis.addis import Addis
from scipy.stats import ttest_ind
import numpy as np

def differential_expression_analysis(gene_expression_data, group_labels,
                                     alpha=0.05):
    """Identify differentially expressed genes with FDR control.

    Rows of ``gene_expression_data`` are visited in index order. For each row
    a two-sample t-test compares the columns labelled ``'treatment'`` with
    those labelled ``'control'``, and the p-value is passed to one shared
    ``Addis`` instance.

    Args:
        gene_expression_data: 2-D array indexed as ``[gene, sample]``.
        group_labels: Array of per-sample labels compared against
            ``'treatment'`` and ``'control'``.
        alpha: Level passed to ``Addis``; the initial wealth is ``alpha / 2``.

    Returns:
        List of dicts, one per gene flagged by ``test_one``, with keys
        ``'gene_id'``, ``'p_value'``, ``'fold_change'`` (ratio of group means)
        and ``'log_fc'`` (base-2 log of that ratio).
    """
    fdr_procedure = Addis(alpha=alpha, wealth=alpha/2, lambda_=0.25, tau=0.5)

    # Boolean column selectors for the two groups
    is_treatment = group_labels == 'treatment'
    is_control = group_labels == 'control'

    hits = []
    n_genes = gene_expression_data.shape[0]

    for gene_id in range(n_genes):
        treated = gene_expression_data[gene_id, is_treatment]
        untreated = gene_expression_data[gene_id, is_control]

        # Only the p-value of the t-test is used below
        _, p_value = ttest_ind(treated, untreated)

        # Genes the online procedure does not flag are skipped
        if not fdr_procedure.test_one(p_value):
            continue

        fold_change = np.mean(treated) / np.mean(untreated)
        hits.append(dict(
            gene_id=gene_id,
            p_value=p_value,
            fold_change=fold_change,
            log_fc=np.log2(fold_change),
        ))

    return hits

# Simulate gene expression data: one row per gene, one column per sample
np.random.seed(123)
n_genes, n_samples = 1000, 50
expression_data = np.random.lognormal(mean=2, sigma=1, size=(n_genes, n_samples))

# Pick 50 distinct genes and scale their first 25 columns (the treatment
# samples, per the labels below) by 1.5 to mimic upregulation
diff_genes = np.random.choice(n_genes, size=50, replace=False)
expression_data[diff_genes, :25] = expression_data[diff_genes, :25] * 1.5

# First 25 samples are treatment, the remaining 25 are control
group_labels = np.repeat(['treatment', 'control'], 25)

significant_genes = differential_expression_analysis(
    expression_data,
    group_labels,
    alpha=0.1,
)

n_significant = len(significant_genes)
print(f"Found {n_significant} differentially expressed genes")

4. Clinical Trial Interim Analysis

from online_fdr.investing.lord.three import LordThree
from scipy.stats import chi2_contingency
import numpy as np

def interim_analysis(endpoints, alpha=0.05):
    """Analyze multiple endpoints with interim monitoring.

    Each endpoint is tested with a chi-square test on its 2x2
    success/failure table, and the p-value is passed, in iteration order of
    ``endpoints``, to one shared ``LordThree`` instance.

    Args:
        endpoints: Mapping of endpoint name to a
            ``(treatment_outcomes, control_outcomes)`` pair. Outcomes are
            summed to count successes, so they are presumably 0/1 indicators.
        alpha: Level passed to ``LordThree``; the initial wealth is
            ``alpha / 2``.

    Returns:
        Dict keyed by endpoint name with ``'p_value'``, ``'significant'``,
        ``'treatment_rate'``, ``'control_rate'``, ``'relative_risk'``
        (``inf`` when the control rate is zero) and ``'chi2_statistic'``.
    """
    # LORD3 procedure shared by all endpoints.
    # NOTE(review): reward is fixed at 0.05 and does not scale with alpha —
    # confirm this is intended for alpha values other than 0.1.
    fdr_procedure = LordThree(alpha=alpha, wealth=alpha/2, reward=0.05)

    summary = {}

    for endpoint_name, (treatment_outcomes, control_outcomes) in endpoints.items():
        # Successes and group sizes for each arm
        n_treat_success = np.sum(treatment_outcomes)
        n_treat = len(treatment_outcomes)
        n_ctrl_success = np.sum(control_outcomes)
        n_ctrl = len(control_outcomes)

        # Rows: treatment, control. Columns: successes, failures.
        table = np.array([
            [n_treat_success, n_treat - n_treat_success],
            [n_ctrl_success, n_ctrl - n_ctrl_success],
        ])

        # Chi-square test; degrees of freedom and expected counts are unused
        chi2, p_value, _dof, _expected = chi2_contingency(table)

        # Apply online FDR control
        is_significant = fdr_procedure.test_one(p_value)

        # Effect measures
        treatment_rate = n_treat_success / n_treat
        control_rate = n_ctrl_success / n_ctrl
        if control_rate > 0:
            relative_risk = treatment_rate / control_rate
        else:
            relative_risk = np.inf

        summary[endpoint_name] = dict(
            p_value=p_value,
            significant=is_significant,
            treatment_rate=treatment_rate,
            control_rate=control_rate,
            relative_risk=relative_risk,
            chi2_statistic=chi2,
        )

        if not is_significant:
            continue

        print(f"🏥 {endpoint_name}: Significant treatment effect!")
        print(f"   Treatment rate: {treatment_rate:.3f}")
        print(f"   Control rate: {control_rate:.3f}")
        print(f"   Relative risk: {relative_risk:.3f}")

    return summary

# Simulate clinical trial data
np.random.seed(456)
n_per_arm = 200

# (treatment success probability, control success probability) per endpoint.
# Endpoints are simulated in this listed order, treatment arm first, which
# keeps the seeded random stream reproducible.
success_probabilities = {
    'Primary_Efficacy': (0.65, 0.50),
    'Secondary_QoL': (0.70, 0.60),
    'Safety_AE': (0.15, 0.12),
}
endpoints = {
    name: (
        np.random.binomial(1, p_treatment, n_per_arm),  # Treatment group
        np.random.binomial(1, p_control, n_per_arm),    # Control group
    )
    for name, (p_treatment, p_control) in success_probabilities.items()
}

trial_results = interim_analysis(endpoints, alpha=0.1)

Domain-Specific Applications

Technology & Web

  • Multi-variant testing with FDR control
  • Conversion rate optimization
  • Feature rollout decision making
  • Revenue impact assessment
  • Anomaly detection in metrics
  • Performance regression testing
  • Alert fatigue reduction
  • SLA violation analysis

Life Sciences

  • Differential gene expression
  • GWAS analysis
  • Variant prioritization
  • Pathway enrichment
  • High-throughput screening
  • Biomarker identification
  • Toxicity testing
  • Clinical endpoint analysis

Finance & Economics

  • Market anomaly detection
  • Portfolio optimization
  • Fraud detection
  • Credit scoring
  • Strategy backtesting
  • Factor discovery
  • Market regime detection
  • Risk factor analysis

Research & Academia

  • Experiment replication studies
  • Meta-analysis
  • Survey data analysis
  • Behavioral intervention studies
  • Disease outbreak detection
  • Risk factor identification
  • Treatment effectiveness
  • Public health surveillance

Code Templates

Template 1: Sequential Testing Pipeline

import numpy as np


def sequential_testing_pipeline(data_stream, method_class, method_params,
                                statistical_test, significance_threshold=0.05,
                                early_stop_effect=-0.5):
    """Generic pipeline for sequential hypothesis testing.

    Args:
        data_stream: Iterable of dicts, each with ``'test_data'`` and
            ``'control_data'`` and optionally ``'timestamp'`` (defaults to the
            item's 0-based position in the stream).
        method_class: Callable invoked as ``method_class(**method_params)``;
            the returned object must provide ``test_one(p_value)``.
        method_params: Keyword arguments for ``method_class``.
        statistical_test: Callable ``(test_data, control_data)`` returning
            ``(statistic, p_value)``.
        significance_threshold: Currently unused by this function; kept so
            existing callers that pass it keep working.
        early_stop_effect: Stop after the first test that is both flagged by
            ``test_one`` and has an effect size below this value. Pass
            ``None`` to disable early stopping. Defaults to ``-0.5``.

    Returns:
        Dict of parallel lists ``'decisions'``, ``'p_values'``,
        ``'test_statistics'``, ``'effect_sizes'`` and ``'timestamps'``, one
        entry per processed item (including the item that triggered an early
        stop).
    """
    # Initialize FDR method
    fdr_method = method_class(**method_params)

    results = {
        'decisions': [],
        'p_values': [],
        'test_statistics': [],
        'effect_sizes': [],
        'timestamps': []
    }

    for i, data_point in enumerate(data_stream):
        # Extract test data
        test_data = data_point['test_data']
        control_data = data_point['control_data']
        timestamp = data_point.get('timestamp', i)

        # Perform statistical test
        statistic, p_value = statistical_test(test_data, control_data)

        # Apply FDR control
        decision = fdr_method.test_one(p_value)

        # Effect size is the difference in means (test minus control)
        effect_size = np.mean(test_data) - np.mean(control_data)

        # Store results
        results['decisions'].append(decision)
        results['p_values'].append(p_value)
        results['test_statistics'].append(statistic)
        results['effect_sizes'].append(effect_size)
        results['timestamps'].append(timestamp)

        # Optional early stop: a flagged test with a sufficiently negative
        # effect ends the run; the triggering test is already recorded above.
        if (early_stop_effect is not None and decision
                and effect_size < early_stop_effect):
            print(f"Early stopping at test {i+1}: Large negative effect detected")
            break

    return results

Template 2: Performance Evaluation

import numpy as np


def evaluate_fdr_method(method, p_values, true_labels, alpha=0.05):
    """Comprehensive evaluation of FDR method performance.

    Args:
        method: Object providing ``test_one(p_value)``; it is called once per
            p-value, in order.
        p_values: Sequence of p-values (must support ``len``).
        true_labels: One truth flag per p-value; truthy means the alternative
            hypothesis holds. Booleans and 0/1 integers are both accepted.
        alpha: Target FDR used for the ``'target_fdr'`` and
            ``'fdr_controlled'`` entries.

    Returns:
        Dict with the confusion-matrix counts, ``'total_tests'``,
        ``'total_discoveries'``, ``'empirical_fdr'``, ``'power'``,
        ``'precision'``, ``'recall'``, ``'target_fdr'`` and
        ``'fdr_controlled'`` (empirical FDR within 10% of ``alpha``).
        Ratios with a zero denominator are reported as 0.

    Raises:
        ValueError: If ``true_labels`` does not have one entry per p-value.
    """
    # Coerce to bool explicitly: with the default dtype an empty input becomes
    # a float array (on which ``&`` fails) and integer labels would make ``~``
    # a bitwise complement rather than a logical NOT.
    is_alternative = np.asarray(true_labels, dtype=bool)

    # Validate before running the method so a bad call does not consume its
    # state; mismatched lengths would otherwise broadcast or fail obscurely.
    if is_alternative.shape != (len(p_values),):
        raise ValueError(
            "true_labels must contain exactly one label per p-value"
        )

    # Run the method
    decisions = np.array([method.test_one(p_val) for p_val in p_values],
                         dtype=bool)

    # Confusion matrix components
    true_positives = int(np.count_nonzero(decisions & is_alternative))
    false_positives = int(np.count_nonzero(decisions & ~is_alternative))
    true_negatives = int(np.count_nonzero(~decisions & ~is_alternative))
    false_negatives = int(np.count_nonzero(~decisions & is_alternative))

    # Performance metrics
    total_discoveries = true_positives + false_positives
    total_alternatives = true_positives + false_negatives
    # max(..., 1) avoids dividing by zero when nothing was discovered
    empirical_fdr = false_positives / max(total_discoveries, 1)
    power = true_positives / total_alternatives if total_alternatives > 0 else 0
    precision = true_positives / max(total_discoveries, 1)
    recall = power  # Same as power

    results = {
        'total_tests': len(p_values),
        'total_discoveries': total_discoveries,
        'true_positives': true_positives,
        'false_positives': false_positives,
        'true_negatives': true_negatives,
        'false_negatives': false_negatives,
        'empirical_fdr': empirical_fdr,
        'power': power,
        'precision': precision,
        'recall': recall,
        'target_fdr': alpha,
        'fdr_controlled': empirical_fdr <= alpha * 1.1  # 10% tolerance
    }

    return results

Visualization Examples

Plotting FDR Control Over Time

import matplotlib.pyplot as plt
import numpy as np

def plot_fdr_control(p_values, true_labels, method, alpha=0.05):
    """Plot empirical FDR over time.

    Runs ``method.test_one`` on each p-value in order, tracks the running
    false discovery proportion, and shows a two-panel figure: the running FDR
    against the target ``alpha``, and the cumulative number of discoveries.

    Args:
        p_values: P-values in test order.
        true_labels: One truth flag per p-value; truthy means a discovery on
            that test counts as a true positive.
        method: Object providing ``test_one(p_value)``.
        alpha: Target FDR, drawn as a horizontal reference line.

    Returns:
        Tuple ``(cumulative_fdr, cumulative_discoveries)``: a list with the
        running FDR after each test and the cumulative sum of the decisions.
    """
    rejections = []
    fdr_trace = []
    n_true_discoveries = 0
    n_false_discoveries = 0

    for p_value, is_alternative in zip(p_values, true_labels):
        rejected = method.test_one(p_value)
        rejections.append(rejected)

        # Only discoveries change the tallies
        if rejected and is_alternative:
            n_true_discoveries += 1
        elif rejected:
            n_false_discoveries += 1

        # Running FDR; the max() guards against dividing by zero before the
        # first discovery
        n_discoveries = n_true_discoveries + n_false_discoveries
        fdr_trace.append(n_false_discoveries / max(n_discoveries, 1))

    plt.figure(figsize=(12, 6))

    # Left panel: running FDR against the target level
    plt.subplot(1, 2, 1)
    plt.plot(fdr_trace, label='Empirical FDR', linewidth=2)
    plt.axhline(y=alpha, color='red', linestyle='--',
                label=f'Target FDR (α={alpha})')
    plt.xlabel('Test Number')
    plt.ylabel('Cumulative FDR')
    plt.title('FDR Control Over Time')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Right panel: cumulative discoveries with a shaded area under the curve
    plt.subplot(1, 2, 2)
    discovery_counts = np.cumsum(rejections)
    test_positions = range(len(discovery_counts))
    plt.plot(discovery_counts, label='Total Discoveries', linewidth=2)
    plt.fill_between(test_positions, discovery_counts, alpha=0.3)
    plt.xlabel('Test Number')
    plt.ylabel('Cumulative Discoveries')
    plt.title('Discoveries Over Time')
    plt.legend()
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    return fdr_trace, discovery_counts

Next Steps

Ready to dive deeper? Choose an example category:

Or explore other sections:

  • API Reference: Detailed method documentation
  • Theory: Mathematical foundations
  • User Guide: Concepts and best practices