Skip to content

Examples

This section provides practical, real-world examples of using online-fdr for various applications. Each example includes complete code, explanations, and interpretations to help you apply online FDR control in your domain.

Example Categories

Basic Usage

Getting started with online FDR control

  • Simple sequential testing workflow
  • Comparing methods on simulated data
  • Parameter tuning and sensitivity analysis
  • Performance evaluation and metrics

Advanced Scenarios

Real-world applications and complex use cases

  • A/B testing in tech companies
  • Genomic variant discovery
  • Clinical trial interim analyses
  • Financial anomaly detection
  • Web analytics and conversion optimization

Method Comparisons

Systematic comparison of different approaches

  • Online vs batch method performance
  • Power and FDR trade-offs across methods
  • Dependency structure effects
  • Parameter sensitivity studies

Quick Start Examples

1. Basic Online Testing

from online_fdr.investing.addis.addis import Addis

# Configure the ADDIS procedure that will screen the incoming p-values.
addis = Addis(
    alpha=0.05,
    wealth=0.025,
    lambda_=0.25,
    tau=0.5,
)

# P-values in the order the experiments were run; replace with your own.
p_values = [0.032, 0.001, 0.145, 0.003, 0.234, 0.089, 0.012]

# Feed the p-values to the procedure one at a time. For every test where
# test_one() returns a truthy value, keep its zero-based position and p-value.
significant_results = []
for i, p_val in enumerate(p_values):
    if not addis.test_one(p_val):
        continue
    significant_results.append((i, p_val))
    print(f" Significant: Test {i+1} with p-value {p_val:.4f}")

# Summary line: number of recorded results versus number of tests run.
print(f"\nFound {len(significant_results)} significant results out of {len(p_values)} tests")

2. A/B Test Monitoring

from online_fdr.investing.addis.addis import Addis
import numpy as np

def ab_test_with_fdr_control(variants, control_data, alpha=0.05):
    """Compare each variant against a shared control under online FDR control.

    Every variant is compared with ``control_data`` using
    ``scipy.stats.ttest_ind``. The resulting p-value is passed to a single
    ``Addis`` instance via ``test_one``, in the iteration order of
    ``variants``.

    Args:
        variants: Mapping from variant name to that variant's observations.
        control_data: Observations of the control group, reused for every
            comparison.
        alpha: Value passed to ``Addis`` as ``alpha``; the initial ``wealth``
            is set to ``alpha / 2``.

    Returns:
        Dict mapping each variant name to a dict with the keys
        ``'p_value'``, ``'statistic'``, ``'significant'`` and
        ``'effect_size'`` (mean of the variant minus mean of the control).
    """
    # Imported once per call instead of on every loop iteration.
    from scipy.stats import ttest_ind

    method = Addis(alpha=alpha, wealth=alpha/2, lambda_=0.25, tau=0.5)

    # The control mean does not depend on the variant, so compute it once.
    control_mean = np.mean(control_data)

    results = {}

    for variant_name, variant_data in variants.items():
        # Perform statistical test (t-test of variant vs. control)
        statistic, p_value = ttest_ind(variant_data, control_data)

        # Apply online FDR control
        is_significant = method.test_one(p_value)

        effect_size = np.mean(variant_data) - control_mean

        results[variant_name] = {
            'p_value': p_value,
            'statistic': statistic,
            'significant': is_significant,
            'effect_size': effect_size,
        }

        if is_significant:
            print(f" {variant_name}: Significant effect detected!")
            print(f"   P-value: {p_value:.4f}, Effect: {effect_size:.3f}")

    return results

# Example usage: simulate one control group and four variants (standard
# deviation 15, 1000 observations each), then screen the variants.
np.random.seed(42)  # fixed seed so the simulated samples are reproducible
control = np.random.normal(loc=100, scale=15, size=1000)  # Control group

# The variants are drawn in the listed order, right after the control draw.
variants = {
    name: np.random.normal(loc=mean, scale=15, size=1000)
    for name, mean in (
        ('Variant_A', 105),  # Small positive effect
        ('Variant_B', 98),   # Small negative effect
        ('Variant_C', 110),  # Large positive effect
        ('Variant_D', 101),  # Minimal effect
    )
}

results = ab_test_with_fdr_control(variants, control, alpha=0.1)

3. Gene Expression Analysis

from online_fdr.investing.addis.addis import Addis
from scipy.stats import ttest_ind
import numpy as np

def differential_expression_analysis(gene_expression_data, group_labels, 
                                   alpha=0.05):
    """Screen genes one by one for a treatment/control expression difference.

    For every row of ``gene_expression_data``, ``ttest_ind`` compares the
    samples labelled ``'treatment'`` with those labelled ``'control'``. The
    p-value is passed to one shared ``Addis`` instance via ``test_one``, in
    row order.

    Args:
        gene_expression_data: 2-D array indexed as ``[gene, sample]``.
        group_labels: Per-sample labels compared element-wise against
            ``'treatment'`` and ``'control'`` (presumably a NumPy array of
            strings with one entry per sample column; confirm with caller).
        alpha: Value passed to ``Addis`` as ``alpha``; the initial ``wealth``
            is set to ``alpha / 2``.

    Returns:
        List of dicts, one per gene for which ``test_one`` returned a truthy
        value, with the keys ``'gene_id'``, ``'p_value'``, ``'fold_change'``
        (treatment mean divided by control mean) and ``'log_fc'`` (base-2
        logarithm of the fold change).
    """
    fdr_procedure = Addis(alpha=alpha, wealth=alpha/2, lambda_=0.25, tau=0.5)

    # Boolean masks selecting the sample columns of each group.
    is_treatment = group_labels == 'treatment'
    is_control = group_labels == 'control'

    hits = []
    n_rows = gene_expression_data.shape[0]

    for gene_id in range(n_rows):
        treated = gene_expression_data[gene_id, is_treatment]
        untreated = gene_expression_data[gene_id, is_control]

        # Only the p-value of the t-test is needed here.
        _, p_value = ttest_ind(treated, untreated)

        # Genes not flagged by the FDR procedure are skipped.
        if not fdr_procedure.test_one(p_value):
            continue

        ratio = np.mean(treated) / np.mean(untreated)
        hits.append({
            'gene_id': gene_id,
            'p_value': p_value,
            'fold_change': ratio,
            'log_fc': np.log2(ratio),
        })

    return hits

# Simulate a genes-by-samples expression matrix from a log-normal distribution.
np.random.seed(123)  # fixed seed so the simulated data are reproducible
n_genes = 1000
n_samples = 50
expression_data = np.random.lognormal(mean=2, sigma=1, size=(n_genes, n_samples))

# Pick 50 distinct genes and scale their first 25 columns (the treatment
# samples, per the labels below) by 1.5 to mimic upregulation.
diff_genes = np.random.choice(n_genes, size=50, replace=False)
expression_data[diff_genes, :25] = expression_data[diff_genes, :25] * 1.5

# First 25 samples are 'treatment', the remaining 25 are 'control'.
group_labels = np.repeat(['treatment', 'control'], 25)

significant_genes = differential_expression_analysis(
    expression_data,
    group_labels,
    alpha=0.1,
)

print(f"Found {len(significant_genes)} differentially expressed genes")

4. Clinical Trial Interim Analysis

from online_fdr.investing.lord.three import LordThree
from scipy.stats import chi2_contingency
import numpy as np

def interim_analysis(endpoints, alpha=0.05):
    """Test several binary trial endpoints in sequence under online FDR control.

    For each endpoint a 2x2 table (successes / failures for treatment and
    control) is tested with ``chi2_contingency``. The p-value is passed to a
    single ``LordThree`` instance via ``test_one``, in the iteration order of
    ``endpoints``.

    Args:
        endpoints: Mapping from endpoint name to a pair
            ``(treatment_outcomes, control_outcomes)``; each element is summed
            to count successes and its length is the group size.
        alpha: Value passed to ``LordThree`` as ``alpha``; ``wealth`` and
            ``reward`` are both set to ``alpha / 2``.

    Returns:
        Dict mapping each endpoint name to a dict with the keys
        ``'p_value'``, ``'significant'``, ``'treatment_rate'``,
        ``'control_rate'``, ``'relative_risk'`` and ``'chi2_statistic'``.
    """
    # NOTE(review): the original example motivates LORD3 by temporal
    # correlation between sequential analyses - confirm against library docs.
    fdr_procedure = LordThree(alpha=alpha, wealth=alpha/2, reward=alpha/2)

    summary = {}

    for endpoint_name, outcome_pair in endpoints.items():
        treatment_outcomes, control_outcomes = outcome_pair

        # Success counts and group sizes for both arms.
        treatment_success = np.sum(treatment_outcomes)
        control_success = np.sum(control_outcomes)
        treatment_total = len(treatment_outcomes)
        control_total = len(control_outcomes)

        # Rows: treatment, control. Columns: successes, failures.
        treatment_row = [treatment_success, treatment_total - treatment_success]
        control_row = [control_success, control_total - control_success]
        contingency_table = np.array([treatment_row, control_row])

        # Only the statistic and the p-value of the chi-square test are used.
        chi2, p_value, *_ = chi2_contingency(contingency_table)

        is_significant = fdr_procedure.test_one(p_value)

        treatment_rate = treatment_success / treatment_total
        control_rate = control_success / control_total
        # A control rate of zero would divide by zero; report infinity instead.
        if control_rate > 0:
            relative_risk = treatment_rate / control_rate
        else:
            relative_risk = np.inf

        summary[endpoint_name] = {
            'p_value': p_value,
            'significant': is_significant,
            'treatment_rate': treatment_rate,
            'control_rate': control_rate,
            'relative_risk': relative_risk,
            'chi2_statistic': chi2,
        }

        if not is_significant:
            continue

        print(f" {endpoint_name}: Significant treatment effect!")
        print(f"   Treatment rate: {treatment_rate:.3f}")
        print(f"   Control rate: {control_rate:.3f}")
        print(f"   Relative risk: {relative_risk:.3f}")

    return summary

# Simulate clinical trial data: 200 Bernoulli outcomes per arm and endpoint.
np.random.seed(456)  # fixed seed so the simulated outcomes are reproducible

# For each endpoint the treatment arm is drawn first, then the control arm,
# and the endpoints are processed in the listed order.
endpoints = {
    name: (
        np.random.binomial(1, p_treatment, 200),  # Treatment group
        np.random.binomial(1, p_control, 200),    # Control group
    )
    for name, p_treatment, p_control in (
        ('Primary_Efficacy', 0.65, 0.50),
        ('Secondary_QoL', 0.70, 0.60),
        ('Safety_AE', 0.15, 0.12),
    )
}

trial_results = interim_analysis(endpoints, alpha=0.1)

Domain-Specific Applications

Technology & Web

  • Multi-variant testing with FDR control
  • Conversion rate optimization
  • Feature rollout decision making
  • Revenue impact assessment
  • Anomaly detection in metrics
  • Performance regression testing
  • Alert fatigue reduction
  • SLA violation analysis

Life Sciences

  • Differential gene expression
  • GWAS analysis
  • Variant prioritization
  • Pathway enrichment
  • High-throughput screening
  • Biomarker identification
  • Toxicity testing
  • Clinical endpoint analysis

Finance & Economics

  • Market anomaly detection
  • Portfolio optimization
  • Fraud detection
  • Credit scoring
  • Strategy backtesting
  • Factor discovery
  • Market regime detection
  • Risk factor analysis

Research & Academia

  • Experiment replication studies
  • Meta-analysis
  • Survey data analysis
  • Behavioral intervention studies
  • Disease outbreak detection
  • Risk factor identification
  • Treatment effectiveness
  • Public health surveillance

Code Templates

Template 1: Sequential Testing Pipeline

def sequential_testing_pipeline(data_stream, method_class, method_params, 
                               statistical_test, significance_threshold=0.05,
                               early_stop_effect=-0.5):
    """Generic pipeline for sequential hypothesis testing.

    Each item of ``data_stream`` is tested with ``statistical_test``. The
    p-value is passed to ``fdr_method.test_one`` and the outcome is recorded.
    Processing stops early when a test is flagged and its effect size falls
    below ``early_stop_effect``; the stopping test is still recorded.

    Args:
        data_stream: Iterable of dicts with the keys ``'test_data'`` and
            ``'control_data'`` and an optional ``'timestamp'`` (defaults to
            the zero-based position in the stream).
        method_class: Callable invoked as ``method_class(**method_params)``;
            the result must expose ``test_one(p_value)``.
        method_params: Keyword arguments for ``method_class``.
        statistical_test: Callable ``(test_data, control_data)`` returning a
            ``(statistic, p_value)`` pair.
        significance_threshold: Not used by the pipeline; kept so existing
            callers that pass it keep working.
        early_stop_effect: Effect-size bound for early stopping. Pass
            ``None`` to always process the whole stream.

    Returns:
        Dict of equally long lists under the keys ``'decisions'``,
        ``'p_values'``, ``'test_statistics'``, ``'effect_sizes'`` (mean of the
        test data minus mean of the control data) and ``'timestamps'``.
    """
    # Local import keeps the template self-contained when copied on its own.
    import numpy as np

    # Initialize FDR method
    fdr_method = method_class(**method_params)

    results = {
        'decisions': [],
        'p_values': [],
        'test_statistics': [],
        'effect_sizes': [],
        'timestamps': []
    }

    for i, data_point in enumerate(data_stream):
        # Extract test data
        test_data = data_point['test_data']
        control_data = data_point['control_data']
        timestamp = data_point.get('timestamp', i)

        # Perform statistical test
        statistic, p_value = statistical_test(test_data, control_data)

        # Apply FDR control
        decision = fdr_method.test_one(p_value)

        # Calculate effect size
        effect_size = np.mean(test_data) - np.mean(control_data)

        # Store results before the stop check so the stopping test is kept.
        results['decisions'].append(decision)
        results['p_values'].append(p_value)
        results['test_statistics'].append(statistic)
        results['effect_sizes'].append(effect_size)
        results['timestamps'].append(timestamp)

        # Optional early stopping on a flagged, sufficiently negative effect.
        if (early_stop_effect is not None and decision
                and effect_size < early_stop_effect):
            print(f"Early stopping at test {i+1}: Large negative effect detected")
            break

    return results

Template 2: Performance Evaluation

def evaluate_fdr_method(method, p_values, true_labels, alpha=0.05):
    """Run an online FDR method over labelled p-values and score its decisions.

    ``method.test_one`` is called once per p-value, in order, on the object
    that is passed in. The decisions are then compared with ``true_labels``.

    Args:
        method: Object exposing ``test_one(p_value)``; its return value is
            interpreted as a boolean rejection decision.
        p_values: Sequence of p-values to test sequentially.
        true_labels: Sequence of the same length as ``p_values``; truthy where
            the alternative hypothesis is true.
        alpha: Target FDR level, reported back and used for the
            ``'fdr_controlled'`` flag.

    Returns:
        Dict with the confusion-matrix counts, ``'total_tests'``,
        ``'total_discoveries'``, ``'empirical_fdr'``, ``'power'``,
        ``'precision'``, ``'recall'``, ``'target_fdr'`` and
        ``'fdr_controlled'`` (empirical FDR within 10% of ``alpha``).

    Raises:
        ValueError: If ``true_labels`` and ``p_values`` differ in length.
    """
    # Force boolean dtype on both arrays. An empty list would otherwise become
    # a float array (on which `&` and `~` raise), and integer labels would
    # make `~` a bitwise complement instead of a logical negation.
    decisions = np.asarray(
        [method.test_one(p_val) for p_val in p_values], dtype=bool
    )
    true_labels = np.asarray(true_labels, dtype=bool)  # True if alternative hypothesis

    # Reject mismatched lengths explicitly instead of letting a length-1
    # array broadcast silently.
    if decisions.shape != true_labels.shape:
        raise ValueError(
            "true_labels must have the same length as p_values "
            f"(got {true_labels.shape} and {decisions.shape})"
        )

    # Confusion matrix components
    true_positives = np.sum(decisions & true_labels)
    false_positives = np.sum(decisions & ~true_labels)
    true_negatives = np.sum(~decisions & ~true_labels)
    false_negatives = np.sum(~decisions & true_labels)

    # Performance metrics; max(..., 1) avoids dividing by zero when nothing
    # was discovered.
    total_discoveries = true_positives + false_positives
    empirical_fdr = false_positives / max(total_discoveries, 1)
    n_alternatives = np.sum(true_labels)
    power = true_positives / n_alternatives if n_alternatives > 0 else 0.0
    precision = true_positives / max(total_discoveries, 1)
    recall = power  # Same as power

    results = {
        'total_tests': len(p_values),
        'total_discoveries': total_discoveries,
        'true_positives': true_positives,
        'false_positives': false_positives,
        'true_negatives': true_negatives,
        'false_negatives': false_negatives,
        'empirical_fdr': empirical_fdr,
        'power': power,
        'precision': precision,
        'recall': recall,
        'target_fdr': alpha,
        'fdr_controlled': empirical_fdr <= alpha * 1.1  # 10% tolerance
    }

    return results

Visualization Examples

Plotting FDR Control Over Time

import matplotlib.pyplot as plt
import numpy as np

def plot_fdr_control(p_values, true_labels, method, alpha=0.05):
    """Plot the running empirical FDR and the running discovery count.

    ``method.test_one`` is called once per p-value, in order. After each test
    the empirical FDR so far (false discoveries divided by all discoveries,
    or 0 while there are none) is recorded. Two side-by-side panels are
    drawn with matplotlib and shown with ``plt.show()``.

    Args:
        p_values: Sequence of p-values to test sequentially.
        true_labels: Per-test flags, truthy where the alternative is true;
            paired with ``p_values`` via ``zip``.
        method: Object exposing ``test_one(p_value)``.
        alpha: Target FDR level, drawn as a horizontal reference line.

    Returns:
        Tuple ``(cumulative_fdr, cumulative_discoveries)``: the list of
        running FDR values and the cumulative sum of the decisions.
    """
    decisions = []
    cumulative_fdr = []
    n_true_discoveries = 0
    n_false_discoveries = 0

    for p_val, is_alt in zip(p_values, true_labels):
        rejected = method.test_one(p_val)
        decisions.append(rejected)

        # Only rejections change the discovery counters.
        if rejected and is_alt:
            n_true_discoveries += 1
        elif rejected:
            n_false_discoveries += 1

        # Running FDR; max(..., 1) avoids dividing by zero before the first
        # discovery.
        n_discoveries = n_true_discoveries + n_false_discoveries
        cumulative_fdr.append(n_false_discoveries / max(n_discoveries, 1))

    plt.figure(figsize=(12, 6))

    # Left panel: running empirical FDR against the target level.
    plt.subplot(1, 2, 1)
    plt.plot(cumulative_fdr, label='Empirical FDR', linewidth=2)
    plt.axhline(
        y=alpha,
        color='red',
        linestyle='--',
        label=f'Target FDR (={alpha})',
    )
    plt.xlabel('Test Number')
    plt.ylabel('Cumulative FDR')
    plt.title('FDR Control Over Time')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Right panel: cumulative number of discoveries with a shaded area below.
    plt.subplot(1, 2, 2)
    cumulative_discoveries = np.cumsum(decisions)
    test_positions = range(len(cumulative_discoveries))
    plt.plot(cumulative_discoveries, label='Total Discoveries', linewidth=2)
    plt.fill_between(test_positions, cumulative_discoveries, alpha=0.3)
    plt.xlabel('Test Number')
    plt.ylabel('Cumulative Discoveries')
    plt.title('Discoveries Over Time')
    plt.legend()
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    return cumulative_fdr, cumulative_discoveries

Next Steps

Ready to dive deeper? Choose one of the example categories listed at the top of this page: Basic Usage, Advanced Scenarios, or Method Comparisons.

Or explore other sections:

  • API Reference: Detailed method documentation
  • Theory: Mathematical foundations
  • User Guide: Concepts and best practices