install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Hematology/Flow_Cytometry/bead-normalization" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-bead-normalization && rm -rf "$T"
manifest:
Skills/Hematology/Flow_Cytometry/bead-normalization/SKILL.md — source content
<!--
# COPYRIGHT NOTICE
# This file is part of the "Universal Biomedical Skills" project.
# Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu>
# All Rights Reserved.
#
# This code is proprietary and confidential.
# Unauthorized copying of this file, via any medium is strictly prohibited.
#
# Provenance: Authenticated by MD BABU MIA
-->
name: bio-flow-cytometry-bead-normalization
description: Bead-based normalization for CyTOF and high-parameter flow cytometry. Covers EQ bead normalization, signal drift correction, and batch normalization. Use when correcting instrument drift in CyTOF or harmonizing data across batches.
tool_type: r
primary_tool: CATALYST
measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes.
allowed-tools:
- read_file
- run_shell_command
Bead Normalization
CyTOF EQ Bead Normalization
## --- Identify EQ normalization beads ---------------------------------------
## Fluidigm EQ beads carry fixed amounts of five lanthanides; events that are
## bright in all bead channels simultaneously are beads rather than cells.
## (Fluidigm ships a normalizer tool, but the same can be done in R.)
library(CATALYST)
library(flowCore)

# Load an FCS file acquired with the EQ beads still in the sample
ff <- read.FCS('cytof_with_beads.fcs')

# EQ beads contain known amounts of: Ce140, Eu151, Eu153, Ho165, Lu175
bead_channels <- c('Ce140Di', 'Eu151Di', 'Eu153Di', 'Ho165Di', 'Lu175Di')

# Score each event by its mean z-scored intensity across the bead channels;
# bead events sit in the far upper tail of this score.
bead_data <- exprs(ff)[, bead_channels]
bead_scores <- rowMeans(scale(bead_data))

# NOTE(review): the 0.99 cutoff assumes roughly 1% of events are beads --
# confirm against the actual bead fraction of the acquisition.
bead_threshold <- quantile(bead_scores, 0.99)
is_bead <- bead_scores > bead_threshold

cat('Identified', sum(is_bead), 'bead events (',
    round(mean(is_bead) * 100, 2), '%)\n')
Calculate Normalization Factors
## --- Per-channel normalization factors -------------------------------------

# Median bead intensity per channel for one acquisition; comparing these
# medians against a reference yields the normalization factor.
#
# ff            flowFrame for the acquisition
# bead_channels character vector of bead channel names
# bead_idx      logical/integer index selecting the bead events
# Returns a named numeric vector of per-channel median bead intensities.
calculate_norm_factors <- function(ff, bead_channels, bead_idx) {
  apply(exprs(ff)[bead_idx, bead_channels], 2, median)
}

# Reference intensities (from the first file, or known standards)
reference_beads <- c(Ce140 = 500, Eu151 = 600, Eu153 = 550,
                     Ho165 = 450, Lu175 = 400)

# Ratio of reference to observed medians = multiplicative correction factor
sample_beads <- calculate_norm_factors(ff, bead_channels, is_bead)
norm_factors <- reference_beads / sample_beads

cat('Normalization factors:\n')
print(round(norm_factors, 3))
Apply Normalization
## --- Apply normalization ----------------------------------------------------

# Marker channels = everything except time, event length, and the bead channels
marker_channels <- setdiff(colnames(ff), c('Time', 'Event_length', bead_channels))

# Rescale the listed channels by the geometric mean of the per-channel bead
# factors (a single global factor; substitute per-channel factors if you have
# channel-specific corrections).
#
# ff           flowFrame to normalize
# norm_factors named vector of per-channel bead factors
# channels     channels to rescale (scatter/time excluded by the caller)
# Returns the flowFrame with rescaled expression values.
normalize_cytof <- function(ff, norm_factors, channels) {
  global_factor <- exp(mean(log(norm_factors)))
  expr <- exprs(ff)
  expr[, channels] <- expr[, channels] * global_factor
  exprs(ff) <- expr
  ff
}

ff_normalized <- normalize_cytof(ff, norm_factors, marker_channels)

# Drop the bead events now that they have served their purpose
ff_clean <- ff_normalized[!is_bead, ]
cat('Final cell count:', nrow(ff_clean), '\n')
Time-Based Drift Correction
## --- Time-based drift correction --------------------------------------------

# Correct for signal drift over acquisition time, using bead events as an
# internal standard: bead signal is binned over time, a LOESS curve is fit to
# the per-bin medians, and every event is rescaled so the bead median stays
# flat across the run.
#
# ff           flowFrame containing both cells and beads
# time_channel name of the acquisition-time channel
# bead_idx     logical vector marking bead events; defaults to the `is_bead`
#              vector computed earlier in this workflow (previously an
#              implicit global dependency -- now an explicit argument)
# n_bins       number of time bins for estimating the drift curve
# span         LOESS smoothing span
# Returns the flowFrame with drift-corrected expression values.
correct_drift <- function(ff, time_channel = 'Time', bead_idx = is_bead,
                          n_bins = 20, span = 0.5) {
  expr <- exprs(ff)
  time <- expr[, time_channel]

  # Bin acquisition time into n_bins equal-width intervals
  time_bins <- cut(time, breaks = n_bins, labels = FALSE)

  corrected <- expr
  marker_cols <- setdiff(colnames(expr), c(time_channel, 'Event_length'))
  for (marker in marker_cols) {
    bin_medians <- tapply(expr[bead_idx, marker], time_bins[bead_idx], median)
    # Require enough occupied bins for a stable LOESS fit
    if (length(unique(time_bins[bead_idx])) > 3) {
      drift_data <- data.frame(
        time = as.numeric(names(bin_medians)),
        intensity = as.numeric(bin_medians)
      )
      loess_fit <- loess(intensity ~ time, data = drift_data, span = span)
      # Predict the drift level at every event's time bin
      correction <- predict(loess_fit, newdata = data.frame(time = time_bins))
      reference <- median(drift_data$intensity)
      # predict.loess returns NA outside the fitted range, and a zero or
      # negative fit would blow up the ratio -- leave such events
      # uncorrected rather than propagating NA/Inf into the data.
      bad <- !is.finite(correction) | correction <= 0
      correction[bad] <- reference
      corrected[, marker] <- expr[, marker] * (reference / correction)
    }
  }
  exprs(ff) <- corrected
  return(ff)
}

ff_drift_corrected <- correct_drift(ff)
Batch Normalization with CytoNorm
## --- Batch normalization with CytoNorm ---------------------------------------
# CytoNorm learns spline-based per-channel transformations from reference
# samples acquired on every batch (e.g. the same PBMC control), then applies
# them to harmonize the remaining files across batches.
library(CytoNorm)

# Reference acquisitions from the anchor batch
train_files <- list.files('batch1_reference/', pattern = '\\.fcs$',
                          full.names = TRUE)
# (fix: the original also read every training FCS into memory via read.FCS
# into an unused `train_data` -- CytoNorm.train reads the files itself)

# Train the normalization model on the reference samples
model <- CytoNorm.train(
  files = train_files,
  labels = rep('Reference', length(train_files)),
  channels = marker_channels,
  transformList = NULL,  # NULL if the data are already transformed
  nQ = 100,              # number of quantiles for the spline fit
  seed = 42
)

# Apply the learned transformation to a new batch
test_files <- list.files('batch2/', pattern = '\\.fcs$', full.names = TRUE)
normalized_files <- CytoNorm.normalize(
  model = model,
  files = test_files,
  labels = rep('Test', length(test_files)),
  outputDir = 'batch2_normalized/'
)
Quantile Normalization
## --- Quantile normalization ---------------------------------------------------

# Quantile-normalize a list of flowFrames so every sample shares the same
# per-channel intensity distribution.
#
# Bug fixed: the original built its "reference" by sorting each *channel* of
# the pooled matrix and taking colMeans, which collapses each channel's
# reference distribution to a single scalar; approx() then interpolated event
# ranks (up to n events) against only length(channels) support points. The
# standard construction is per channel: average the samples' quantile
# functions to get a reference distribution, then map each event's rank onto
# that distribution.
#
# fs          list of flowFrames
# channels    character vector of channel names to normalize
# n_quantiles number of grid points for the reference quantile function
#             (new optional argument; bounds memory for large samples)
# Returns the list of flowFrames with normalized expression values.
quantile_normalize <- function(fs, channels, n_quantiles = 1000) {
  probs <- seq(0, 1, length.out = n_quantiles)
  expr_list <- lapply(fs, function(ff) exprs(ff)[, channels, drop = FALSE])
  normalized_fs <- fs
  for (ch in channels) {
    # Reference distribution = mean of the per-sample quantile functions
    q_mat <- vapply(
      expr_list,
      function(m) quantile(m[, ch], probs = probs, names = FALSE),
      numeric(n_quantiles)
    )
    reference <- rowMeans(q_mat)
    for (i in seq_along(fs)) {
      x <- expr_list[[i]][, ch]
      # Map each event's (tie-averaged) rank to a probability in (0, 1),
      # then look up the reference value at that quantile
      p <- (rank(x, ties.method = 'average') - 0.5) / length(x)
      expr <- exprs(normalized_fs[[i]])
      expr[, ch] <- approx(probs, reference, xout = p, rule = 2)$y
      exprs(normalized_fs[[i]]) <- expr
    }
  }
  normalized_fs
}
CATALYST-Based Normalization
## --- CATALYST-based normalization --------------------------------------------
library(CATALYST)

# prepData() wraps the CyTOF preprocessing steps, including bead handling,
# arcsinh transformation, and time-dependent drift correction.
sce <- prepData(fs, panel, md,
                transform = TRUE,
                cofactor = 5,
                by_time = TRUE)  # correct time-dependent drift

# Alternative: manual bead gating within CATALYST
# sce <- prepData(fs, panel, md, FACS = FALSE)
# sce <- filterSCE(sce, !sce$is_bead)
Visualization
## --- Visualization ------------------------------------------------------------
library(ggplot2)

# Bead signal as a function of acquisition time (drift diagnostic)
drift_df <- data.frame(
  Time = exprs(ff)[is_bead, 'Time'],
  Ce140 = exprs(ff)[is_bead, 'Ce140Di'],
  Eu151 = exprs(ff)[is_bead, 'Eu151Di']
)
ggplot(drift_df, aes(x = Time, y = Ce140)) +
  geom_point(alpha = 0.1, size = 0.5) +
  geom_smooth(method = 'loess', color = 'red') +
  theme_bw() +
  labs(title = 'Bead Signal Over Time (Ce140)', x = 'Time', y = 'Intensity')
ggsave('bead_drift.png', width = 10, height = 4)

# Distribution of one marker before vs. after normalization
before_after <- data.frame(
  Value = c(exprs(ff)[, 'CD45'], exprs(ff_normalized)[, 'CD45']),
  Status = rep(c('Before', 'After'), each = nrow(ff))
)
ggplot(before_after, aes(x = Value, fill = Status)) +
  geom_histogram(bins = 100, alpha = 0.5, position = 'identity') +
  theme_bw() +
  labs(title = 'Normalization Effect on CD45')
Export Normalized Data
## --- Export normalized data ---------------------------------------------------

# Write the bead-free, normalized events back out as FCS
write.FCS(ff_clean, 'normalized_sample.fcs')

# For the CATALYST object:
# saveRDS(sce, 'normalized_sce.rds')
Related Skills
Workflow order: cytometry-qc → doublet-detection → bead-normalization → clustering
- cytometry-qc - Run first: identify drift and quality issues
- doublet-detection - Run before: remove doublets prior to normalization
- compensation-transformation - Initial data preprocessing
- clustering-phenotyping - Analysis after normalization
- differential-analysis - Batch-aware statistical testing