install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Epigenomics/atac-seq/motif-deviation" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-motif-deviation && rm -rf "$T"
manifest:
Skills/Epigenomics/atac-seq/motif-deviation/SKILL.mdsource content
<!--
# COPYRIGHT NOTICE
# This file is part of the "Universal Biomedical Skills" project.
# Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu>
# All Rights Reserved.
#
# This code is proprietary and confidential.
# Unauthorized copying of this file, via any medium is strictly prohibited.
#
# Provenance: Authenticated by MD BABU MIA
-->
name: bio-atac-seq-motif-deviation description: Analyze transcription factor motif accessibility variability using chromVAR. Use when identifying which TF motifs show variable accessibility across samples or conditions in ATAC-seq data. tool_type: r primary_tool: chromVAR measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes. allowed-tools:
- read_file
- run_shell_command
Motif Deviation Analysis
Measure per-sample variability in transcription factor motif accessibility using chromVAR. This identifies TFs whose binding sites show differential accessibility across conditions.
Required Packages
library(chromVAR) library(motifmatchr) library(BSgenome.Hsapiens.UCSC.hg38) # or appropriate genome library(JASPAR2020) library(TFBSTools) library(SummarizedExperiment)
Basic Workflow
1. Load Peak Counts
library(chromVAR) library(SummarizedExperiment) # From count matrix and peak ranges peaks <- read.table('peaks.bed', col.names = c('chr', 'start', 'end')) peak_ranges <- GRanges(seqnames = peaks$chr, ranges = IRanges(peaks$start, peaks$end)) counts <- read.table('counts.txt', header = TRUE, row.names = 1) counts_matrix <- as.matrix(counts) fragment_counts <- SummarizedExperiment( assays = list(counts = counts_matrix), rowRanges = peak_ranges )
2. Add GC Bias Correction
library(BSgenome.Hsapiens.UCSC.hg38) fragment_counts <- addGCBias(fragment_counts, genome = BSgenome.Hsapiens.UCSC.hg38)
3. Filter Low-Quality Peaks
# min_depth=1500: Minimum total reads per sample. Adjust based on library size. # min_in_peaks=0.15: Minimum fraction of reads in peaks (FRiP). 0.15 = 15%. fragment_counts <- filterSamples(fragment_counts, min_depth = 1500, min_in_peaks = 0.15) # min_count=10: Require peaks with >=10 reads across samples. # n_samples_frac=0.1: Peak must be detected in >=10% of samples. fragment_counts <- filterPeaks(fragment_counts, non_overlapping = TRUE, min_count = 10, n_samples_frac = 0.1)
Get Motif Annotations
From JASPAR
library(JASPAR2020) library(TFBSTools) library(motifmatchr) # Get vertebrate motifs from JASPAR pfm <- getMatrixSet(JASPAR2020, opts = list(collection = 'CORE', tax_group = 'vertebrates')) # Match motifs to peaks # p.cutoff=5e-5: Motif match p-value threshold. Lower = more stringent. motif_ix <- matchMotifs(pfm, fragment_counts, genome = BSgenome.Hsapiens.UCSC.hg38, p.cutoff = 5e-5)
From CIS-BP or Custom PWMs
# Load custom motifs from file library(universalmotif) motifs <- read_meme('custom_motifs.meme') pfm_list <- lapply(motifs, function(m) convert_motifs(m, class = 'TFBSTools-PFMatrix')) motif_ix <- matchMotifs(pfm_list, fragment_counts, genome = BSgenome.Hsapiens.UCSC.hg38)
Compute Deviations
# Compute chromVAR deviation scores dev <- computeDeviations(object = fragment_counts, annotations = motif_ix) # Extract deviation scores (z-scores) deviation_scores <- deviations(dev) # Extract variability across samples variability <- computeVariability(dev)
Interpreting Results
Deviation Scores
# Deviation z-scores: positive = more accessible than expected # Compare across samples dev_matrix <- deviations(dev) print(dim(dev_matrix)) # motifs x samples # Get top variable motifs var_df <- variability var_df <- var_df[order(-var_df$variability), ] head(var_df, 20)
Variability Interpretation
| Variability | Interpretation |
|---|---|
| > 2.0 | Highly variable across samples |
| 1.0 - 2.0 | Moderately variable |
| < 1.0 | Low variability |
Visualization
Deviation Heatmap
library(pheatmap) # Get top variable motifs # n_top=50: Number of top variable motifs to display. n_top <- 50 top_motifs <- head(rownames(var_df), n_top) top_dev <- deviation_scores[top_motifs, ] # Add sample annotations sample_info <- data.frame( Condition = colData(fragment_counts)$condition, row.names = colnames(top_dev) ) pheatmap(top_dev, annotation_col = sample_info, scale = 'row', clustering_method = 'ward.D2', show_rownames = TRUE)
Variability Plot
plotVariability(variability, use_plotly = FALSE)
PCA of Deviation Scores
library(ggplot2) # PCA on deviation scores pca <- prcomp(t(deviation_scores), scale. = TRUE) pca_df <- data.frame(PC1 = pca$x[,1], PC2 = pca$x[,2], Condition = colData(fragment_counts)$condition) ggplot(pca_df, aes(x = PC1, y = PC2, color = Condition)) + geom_point(size = 3) + theme_minimal() + labs(title = 'PCA of chromVAR Deviations')
Differential Motif Accessibility
Compare Two Groups
library(limma) # Get sample groups groups <- factor(colData(fragment_counts)$condition) # Design matrix design <- model.matrix(~ groups) # Fit linear model to deviation scores fit <- lmFit(deviation_scores, design) fit <- eBayes(fit) # Get differential motifs # p.value=0.05: FDR threshold for significance. diff_motifs <- topTable(fit, coef = 2, number = Inf, p.value = 0.05) print(head(diff_motifs, 20))
Volcano Plot
library(ggplot2) all_results <- topTable(fit, coef = 2, number = Inf) all_results$significant <- all_results$adj.P.Val < 0.05 ggplot(all_results, aes(x = logFC, y = -log10(adj.P.Val), color = significant)) + geom_point(alpha = 0.6) + geom_hline(yintercept = -log10(0.05), linetype = 'dashed') + scale_color_manual(values = c('grey', 'red')) + theme_minimal() + labs(title = 'Differential Motif Accessibility', x = 'Log2 Fold Change', y = '-log10(adjusted p-value)')
Working with Single-Cell ATAC-seq
# For scATAC-seq, aggregate cells by cluster first # Then run chromVAR on pseudo-bulk profiles # Or use chromVAR with sparse matrices library(Matrix) # Create SummarizedExperiment with sparse counts sparse_counts <- Matrix(counts_matrix, sparse = TRUE) fragment_counts <- SummarizedExperiment( assays = list(counts = sparse_counts), rowRanges = peak_ranges ) # Proceed with standard workflow fragment_counts <- addGCBias(fragment_counts, genome = BSgenome.Hsapiens.UCSC.hg38)
Background Peaks Strategy
# Custom background for better bias correction # n_iterations=50: Number of background sets. Higher = more stable but slower. bg <- getBackgroundPeaks(object = fragment_counts, niterations = 50) # Use custom background in deviation calculation dev <- computeDeviations(object = fragment_counts, annotations = motif_ix, background_peaks = bg)
Export Results
# Save deviation scores write.csv(as.data.frame(deviation_scores), 'chromvar_deviations.csv') # Save variability write.csv(variability, 'chromvar_variability.csv') # Save differential results write.csv(diff_motifs, 'differential_motifs.csv')
Complete Workflow
library(chromVAR) library(motifmatchr) library(BSgenome.Hsapiens.UCSC.hg38) library(JASPAR2020) library(TFBSTools) # 1. Load data fragment_counts <- getCounts('peaks.bed', c('sample1.bam', 'sample2.bam', 'sample3.bam')) # 2. Add GC bias fragment_counts <- addGCBias(fragment_counts, genome = BSgenome.Hsapiens.UCSC.hg38) # 3. Filter fragment_counts <- filterPeaks(fragment_counts) # 4. Get motifs pfm <- getMatrixSet(JASPAR2020, opts = list(collection = 'CORE', tax_group = 'vertebrates')) motif_ix <- matchMotifs(pfm, fragment_counts, genome = BSgenome.Hsapiens.UCSC.hg38) # 5. Compute deviations dev <- computeDeviations(fragment_counts, motif_ix) # 6. Analyze variability variability <- computeVariability(dev) plotVariability(variability)
Related Skills
- differential-accessibility - Peak-level differential analysis with DiffBind
- footprinting - TF footprinting with TOBIAS
- atac-qc - Quality control before chromVAR
- chip-seq/motif-analysis - Alternative motif enrichment approaches