LLMs-Universal-Life-Science-and-Clinical-Skills- motif-deviation

<!--

install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Epigenomics/atac-seq/motif-deviation" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-motif-deviation && rm -rf "$T"
manifest: Skills/Epigenomics/atac-seq/motif-deviation/SKILL.md
source content
<!-- # COPYRIGHT NOTICE # This file is part of the "Universal Biomedical Skills" project. # Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu> # All Rights Reserved. # # This code is proprietary and confidential. # Unauthorized copying of this file, via any medium is strictly prohibited. # # Provenance: Authenticated by MD BABU MIA -->

name: bio-atac-seq-motif-deviation description: Analyze transcription factor motif accessibility variability using chromVAR. Use when identifying which TF motifs show variable accessibility across samples or conditions in ATAC-seq data. tool_type: r primary_tool: chromVAR measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes. allowed-tools:

  • read_file
  • run_shell_command

Motif Deviation Analysis

Measure per-sample variability in transcription factor motif accessibility using chromVAR. This identifies TFs whose binding sites show differential accessibility across conditions.

Required Packages

library(chromVAR)
library(motifmatchr)
library(BSgenome.Hsapiens.UCSC.hg38)  # or appropriate genome
library(JASPAR2020)
library(TFBSTools)
library(SummarizedExperiment)

Basic Workflow

1. Load Peak Counts

library(chromVAR)
library(SummarizedExperiment)

# From count matrix and peak ranges
peaks <- read.table('peaks.bed', col.names = c('chr', 'start', 'end'))
peak_ranges <- GRanges(seqnames = peaks$chr, ranges = IRanges(peaks$start, peaks$end))

counts <- read.table('counts.txt', header = TRUE, row.names = 1)
counts_matrix <- as.matrix(counts)

fragment_counts <- SummarizedExperiment(
    assays = list(counts = counts_matrix),
    rowRanges = peak_ranges
)

2. Add GC Bias Correction

library(BSgenome.Hsapiens.UCSC.hg38)

fragment_counts <- addGCBias(fragment_counts, genome = BSgenome.Hsapiens.UCSC.hg38)

3. Filter Low-Quality Peaks

# min_depth=1500: Minimum total reads per sample. Adjust based on library size.
# min_in_peaks=0.15: Minimum fraction of reads in peaks (FRiP). 0.15 = 15%.
fragment_counts <- filterSamples(fragment_counts, min_depth = 1500, min_in_peaks = 0.15)

# min_count=10: Require peaks with >=10 reads across samples.
# n_samples_frac=0.1: Peak must be detected in >=10% of samples.
fragment_counts <- filterPeaks(fragment_counts, non_overlapping = TRUE,
                                min_count = 10, n_samples_frac = 0.1)

Get Motif Annotations

From JASPAR

library(JASPAR2020)
library(TFBSTools)
library(motifmatchr)

# Get vertebrate motifs from JASPAR
pfm <- getMatrixSet(JASPAR2020, opts = list(collection = 'CORE', tax_group = 'vertebrates'))

# Match motifs to peaks
# p.cutoff=5e-5: Motif match p-value threshold. Lower = more stringent.
motif_ix <- matchMotifs(pfm, fragment_counts, genome = BSgenome.Hsapiens.UCSC.hg38, p.cutoff = 5e-5)

From CIS-BP or Custom PWMs

# Load custom motifs from file
library(universalmotif)
motifs <- read_meme('custom_motifs.meme')
pfm_list <- lapply(motifs, function(m) convert_motifs(m, class = 'TFBSTools-PFMatrix'))

motif_ix <- matchMotifs(pfm_list, fragment_counts, genome = BSgenome.Hsapiens.UCSC.hg38)

Compute Deviations

# Compute chromVAR deviation scores
dev <- computeDeviations(object = fragment_counts, annotations = motif_ix)

# Extract deviation scores (z-scores)
deviation_scores <- deviations(dev)

# Extract variability across samples
variability <- computeVariability(dev)

Interpreting Results

Deviation Scores

# Deviation z-scores: positive = more accessible than expected
# Compare across samples
dev_matrix <- deviations(dev)
print(dim(dev_matrix))  # motifs x samples

# Get top variable motifs
var_df <- variability
var_df <- var_df[order(-var_df$variability), ]
head(var_df, 20)

Variability Interpretation

VariabilityInterpretation
> 2.0Highly variable across samples
1.0 - 2.0Moderately variable
< 1.0Low variability

Visualization

Deviation Heatmap

library(pheatmap)

# Get top variable motifs
# n_top=50: Number of top variable motifs to display.
n_top <- 50
top_motifs <- head(rownames(var_df), n_top)
top_dev <- deviation_scores[top_motifs, ]

# Add sample annotations
sample_info <- data.frame(
    Condition = colData(fragment_counts)$condition,
    row.names = colnames(top_dev)
)

pheatmap(top_dev, annotation_col = sample_info, scale = 'row',
         clustering_method = 'ward.D2', show_rownames = TRUE)

Variability Plot

plotVariability(variability, use_plotly = FALSE)

PCA of Deviation Scores

library(ggplot2)

# PCA on deviation scores
pca <- prcomp(t(deviation_scores), scale. = TRUE)
pca_df <- data.frame(PC1 = pca$x[,1], PC2 = pca$x[,2],
                     Condition = colData(fragment_counts)$condition)

ggplot(pca_df, aes(x = PC1, y = PC2, color = Condition)) +
    geom_point(size = 3) +
    theme_minimal() +
    labs(title = 'PCA of chromVAR Deviations')

Differential Motif Accessibility

Compare Two Groups

library(limma)

# Get sample groups
groups <- factor(colData(fragment_counts)$condition)

# Design matrix
design <- model.matrix(~ groups)

# Fit linear model to deviation scores
fit <- lmFit(deviation_scores, design)
fit <- eBayes(fit)

# Get differential motifs
# p.value=0.05: FDR threshold for significance.
diff_motifs <- topTable(fit, coef = 2, number = Inf, p.value = 0.05)
print(head(diff_motifs, 20))

Volcano Plot

library(ggplot2)

all_results <- topTable(fit, coef = 2, number = Inf)
all_results$significant <- all_results$adj.P.Val < 0.05

ggplot(all_results, aes(x = logFC, y = -log10(adj.P.Val), color = significant)) +
    geom_point(alpha = 0.6) +
    geom_hline(yintercept = -log10(0.05), linetype = 'dashed') +
    scale_color_manual(values = c('grey', 'red')) +
    theme_minimal() +
    labs(title = 'Differential Motif Accessibility',
         x = 'Log2 Fold Change', y = '-log10(adjusted p-value)')

Working with Single-Cell ATAC-seq

# For scATAC-seq, aggregate cells by cluster first
# Then run chromVAR on pseudo-bulk profiles

# Or use chromVAR with sparse matrices
library(Matrix)

# Create SummarizedExperiment with sparse counts
sparse_counts <- Matrix(counts_matrix, sparse = TRUE)
fragment_counts <- SummarizedExperiment(
    assays = list(counts = sparse_counts),
    rowRanges = peak_ranges
)

# Proceed with standard workflow
fragment_counts <- addGCBias(fragment_counts, genome = BSgenome.Hsapiens.UCSC.hg38)

Background Peaks Strategy

# Custom background for better bias correction
# n_iterations=50: Number of background sets. Higher = more stable but slower.
bg <- getBackgroundPeaks(object = fragment_counts, niterations = 50)

# Use custom background in deviation calculation
dev <- computeDeviations(object = fragment_counts, annotations = motif_ix, background_peaks = bg)

Export Results

# Save deviation scores
write.csv(as.data.frame(deviation_scores), 'chromvar_deviations.csv')

# Save variability
write.csv(variability, 'chromvar_variability.csv')

# Save differential results
write.csv(diff_motifs, 'differential_motifs.csv')

Complete Workflow

library(chromVAR)
library(motifmatchr)
library(BSgenome.Hsapiens.UCSC.hg38)
library(JASPAR2020)
library(TFBSTools)

# 1. Load data
fragment_counts <- getCounts('peaks.bed', c('sample1.bam', 'sample2.bam', 'sample3.bam'))

# 2. Add GC bias
fragment_counts <- addGCBias(fragment_counts, genome = BSgenome.Hsapiens.UCSC.hg38)

# 3. Filter
fragment_counts <- filterPeaks(fragment_counts)

# 4. Get motifs
pfm <- getMatrixSet(JASPAR2020, opts = list(collection = 'CORE', tax_group = 'vertebrates'))
motif_ix <- matchMotifs(pfm, fragment_counts, genome = BSgenome.Hsapiens.UCSC.hg38)

# 5. Compute deviations
dev <- computeDeviations(fragment_counts, motif_ix)

# 6. Analyze variability
variability <- computeVariability(dev)
plotVariability(variability)

Related Skills

  • differential-accessibility - Peak-level differential analysis with DiffBind
  • footprinting - TF footprinting with TOBIAS
  • atac-qc - Quality control before chromVAR
  • chip-seq/motif-analysis - Alternative motif enrichment approaches
<!-- AUTHOR_SIGNATURE: 9a7f3c2e-MD-BABU-MIA-2026-MSSM-SECURE -->