install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Epigenomics/atac-seq/differential-accessibility" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-differential-acces && rm -rf "$T"
manifest:
Skills/Epigenomics/atac-seq/differential-accessibility/SKILL.mdsource content
<!--
# COPYRIGHT NOTICE
# This file is part of the "Universal Biomedical Skills" project.
# Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu>
# All Rights Reserved.
#
# This code is proprietary and confidential.
# Unauthorized copying of this file, via any medium is strictly prohibited.
#
# Provenance: Authenticated by MD BABU MIA
-->
name: bio-atac-seq-differential-accessibility description: Find differentially accessible chromatin regions between conditions using DiffBind or DESeq2. Use when comparing chromatin accessibility between treatment groups, cell types, or developmental stages in ATAC-seq experiments. tool_type: r primary_tool: DiffBind measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes. allowed-tools:
- read_file
- run_shell_command
Differential Accessibility
DiffBind Workflow
library(DiffBind) # 1. Create sample sheet samples <- data.frame( SampleID = c('ctrl_1', 'ctrl_2', 'treat_1', 'treat_2'), Condition = c('control', 'control', 'treated', 'treated'), Replicate = c(1, 2, 1, 2), bamReads = c('ctrl_1.bam', 'ctrl_2.bam', 'treat_1.bam', 'treat_2.bam'), Peaks = c('ctrl_1.narrowPeak', 'ctrl_2.narrowPeak', 'treat_1.narrowPeak', 'treat_2.narrowPeak') ) write.csv(samples, 'samples.csv', row.names=FALSE) # 2. Load data dba <- dba(sampleSheet='samples.csv') # 3. Count reads dba <- dba.count(dba) # 4. Normalize (required in DiffBind 3.0+) dba <- dba.normalize(dba) # 5. Set up contrasts dba <- dba.contrast(dba, contrast=c('Condition', 'treated', 'control')) # 6. Differential analysis dba <- dba.analyze(dba) # 7. Get results results <- dba.report(dba)
DiffBind with Consensus Peaks
library(DiffBind) # Load samples dba <- dba(sampleSheet='samples.csv') # Count with specific parameters dba <- dba.count(dba, summits=250, # Re-center peaks on summit minOverlap=2, # Peak in at least 2 samples score=DBA_SCORE_NORMALIZED) # Normalize dba <- dba.normalize(dba, normalize=DBA_NORM_NATIVE) # Analyze dba <- dba.contrast(dba, contrast=c('Condition', 'treated', 'control')) dba <- dba.analyze(dba, method=DBA_DESEQ2) # Extract results results <- dba.report(dba, th=0.05, bCounts=TRUE) # Save write.csv(as.data.frame(results), 'differential_peaks.csv')
DiffBind Visualizations
# PCA plot dba.plotPCA(dba, attributes=DBA_CONDITION) # MA plot dba.plotMA(dba) # Volcano plot dba.plotVolcano(dba) # Heatmap of differential peaks dba.plotHeatmap(dba, contrast=1, correlations=FALSE) # Venn diagram of overlapping peaks dba.plotVenn(dba, contrast=1, bDB=TRUE, bGain=TRUE, bLoss=TRUE)
Using DESeq2 Directly
library(DESeq2) library(GenomicRanges) # Load peak counts (from featureCounts or custom counting) counts <- read.delim('peak_counts.txt', row.names=1) # Sample metadata coldata <- data.frame( row.names = colnames(counts), condition = factor(c('control', 'control', 'treated', 'treated')) ) # Create DESeq object dds <- DESeqDataSetFromMatrix(countData=counts, colData=coldata, design=~condition) # Filter low counts dds <- dds[rowSums(counts(dds)) >= 10, ] # Run DESeq2 dds <- DESeq(dds) # Results res <- results(dds, contrast=c('condition', 'treated', 'control')) res <- res[order(res$padj), ] # Significant peaks sig <- subset(res, padj < 0.05 & abs(log2FoldChange) > 1)
Count Reads in Peaks
# Using featureCounts # First convert peaks to SAF format awk 'BEGIN{OFS="\t"; print "GeneID\tChr\tStart\tEnd\tStrand"} {print $1"_"$2"_"$3, $1, $2, $3, "."}' consensus_peaks.bed > peaks.saf featureCounts \ -a peaks.saf \ -F SAF \ -o peak_counts.txt \ -p \ --countReadPairs \ -T 8 \ *.bam
Python Alternative
import pandas as pd import numpy as np from scipy import stats def simple_differential(counts_file, groups): '''Simple differential accessibility test.''' counts = pd.read_csv(counts_file, sep='\t', index_col=0, comment='#') # Normalize to CPM cpm = counts.div(counts.sum()) * 1e6 # Log transform log_cpm = np.log2(cpm + 1) # Separate groups group1 = [c for c in counts.columns if groups[c] == 'control'] group2 = [c for c in counts.columns if groups[c] == 'treated'] results = [] for peak in counts.index: g1_vals = log_cpm.loc[peak, group1] g2_vals = log_cpm.loc[peak, group2] log2fc = g2_vals.mean() - g1_vals.mean() t_stat, pval = stats.ttest_ind(g1_vals, g2_vals) results.append({ 'peak': peak, 'log2FoldChange': log2fc, 'pvalue': pval }) df = pd.DataFrame(results) df['padj'] = stats.false_discovery_control(df['pvalue']) return df
Annotate Differential Peaks
library(ChIPseeker) library(TxDb.Hsapiens.UCSC.hg38.knownGene) # Annotate differential peaks diff_peaks <- dba.report(dba) peakAnno <- annotatePeak(diff_peaks, TxDb=TxDb.Hsapiens.UCSC.hg38.knownGene) # Plot annotation plotAnnoPie(peakAnno) plotDistToTSS(peakAnno) # Get genes genes <- as.data.frame(peakAnno)$geneId
Filter Results
# Get significant results sig_peaks <- dba.report(dba, th=0.05, fold=1) # Opened in treatment opened <- sig_peaks[sig_peaks$Fold > 0] # Closed in treatment closed <- sig_peaks[sig_peaks$Fold < 0] # Export as BED export.bed(opened, 'opened_peaks.bed') export.bed(closed, 'closed_peaks.bed')
Multi-factor Designs
# Complex design with batch correction samples$Batch <- factor(c('A', 'B', 'A', 'B')) dba <- dba(sampleSheet=samples) dba <- dba.count(dba) dba <- dba.normalize(dba) # Design formula approach dba <- dba.contrast(dba, design='~Batch + Condition') dba <- dba.analyze(dba)
Related Skills
- atac-seq/atac-peak-calling - Generate input peaks
- differential-expression/deseq2-basics - DESeq2 methods
- chip-seq/differential-binding - Similar DiffBind workflow
- pathway-analysis/go-enrichment - Analyze differential genes