install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Epigenomics/atac-seq/nucleosome-positioning" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-nucleosome-positio && rm -rf "$T"
manifest:
Skills/Epigenomics/atac-seq/nucleosome-positioning/SKILL.mdsource content
<!--
# COPYRIGHT NOTICE
# This file is part of the "Universal Biomedical Skills" project.
# Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu>
# All Rights Reserved.
#
# This code is proprietary and confidential.
# Unauthorized copying of this file, via any medium is strictly prohibited.
#
# Provenance: Authenticated by MD BABU MIA
-->
name: bio-atac-seq-nucleosome-positioning description: Extract nucleosome positions from ATAC-seq data using NucleoATAC, ATACseqQC, and fragment analysis. Use when analyzing chromatin organization, identifying nucleosome-free regions at promoters, or characterizing nucleosome occupancy patterns from ATAC-seq fragment size distributions. tool_type: mixed primary_tool: NucleoATAC measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes. allowed-tools:
- read_file
- run_shell_command
Nucleosome Positioning
Extract nucleosome positions and occupancy from ATAC-seq fragment size patterns.
Background
ATAC-seq fragments reflect chromatin structure:
- < 100 bp: Nucleosome-free regions (NFR)
- 180-247 bp: Mono-nucleosome
- 315-473 bp: Di-nucleosome
- 558-615 bp: Tri-nucleosome
ATACseqQC (R)
Installation
BiocManager::install('ATACseqQC')
Fragment Size Distribution
library(ATACseqQC) library(Rsamtools) # Read BAM bamfile <- 'sample.bam' # Fragment size distribution fragSize <- fragSizeDist(bamfile, 'sample') # Nucleosome-free and mono-nucleosome ratios # Automatic QC metrics
Nucleosome Positioning
library(ATACseqQC) library(TxDb.Hsapiens.UCSC.hg38.knownGene) library(BSgenome.Hsapiens.UCSC.hg38) # Get TSS regions txs <- transcripts(TxDb.Hsapiens.UCSC.hg38.knownGene) tss <- promoters(txs, upstream=1000, downstream=1000) # Read BAM gal <- readBamFile(bamfile, asMates=TRUE, bigFile=TRUE) # Shift reads (Tn5 offset correction) gal_shifted <- shiftGAlignmentsList(gal) # Split by nucleosome-free and nucleosomal objs <- splitGAlignmentsByCut(gal_shifted, txs=txs, genome=BSgenome.Hsapiens.UCSC.hg38) # nucleosome-free fragments nfr <- objs$NussomeFree # Mono-nucleosome fragments mono <- objs$mononucleosome # Signal around TSS sigs <- featureAlignedSignal(cvglist=objs, feature.gr=tss, upstream=1000, downstream=1000)
V-Plot (Fragment Size vs Position)
# V-plot showing nucleosome positioning around TSS vp <- vPlot(gal_shifted, tss, genome=BSgenome.Hsapiens.UCSC.hg38, upstream=1000, downstream=1000)
Footprinting
# Transcription factor footprinting library(MotifDb) # Get motif motif <- query(MotifDb, 'CTCF')[[1]] # Find motif occurrences library(motifmatchr) motif_pos <- matchMotifs(motif, BSgenome.Hsapiens.UCSC.hg38, genome='hg38', out='positions') # Calculate footprint fp <- factorFootprints(gal_shifted, motif_pos, genome=BSgenome.Hsapiens.UCSC.hg38, upstream=100, downstream=100)
NucleoATAC (Python)
Installation
pip install nucleoatac
Run NucleoATAC
# Call nucleosomes nucleoatac run --bed regions.bed --bam sample.bam --fasta reference.fa \ --out nucleoatac_output --cores 8
Output Files
| File | Description |
|---|---|
| Nucleosome positions |
| All nucleosome calls |
| NFR positions |
| Nucleosome occupancy track |
| Combined nucleosome map |
Visualize Output
# Convert to bigWig for visualization bedGraphToBigWig nucleoatac_output.occ.bedgraph chrom.sizes nucleosome_occ.bw
Fragment Analysis (Custom)
Extract Fragment Sizes
import pysam import numpy as np import matplotlib.pyplot as plt bam = pysam.AlignmentFile('sample.bam', 'rb') fragment_sizes = [] for read in bam.fetch(): if read.is_proper_pair and read.is_read1: frag_size = abs(read.template_length) if 0 < frag_size < 1000: fragment_sizes.append(frag_size) bam.close() # Plot distribution plt.figure(figsize=(10, 6)) plt.hist(fragment_sizes, bins=200, edgecolor='none', alpha=0.7) plt.axvline(100, color='red', linestyle='--', label='NFR cutoff') plt.axvline(180, color='blue', linestyle='--', label='Mono-nuc start') plt.xlabel('Fragment Size (bp)') plt.ylabel('Count') plt.legend() plt.savefig('fragment_distribution.png', dpi=300)
Split by Fragment Size
# Extract nucleosome-free reads samtools view -h sample.bam | \ awk '$9 > -100 && $9 < 100 || $1 ~ /^@/' | \ samtools view -b > nfr.bam # Extract mono-nucleosome reads samtools view -h sample.bam | \ awk '($9 >= 180 && $9 <= 247) || ($9 <= -180 && $9 >= -247) || $1 ~ /^@/' | \ samtools view -b > mono_nuc.bam
Signal Around Features
import pysam import numpy as np import pyBigWig def signal_around_sites(bam_file, sites, upstream=1000, downstream=1000): bam = pysam.AlignmentFile(bam_file, 'rb') window_size = upstream + downstream signal = np.zeros(window_size) for chrom, pos, strand in sites: start = pos - upstream if strand == '+' else pos - downstream end = pos + downstream if strand == '+' else pos + upstream for read in bam.fetch(chrom, max(0, start), end): if read.is_proper_pair and read.is_read1: frag_center = read.reference_start + abs(read.template_length) // 2 rel_pos = frag_center - start if 0 <= rel_pos < window_size: signal[rel_pos] += 1 bam.close() return signal / len(sites) # Load TSS sites tss_sites = [] # Load from GTF nfr_signal = signal_around_sites('nfr.bam', tss_sites) mono_signal = signal_around_sites('mono_nuc.bam', tss_sites)
DANPOS
Installation
conda install -c bioconda danpos
Run DANPOS
# Single sample danpos.py dpos sample.bam -o danpos_output # Compare conditions danpos.py dpeak -b treatment.bam -c control.bam -o danpos_diff
Complete Workflow
library(ATACseqQC) library(TxDb.Hsapiens.UCSC.hg38.knownGene) library(BSgenome.Hsapiens.UCSC.hg38) bamfile <- 'sample.bam' # 1. Fragment size QC fragSize <- fragSizeDist(bamfile, 'sample') pdf('fragment_size.pdf') plot(fragSize) dev.off() # 2. Read and shift gal <- readBamFile(bamfile, asMates=TRUE, bigFile=TRUE) gal_shifted <- shiftGAlignmentsList(gal) # 3. Get TSS regions txs <- transcripts(TxDb.Hsapiens.UCSC.hg38.knownGene) tss <- promoters(txs, upstream=2000, downstream=2000) # 4. Split by fragment size objs <- splitGAlignmentsByCut(gal_shifted, txs=txs, genome=BSgenome.Hsapiens.UCSC.hg38) # 5. Calculate signals sigs <- featureAlignedSignal(cvglist=objs, feature.gr=tss, upstream=2000, downstream=2000) # 6. Plot heatmap pdf('nucleosome_heatmap.pdf', width=8, height=10) featureAlignedHeatmap(sigs, tss, upstream=2000, downstream=2000) dev.off() # 7. V-plot pdf('vplot.pdf') vPlot(gal_shifted, tss, genome=BSgenome.Hsapiens.UCSC.hg38, upstream=1000, downstream=1000) dev.off() # 8. Export nucleosome-free and nucleosomal BAMs export(objs$NuclsomeFree, 'nfr.bam') export(objs$mononucleosome, 'mono_nucleosome.bam')
Related Skills
- atac-seq/atac-peak-calling - Call accessibility peaks
- atac-seq/atac-qc - Quality control metrics
- atac-seq/footprinting - TF footprinting
- chip-seq/peak-annotation - Annotate nucleosome positions