install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Genomics/Single_Cell/scatac-analysis" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-scatac-analysis-fbd6a7 && rm -rf "$T"
manifest:
Skills/Genomics/Single_Cell/scatac-analysis/SKILL.mdsource content
<!--
# COPYRIGHT NOTICE
# This file is part of the "Universal Biomedical Skills" project.
# Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu>
# All Rights Reserved.
#
# This code is proprietary and confidential.
# Unauthorized copying of this file, via any medium is strictly prohibited.
#
# Provenance: Authenticated by MD BABU MIA
-->
name: bio-single-cell-scatac-analysis description: Single-cell ATAC-seq analysis with Signac (R/Seurat) and ArchR. Process 10X Genomics scATAC data, perform QC, dimensionality reduction, clustering, peak calling, and motif activity scoring with chromVAR. Use when analyzing single-cell ATAC-seq data. tool_type: r primary_tool: Signac measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes. allowed-tools:
- read_file
- run_shell_command
scATAC-seq Analysis
Analyze single-cell chromatin accessibility data to identify cell types and regulatory elements.
Tool Comparison
| Tool | Ecosystem | Strengths |
|---|---|---|
| Signac | Seurat | Integration with scRNA-seq, familiar API |
| ArchR | Standalone | Memory efficient, comprehensive |
| chromVAR | Bioconductor | TF motif deviation scoring |
| SnapATAC2 | Python | Fast, scalable |
Signac (R/Seurat)
Installation
install.packages('Signac') BiocManager::install(c('EnsDb.Hsapiens.v86', 'biovizBase', 'motifmatchr', 'chromVAR', 'JASPAR2020', 'TFBSTools'))
Load 10X Data
library(Signac) library(Seurat) library(EnsDb.Hsapiens.v86) counts <- Read10X_h5('filtered_peak_bc_matrix.h5') metadata <- read.csv('singlecell.csv', header = TRUE, row.names = 1) chrom_assay <- CreateChromatinAssay( counts = counts, sep = c(':', '-'), genome = 'hg38', fragments = 'fragments.tsv.gz', min.cells = 10, min.features = 200 ) obj <- CreateSeuratObject(counts = chrom_assay, assay = 'peaks', meta.data = metadata)
Add Gene Annotations
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) seqlevelsStyle(annotations) <- 'UCSC' Annotation(obj) <- annotations
QC Metrics
obj <- NucleosomeSignal(obj) obj <- TSSEnrichment(obj, fast = FALSE) obj$pct_reads_in_peaks <- obj$peak_region_fragments / obj$passed_filters * 100 obj$blacklist_ratio <- obj$blacklist_region_fragments / obj$peak_region_fragments VlnPlot(obj, features = c('pct_reads_in_peaks', 'peak_region_fragments', 'TSS.enrichment', 'blacklist_ratio', 'nucleosome_signal'), pt.size = 0.1, ncol = 5)
QC Filtering
obj <- subset(obj, peak_region_fragments > 3000 & peak_region_fragments < 20000 & pct_reads_in_peaks > 15 & blacklist_ratio < 0.05 & nucleosome_signal < 4 & TSS.enrichment > 2 )
Normalization and Dimensionality Reduction
obj <- RunTFIDF(obj) obj <- FindTopFeatures(obj, min.cutoff = 'q0') obj <- RunSVD(obj) DepthCor(obj) # Check correlation with sequencing depth
Clustering
obj <- RunUMAP(obj, reduction = 'lsi', dims = 2:30) obj <- FindNeighbors(obj, reduction = 'lsi', dims = 2:30) obj <- FindClusters(obj, algorithm = 3, resolution = 0.5) DimPlot(obj, label = TRUE) + NoLegend()
Gene Activity Scores
gene_activities <- GeneActivity(obj) obj[['RNA']] <- CreateAssayObject(counts = gene_activities) obj <- NormalizeData(obj, assay = 'RNA', normalization.method = 'LogNormalize', scale.factor = median(obj$nCount_RNA)) DefaultAssay(obj) <- 'RNA' FeaturePlot(obj, features = c('CD34', 'MS4A1', 'CD3D', 'CD14'), pt.size = 0.1, max.cutoff = 'q95')
Peak Calling per Cluster
peaks <- CallPeaks(obj, group.by = 'seurat_clusters', macs2.path = '/path/to/macs2') # Quantify peaks peak_counts <- FeatureMatrix(fragments = Fragments(obj), features = peaks, cells = colnames(obj)) obj[['peaks_called']] <- CreateChromatinAssay(counts = peak_counts, fragments = Fragments(obj), annotation = Annotation(obj))
Differential Accessibility
DefaultAssay(obj) <- 'peaks' da_peaks <- FindMarkers(obj, ident.1 = 'cluster1', ident.2 = 'cluster2', test.use = 'LR', latent.vars = 'peak_region_fragments') # Top differentially accessible peaks head(da_peaks[order(da_peaks$avg_log2FC, decreasing = TRUE), ], 10)
Motif Analysis with chromVAR
library(JASPAR2020) library(TFBSTools) library(motifmatchr) pfm <- getMatrixSet(JASPAR2020, opts = list(collection = 'CORE', tax_group = 'vertebrates', all_versions = FALSE)) obj <- AddMotifs(obj, genome = BSgenome.Hsapiens.UCSC.hg38, pfm = pfm) obj <- RunChromVAR(obj, genome = BSgenome.Hsapiens.UCSC.hg38) DefaultAssay(obj) <- 'chromvar' FeaturePlot(obj, features = 'MA0139.1', min.cutoff = 'q10', max.cutoff = 'q90') # CTCF # Differential motif activity differential_activity <- FindMarkers(obj, ident.1 = 'cluster1', ident.2 = 'cluster2', only.pos = TRUE, mean.fxn = rowMeans, fc.name = 'avg_diff')
Coverage Plots
CoveragePlot(obj, region = 'chr1-1000000-1050000', group.by = 'seurat_clusters', annotation = TRUE) # Gene track CoveragePlot(obj, region = 'MS4A1', group.by = 'seurat_clusters', extend.upstream = 10000, extend.downstream = 5000)
Integration with scRNA-seq
transfer_anchors <- FindTransferAnchors(reference = rna_obj, query = obj, features = VariableFeatures(rna_obj), reference.assay = 'RNA', query.assay = 'RNA', reduction = 'cca') predicted_labels <- TransferData(anchorset = transfer_anchors, refdata = rna_obj$celltype, weight.reduction = obj[['lsi']], dims = 2:30) obj <- AddMetaData(obj, metadata = predicted_labels)
ArchR
Installation
devtools::install_github('GreenleafLab/ArchR', ref = 'master', repos = BiocManager::repositories()) library(ArchR) addArchRGenome('hg38')
Create Arrow Files
inputFiles <- c('sample1_fragments.tsv.gz', 'sample2_fragments.tsv.gz') names(inputFiles) <- c('sample1', 'sample2') ArrowFiles <- createArrowFiles(inputFiles = inputFiles, sampleNames = names(inputFiles), filterTSS = 4, filterFrags = 1000, addTileMat = TRUE, addGeneScoreMat = TRUE)
Create ArchRProject
proj <- ArchRProject(ArrowFiles = ArrowFiles, outputDirectory = 'ArchR_output', copyArrows = TRUE)
QC and Filtering
df <- getCellColData(proj, select = c('log10(nFrags)', 'TSSEnrichment')) p <- ggPoint(x = df[,1], y = df[,2], colorDensity = TRUE, continuousSet = 'sambaNight', xlabel = 'Log10 Unique Fragments', ylabel = 'TSS Enrichment', xlim = c(log10(500), quantile(df[,1], probs = 0.99)), ylim = c(0, quantile(df[,2], probs = 0.99))) proj <- filterDoublets(proj)
Dimensionality Reduction and Clustering
proj <- addIterativeLSI(proj, useMatrix = 'TileMatrix', name = 'IterativeLSI', iterations = 2, clusterParams = list(resolution = c(0.2), sampleCells = 10000, n.start = 10), varFeatures = 25000, dimsToUse = 1:30) proj <- addClusters(proj, reducedDims = 'IterativeLSI', method = 'Seurat', name = 'Clusters', resolution = 0.8) proj <- addUMAP(proj, reducedDims = 'IterativeLSI', name = 'UMAP', nNeighbors = 30, minDist = 0.5, metric = 'cosine') plotEmbedding(proj, colorBy = 'cellColData', name = 'Clusters', embedding = 'UMAP')
Gene Scores
markersGS <- getMarkerFeatures(proj, useMatrix = 'GeneScoreMatrix', groupBy = 'Clusters', bias = c('TSSEnrichment', 'log10(nFrags)'), testMethod = 'wilcoxon') markerList <- getMarkers(markersGS, cutOff = 'FDR <= 0.01 & Log2FC >= 1.25') heatmapGS <- plotMarkerHeatmap(seMarker = markersGS, cutOff = 'FDR <= 0.01 & Log2FC >= 1.25', labelMarkers = c('CD34', 'MS4A1', 'CD3D', 'CD14'))
Peak Calling
proj <- addGroupCoverages(proj, groupBy = 'Clusters') proj <- addReproduciblePeakSet(proj, groupBy = 'Clusters', pathToMacs2 = '/path/to/macs2') proj <- addPeakMatrix(proj)
Motif Enrichment
proj <- addMotifAnnotations(proj, motifSet = 'cisbp', name = 'Motif') enrichMotifs <- peakAnnoEnrichment(seMarker = markersPeaks, ArchRProj = proj, peakAnnotation = 'Motif', cutOff = 'FDR <= 0.1 & Log2FC >= 0.5') heatmapEM <- plotEnrichHeatmap(enrichMotifs, n = 7, transpose = TRUE)
chromVAR Deviations
proj <- addBgdPeaks(proj) proj <- addDeviationsMatrix(proj, peakAnnotation = 'Motif', force = TRUE) plotVarDev <- getVarDeviations(proj, name = 'MotifMatrix', plot = TRUE) motifs <- c('GATA1', 'CEBPA', 'EBF1', 'IRF8', 'PAX5') markerMotifs <- getFeatures(proj, select = paste(motifs, collapse = '|'), useMatrix = 'MotifMatrix') p <- plotEmbedding(proj, colorBy = 'MotifMatrix', name = sort(markerMotifs), embedding = 'UMAP', imputeWeights = getImputeWeights(proj))
chromVAR Standalone
Installation
BiocManager::install('chromVAR')
Basic Workflow
library(chromVAR) library(motifmatchr) library(BSgenome.Hsapiens.UCSC.hg38) counts <- readRDS('peak_counts.rds') # SummarizedExperiment counts <- addGCBias(counts, genome = BSgenome.Hsapiens.UCSC.hg38) motifs <- getJasparMotifs() motif_ix <- matchMotifs(motifs, counts, genome = BSgenome.Hsapiens.UCSC.hg38) bg <- getBackgroundPeaks(counts) dev <- computeDeviations(counts, motif_ix, background_peaks = bg) variability <- computeVariability(dev) plotVariability(variability, use_plotly = FALSE)
QC Thresholds
| Metric | Typical Range | Filter |
|---|---|---|
| Unique fragments | 1,000-100,000 | > 1,000-3,000 |
| TSS enrichment | 2-30 | > 2-4 |
| Fraction in peaks | 20-70% | > 15-20% |
| Nucleosome signal | 0.5-2 | < 4 |
| Blacklist ratio | 0-0.05 | < 0.05 |
Related Skills
- preprocessing - scRNA-seq QC (similar concepts)
- clustering - Clustering approaches (shared with scRNA-seq)
- multimodal-integration - Joint scRNA+scATAC analysis
- atac-seq - Bulk ATAC-seq methods
- chip-seq/motif-analysis - Motif databases and analysis