LLMs-Universal-Life-Science-and-Clinical-Skills- dmr-detection

<!--

install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Epigenomics/methylation-analysis/dmr-detection" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-dmr-detection && rm -rf "$T"
manifest: Skills/Epigenomics/methylation-analysis/dmr-detection/SKILL.md
source content
<!-- # COPYRIGHT NOTICE # This file is part of the "Universal Biomedical Skills" project. # Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu> # All Rights Reserved. # # This code is proprietary and confidential. # Unauthorized copying of this file, via any medium is strictly prohibited. # # Provenance: Authenticated by MD BABU MIA -->

---
name: bio-methylation-dmr-detection
description: Differentially methylated region (DMR) detection using methylKit tiles, bsseq BSmooth, and DMRcate. Use when identifying contiguous genomic regions with methylation differences between experimental conditions or cell types.
tool_type: r
primary_tool: methylKit
measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes.
allowed-tools:
  - read_file
  - run_shell_command
---

DMR Detection

methylKit Tile-Based DMRs

library(methylKit)

# Inputs expected from the caller: `file_list` (Bismark coverage files),
# `sample_ids` (one id per file) and `treatment` (group label per sample).
meth_obj <- methRead(
    location = file_list,
    sample.id = sample_ids,
    treatment = treatment,
    assembly = 'hg38',
    pipeline = 'bismarkCoverage'
)

# Discard poorly covered bases (< 10 reads) and the top 0.1% by coverage.
meth_filt <- filterByCoverage(meth_obj, lo.count = 10, hi.perc = 99.9)

# Summarise counts in non-overlapping 1 kb windows (step == window size),
# keeping only windows that contain at least 3 covered bases.
tiles <- tileMethylCounts(
    meth_filt,
    win.size = 1000,
    step.size = 1000,
    cov.bases = 3
)

# Keep the tiles that are present in every sample.
tiles_united <- unite(tiles, destrand = TRUE)

# Per-tile differential methylation test with overdispersion correction.
diff_tiles <- calculateDiffMeth(tiles_united, overdispersion = 'MN', mc.cores = 4)

# Significance thresholds shared by the three calls below:
# absolute methylation difference (in percent) and q-value ceiling.
min_meth_diff <- 25
max_qvalue <- 0.01

# All significant tiles, then the hyper- and hypo-methylated subsets.
dmrs <- getMethylDiff(diff_tiles, difference = min_meth_diff, qvalue = max_qvalue)
dmrs_hyper <- getMethylDiff(diff_tiles, difference = min_meth_diff, qvalue = max_qvalue,
                            type = 'hyper')
dmrs_hypo <- getMethylDiff(diff_tiles, difference = min_meth_diff, qvalue = max_qvalue,
                           type = 'hypo')

bsseq BSmooth DMRs

library(bsseq)
library(BiocParallel)

# Read Bismark cytosine reports.
# BSmooth.tstat() estimates a within-group variance, so each group needs at
# least two replicates; a one-vs-one comparison cannot be tested this way.
report_files <- c('ctrl1.CpG_report.txt.gz', 'ctrl2.CpG_report.txt.gz',
                  'treat1.CpG_report.txt.gz', 'treat2.CpG_report.txt.gz')
ctrl_samples <- c('ctrl1', 'ctrl2')
treat_samples <- c('treat1', 'treat2')

bs <- read.bismark(files = report_files,
                    rmZeroCov = TRUE,
                    strandCollapse = TRUE)
# Set sample names after reading so the call works regardless of whether the
# installed bsseq version accepts a `sampleNames` argument in read.bismark().
sampleNames(bs) <- c(ctrl_samples, treat_samples)

# Smooth methylation data (parallelised over 4 workers)
bs_smooth <- BSmooth(bs, BPPARAM = MulticoreParam(workers = 4), verbose = TRUE)

# Filter by coverage: keep loci covered by >= 2 reads in every sample
bs_cov <- getCoverage(bs_smooth)
keep <- which(rowSums(bs_cov >= 2) == ncol(bs_cov))
bs_filt <- bs_smooth[keep, ]

# Compute per-locus t-statistics between the two groups.
# dmrFinder() operates on this BSseqTstat object, not on the BSseq object.
bs_tstat <- BSmooth.tstat(bs_filt,
                          group1 = treat_samples,
                          group2 = ctrl_samples,
                          estimate.var = 'same',
                          local.correct = TRUE,
                          verbose = TRUE)

# Find DMRs with BSmooth.
# `cutoff` is a (low, high) threshold on the t-statistic, not on the
# methylation difference; +/-4.6 is the value used in the bsseq vignette.
dmrs_candidates <- dmrFinder(bs_tstat, cutoff = c(-4.6, 4.6), stat = 'tstat.corrected')

# Apply the effect-size filter separately: at least 3 CpGs per region and a
# mean methylation difference of at least 0.1 (10 percentage points).
dmrs_bsseq <- subset(dmrs_candidates, n >= 3 & abs(meanDiff) >= 0.1)

DMRcate Method

library(DMRcate)
library(minfi)

# Input: `meth_matrix` holds beta values, one row per CpG and one column per
# sample; `treatment` is the per-sample grouping variable.
design <- model.matrix(~ treatment)

# Annotate each CpG with its differential-methylation statistic.
# `coef = 2` picks the second column of `design`
# (presumably the treatment effect for a two-level factor; verify for your design).
myannotation <- cpg.annotate(
    datatype = 'array',
    object = meth_matrix,
    what = 'Beta',
    arraytype = 'EPIC',
    design = design,
    coef = 2
)

# Kernel-smooth the per-CpG statistics (bandwidth lambda, scaling factor C)
# and convert the resulting regions to genomic ranges.
dmr_results <- dmrcate(myannotation, lambda = 1000, C = 2)
dmr_ranges <- extractRanges(dmr_results)

Annotate DMRs with Genes

library(annotatr)

# Annotation sets to overlap with: gene parts, promoters and CpG islands.
annotation_sets <- c('hg38_basicgenes', 'hg38_genes_promoters', 'hg38_cpg_islands')
annots <- build_annotations(genome = 'hg38', annotations = annotation_sets)

# methylKit results must be coerced to GRanges before annotation.
dmr_gr <- as(dmrs, 'GRanges')

# Overlap DMRs with the annotations (strand is ignored) and flatten the
# result into a data frame.
dmr_annotated <- annotate_regions(
    regions = dmr_gr,
    annotations = annots,
    ignore.strand = TRUE
)
dmr_df <- data.frame(dmr_annotated)

Annotate with genomation

library(genomation)

# Load transcript structure (promoters, exons, introns) from a BED12 file.
gene_obj <- readTranscriptFeatures(location = 'genes.bed12')

# Coerce the methylKit DMRs to GRanges and overlap them with the gene parts.
dmr_gr <- as(dmrs, 'GRanges')
annot_result <- annotateWithGeneParts(target = dmr_gr, feature = gene_obj)

# Report the promoter/exon/intron breakdown as percentages.
getTargetAnnotationStats(annot_result, percentage = TRUE, precedence = TRUE)

Visualize DMR

library(Gviz)
library(bsseq)
library(GenomicRanges)
# The TxDb object used for the gene track must be loaded explicitly.
library(TxDb.Hsapiens.UCSC.hg38.knownGene)

# Region to plot. Named region_start/region_end rather than start/end so the
# variables do not mask the start()/end() accessor names.
chr <- 'chr1'
region_start <- 1000000
region_end <- 1010000
region <- GRanges(chr, IRanges(region_start, region_end))

# DataTrack() cannot take a BSseq object directly, so extract the loci in the
# region and their smoothed methylation values.
bs_region <- subsetByOverlaps(bs_smooth, region)
stopifnot(length(bs_region) > 0)

# DataTrack expects one row per sample and one column per locus.
meth_values <- t(as.matrix(getMeth(bs_region, type = 'smooth')))
rownames(meth_values) <- sampleNames(bs_region)

# Methylation data track
meth_track <- DataTrack(
    range = granges(bs_region),
    data = meth_values,
    genome = 'hg38',
    name = 'Methylation',
    type = 'smooth'
)

# Gene annotation track
gene_track <- GeneRegionTrack(TxDb.Hsapiens.UCSC.hg38.knownGene, genome = 'hg38', name = 'Genes')

# Plot
plotTracks(list(meth_track, gene_track), from = region_start, to = region_end,
           chromosome = chr)

Merge Adjacent DMRs

library(GenomicRanges)

# Work on a GRanges copy of the methylKit DMRs.
dmr_gr <- as(dmrs, 'GRanges')

# Fuse overlapping DMRs and DMRs separated by a gap shorter than `merge_gap`
# bp. Ranges whose gap is `merge_gap` or more stay separate.
merge_gap <- 500
dmr_merged <- reduce(dmr_gr, min.gapwidth = merge_gap)

Export DMRs

library(rtracklayer)

# BED: genomic intervals for genome browsers.
# `dmr_gr` is the GRanges form of `dmrs`, created in the sections above.
export(object = dmr_gr, con = 'dmrs.bed', format = 'BED')

# CSV: the full methylKit result table, without row names.
dmr_df <- getData(dmrs)
write.csv(dmr_df, file = 'dmrs.csv', row.names = FALSE)

# GFF3: the same intervals in GFF3 format.
export(object = dmr_gr, con = 'dmrs.gff3', format = 'GFF3')

DMR Comparison Across Methods

| Method | Package | Approach | Best For |
|---|---|---|---|
| Tiles | methylKit | Fixed windows | Quick analysis |
| BSmooth | bsseq | Smoothing | WGBS data |
| DMRcate | DMRcate | Kernel smoothing | Array data |
| DSS | DSS | Bayesian | Complex designs |

Key Parameters

methylKit tileMethylCounts

| Parameter | Default | Description |
|---|---|---|
| win.size | 1000 | Window size (bp) |
| step.size | 1000 | Step size (bp) |
| cov.bases | 0 | Min CpGs per tile |

bsseq dmrFinder

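| Parameter | Description |
|---|---|
| cutoff | (low, high) cutoff on the t-statistic; filter on `meanDiff` afterwards for a methylation-difference threshold |
| stat | Statistic to use (e.g. `tstat.corrected`) |
| maxGap | Max gap between CpGs within one DMR |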

Related Skills

  • methylkit-analysis - Single CpG analysis
  • methylation-calling - Generate input files
  • pathway-analysis/go-enrichment - Functional annotation of DMR genes
  • differential-expression/deseq2-basics - Compare with expression changes
<!-- AUTHOR_SIGNATURE: 9a7f3c2e-MD-BABU-MIA-2026-MSSM-SECURE -->