install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Epigenomics/methylation-analysis/methylkit-analysis" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-methylkit-analysis && rm -rf "$T"
manifest:
Skills/Epigenomics/methylation-analysis/methylkit-analysis/SKILL.mdsource content
<!--
# COPYRIGHT NOTICE
# This file is part of the "Universal Biomedical Skills" project.
# Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu>
# All Rights Reserved.
#
# This code is proprietary and confidential.
# Unauthorized copying of this file, via any medium is strictly prohibited.
#
# Provenance: Authenticated by MD BABU MIA
-->
name: bio-methylation-methylkit description: DNA methylation analysis with methylKit in R. Import Bismark coverage files, filter by coverage, normalize samples, and perform statistical comparisons. Use when analyzing single-base methylation patterns, comparing samples, or preparing data for DMR detection. tool_type: r primary_tool: methylKit measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes. allowed-tools:
- read_file
- run_shell_command
methylKit Analysis
Read Bismark Coverage Files
library(methylKit) file_list <- list('sample1.bismark.cov.gz', 'sample2.bismark.cov.gz', 'sample3.bismark.cov.gz', 'sample4.bismark.cov.gz') sample_ids <- c('ctrl_1', 'ctrl_2', 'treat_1', 'treat_2') treatment <- c(0, 0, 1, 1) # 0 = control, 1 = treatment meth_obj <- methRead( location = as.list(file_list), sample.id = as.list(sample_ids), treatment = treatment, assembly = 'hg38', context = 'CpG', pipeline = 'bismarkCoverage' )
Read Bismark cytosine Report
meth_obj <- methRead( location = as.list(file_list), sample.id = as.list(sample_ids), treatment = treatment, assembly = 'hg38', context = 'CpG', pipeline = 'bismarkCytosineReport' )
Basic Statistics
# Coverage statistics getMethylationStats(meth_obj[[1]], plot = TRUE, both.strands = FALSE) # Coverage per sample getCoverageStats(meth_obj[[1]], plot = TRUE, both.strands = FALSE)
Filter by Coverage
# Remove CpGs with very low or very high coverage meth_filtered <- filterByCoverage( meth_obj, lo.count = 10, # Minimum 10 reads lo.perc = NULL, hi.count = NULL, hi.perc = 99.9 # Remove top 0.1% (likely PCR artifacts) )
Normalize Coverage
# Normalize coverage between samples (recommended) meth_norm <- normalizeCoverage(meth_filtered, method = 'median')
Merge Samples (Unite)
# Find common CpGs across all samples meth_united <- unite(meth_norm, destrand = TRUE) # Combine strands # Allow some missing data meth_united <- unite(meth_norm, destrand = TRUE, min.per.group = 2L)
Visualize Samples
# Correlation between samples getCorrelation(meth_united, plot = TRUE) # PCA of samples PCASamples(meth_united, screeplot = TRUE) PCASamples(meth_united) # Clustering clusterSamples(meth_united, dist = 'correlation', method = 'ward.D', plot = TRUE)
Differential Methylation (Single CpGs)
# Calculate differential methylation diff_meth <- calculateDiffMeth( meth_united, overdispersion = 'MN', # Use shrinkage test = 'Chisq', mc.cores = 4 ) # Get significant differentially methylated CpGs dmcs <- getMethylDiff(diff_meth, difference = 25, qvalue = 0.01) # Hyper vs hypomethylated dmcs_hyper <- getMethylDiff(diff_meth, difference = 25, qvalue = 0.01, type = 'hyper') dmcs_hypo <- getMethylDiff(diff_meth, difference = 25, qvalue = 0.01, type = 'hypo')
Tile-Based Analysis (Regions)
# Aggregate CpGs into tiles/windows tiles <- tileMethylCounts(meth_obj, win.size = 1000, step.size = 1000) tiles_united <- unite(tiles, destrand = TRUE) # Differential methylation on tiles diff_tiles <- calculateDiffMeth(tiles_united, overdispersion = 'MN', mc.cores = 4) dmrs <- getMethylDiff(diff_tiles, difference = 25, qvalue = 0.01)
Export Results
# To data frame diff_df <- getData(dmcs) write.csv(diff_df, 'dmcs_results.csv', row.names = FALSE) # To BED file library(genomation) dmcs_gr <- as(dmcs, 'GRanges') export(dmcs_gr, 'dmcs.bed', format = 'BED')
Annotate with Genomic Features
library(genomation) gene_obj <- readTranscriptFeatures('genes.bed') annotated <- annotateWithGeneParts(as(dmcs, 'GRanges'), gene_obj) # Or with annotatr library(annotatr) annotations <- build_annotations(genome = 'hg38', annotations = 'hg38_basicgenes') dmcs_annotated <- annotate_regions(regions = as(dmcs, 'GRanges'), annotations = annotations)
Reorganize for Multi-Group Comparison
# For more than 2 groups meth_obj <- reorganize( meth_united, sample.ids = c('A1', 'A2', 'B1', 'B2', 'C1', 'C2'), treatment = c(0, 0, 1, 1, 2, 2) )
Pool Replicates
# Combine biological replicates meth_pooled <- pool(meth_united, sample.ids = c('control', 'treatment'))
Key Functions
| Function | Purpose |
|---|---|
| methRead | Read methylation files |
| filterByCoverage | Remove low/high coverage |
| normalizeCoverage | Normalize between samples |
| unite | Find common CpGs |
| calculateDiffMeth | Statistical test |
| getMethylDiff | Filter significant results |
| tileMethylCounts | Region-level analysis |
| PCASamples | PCA visualization |
| getCorrelation | Sample correlation |
Key Parameters for calculateDiffMeth
| Parameter | Default | Description |
|---|---|---|
| overdispersion | none | MN (shrinkage) or shrinkMN |
| test | Chisq | Chisq, F, fast.fisher |
| mc.cores | 1 | Parallel cores |
| slim | TRUE | Remove unused columns |
Related Skills
- bismark-alignment - Generate input BAM files
- methylation-calling - Extract coverage files
- dmr-detection - Advanced DMR methods
- pathway-analysis/go-enrichment - Functional annotation