Claude-skill-registry-data bio-methylation-methylkit

DNA methylation analysis with methylKit in R. Import Bismark coverage files, filter by coverage, normalize samples, and perform statistical comparisons. Use when analyzing single-base methylation patterns, comparing samples, or preparing data for DMR detection.

install
source · Clone the upstream repo
git clone https://github.com/majiayu000/claude-skill-registry-data
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/majiayu000/claude-skill-registry-data "$T" && mkdir -p ~/.claude/skills && cp -r "$T/data/methylkit-analysis" ~/.claude/skills/majiayu000-claude-skill-registry-data-bio-methylation-methylkit && rm -rf "$T"
manifest: data/methylkit-analysis/SKILL.md
source content

methylKit Analysis

Read Bismark Coverage Files

library(methylKit)

file_list <- list('sample1.bismark.cov.gz', 'sample2.bismark.cov.gz',
                   'sample3.bismark.cov.gz', 'sample4.bismark.cov.gz')
sample_ids <- c('ctrl_1', 'ctrl_2', 'treat_1', 'treat_2')
treatment <- c(0, 0, 1, 1)  # 0 = control, 1 = treatment

meth_obj <- methRead(
    location = as.list(file_list),
    sample.id = as.list(sample_ids),
    treatment = treatment,
    assembly = 'hg38',
    context = 'CpG',
    pipeline = 'bismarkCoverage'
)

Read Bismark cytosine Report

meth_obj <- methRead(
    location = as.list(file_list),
    sample.id = as.list(sample_ids),
    treatment = treatment,
    assembly = 'hg38',
    context = 'CpG',
    pipeline = 'bismarkCytosineReport'
)

Basic Statistics

# Coverage statistics
getMethylationStats(meth_obj[[1]], plot = TRUE, both.strands = FALSE)

# Coverage per sample
getCoverageStats(meth_obj[[1]], plot = TRUE, both.strands = FALSE)

Filter by Coverage

# Remove CpGs with very low or very high coverage
meth_filtered <- filterByCoverage(
    meth_obj,
    lo.count = 10,        # Minimum 10 reads
    lo.perc = NULL,
    hi.count = NULL,
    hi.perc = 99.9        # Remove top 0.1% (likely PCR artifacts)
)

Normalize Coverage

# Normalize coverage between samples (recommended)
meth_norm <- normalizeCoverage(meth_filtered, method = 'median')

Merge Samples (Unite)

# Find common CpGs across all samples
meth_united <- unite(meth_norm, destrand = TRUE)  # Combine strands

# Allow some missing data
meth_united <- unite(meth_norm, destrand = TRUE, min.per.group = 2L)

Visualize Samples

# Correlation between samples
getCorrelation(meth_united, plot = TRUE)

# PCA of samples
PCASamples(meth_united, screeplot = TRUE)
PCASamples(meth_united)

# Clustering
clusterSamples(meth_united, dist = 'correlation', method = 'ward.D', plot = TRUE)

Differential Methylation (Single CpGs)

# Calculate differential methylation
diff_meth <- calculateDiffMeth(
    meth_united,
    overdispersion = 'MN',     # Use shrinkage
    test = 'Chisq',
    mc.cores = 4
)

# Get significant differentially methylated CpGs
dmcs <- getMethylDiff(diff_meth, difference = 25, qvalue = 0.01)

# Hyper vs hypomethylated
dmcs_hyper <- getMethylDiff(diff_meth, difference = 25, qvalue = 0.01, type = 'hyper')
dmcs_hypo <- getMethylDiff(diff_meth, difference = 25, qvalue = 0.01, type = 'hypo')

Tile-Based Analysis (Regions)

# Aggregate CpGs into tiles/windows
tiles <- tileMethylCounts(meth_obj, win.size = 1000, step.size = 1000)
tiles_united <- unite(tiles, destrand = TRUE)

# Differential methylation on tiles
diff_tiles <- calculateDiffMeth(tiles_united, overdispersion = 'MN', mc.cores = 4)
dmrs <- getMethylDiff(diff_tiles, difference = 25, qvalue = 0.01)

Export Results

# To data frame
diff_df <- getData(dmcs)
write.csv(diff_df, 'dmcs_results.csv', row.names = FALSE)

# To BED file
library(genomation)
dmcs_gr <- as(dmcs, 'GRanges')
export(dmcs_gr, 'dmcs.bed', format = 'BED')

Annotate with Genomic Features

library(genomation)

gene_obj <- readTranscriptFeatures('genes.bed')

annotated <- annotateWithGeneParts(as(dmcs, 'GRanges'), gene_obj)

# Or with annotatr
library(annotatr)
annotations <- build_annotations(genome = 'hg38', annotations = 'hg38_basicgenes')
dmcs_annotated <- annotate_regions(regions = as(dmcs, 'GRanges'), annotations = annotations)

Reorganize for Multi-Group Comparison

# For more than 2 groups
meth_obj <- reorganize(
    meth_united,
    sample.ids = c('A1', 'A2', 'B1', 'B2', 'C1', 'C2'),
    treatment = c(0, 0, 1, 1, 2, 2)
)

Pool Replicates

# Combine biological replicates
meth_pooled <- pool(meth_united, sample.ids = c('control', 'treatment'))

Key Functions

FunctionPurpose
methReadRead methylation files
filterByCoverageRemove low/high coverage
normalizeCoverageNormalize between samples
uniteFind common CpGs
calculateDiffMethStatistical test
getMethylDiffFilter significant results
tileMethylCountsRegion-level analysis
PCASamplesPCA visualization
getCorrelationSample correlation

Key Parameters for calculateDiffMeth

ParameterDefaultDescription
overdispersionnoneMN (shrinkage) or shrinkMN
testChisqChisq, F, fast.fisher
mc.cores1Parallel cores
slimTRUERemove unused columns

Related Skills

  • bismark-alignment - Generate input BAM files
  • methylation-calling - Extract coverage files
  • dmr-detection - Advanced DMR methods
  • pathway-analysis/go-enrichment - Functional annotation