LLMs-Universal-Life-Science-and-Clinical-Skills- methylation-calling

<!--

install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Epigenomics/methylation-analysis/methylation-calling" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-methylation-callin && rm -rf "$T"
manifest: Skills/Epigenomics/methylation-analysis/methylation-calling/SKILL.md
source content
<!-- # COPYRIGHT NOTICE # This file is part of the "Universal Biomedical Skills" project. # Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu> # All Rights Reserved. # # This code is proprietary and confidential. # Unauthorized copying of this file, via any medium is strictly prohibited. # # Provenance: Authenticated by MD BABU MIA -->

name: bio-methylation-calling description: Extract methylation calls from Bismark BAM files using bismark_methylation_extractor. Generates per-cytosine reports for CpG, CHG, and CHH contexts. Use when extracting methylation levels from aligned bisulfite sequencing data for downstream analysis. tool_type: cli primary_tool: bismark measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes. allowed-tools:

  • read_file
  • run_shell_command

Methylation Calling

Basic Extraction

# Extract methylation calls from Bismark BAM
bismark_methylation_extractor --gzip --bedGraph \
    sample_bismark_bt2.bam

Paired-End Extraction

bismark_methylation_extractor --paired-end --gzip --bedGraph \
    sample_bismark_bt2_pe.bam

Common Options

bismark_methylation_extractor \
    --paired-end \                 # For paired-end data
    --gzip \                       # Compress output
    --bedGraph \                   # Generate bedGraph file
    --cytosine_report \            # Genome-wide cytosine report
    --genome_folder /path/to/genome/ \  # Required for cytosine_report
    --buffer_size 10G \            # Memory buffer
    --parallel 4 \                 # Parallel extraction
    -o output_dir/ \
    sample.bam

CpG Context Only

# Most common - extract only CpG methylation
bismark_methylation_extractor \
    --paired-end \
    --no_overlap \                 # Avoid double counting overlapping reads
    --gzip \
    --bedGraph \
    --CX \                         # Also extract CHG/CHH (optional)
    sample.bam

Genome-Wide Cytosine Report

# Comprehensive report with all CpGs in genome
bismark_methylation_extractor \
    --paired-end \
    --gzip \
    --bedGraph \
    --cytosine_report \
    --genome_folder /path/to/genome/ \
    sample.bam

Strand-Specific Output

# Default: strand-specific output
# CpG_OT_sample.txt - Original Top strand
# CpG_OB_sample.txt - Original Bottom strand
# CpG_CTOT_sample.txt - Complementary to OT
# CpG_CTOB_sample.txt - Complementary to OB

# Merge strands (CpG methylation is usually symmetric)
bismark_methylation_extractor --merge_non_CpG --gzip sample.bam

Avoid Double-Counting Overlapping Reads

# For paired-end data with overlapping reads
bismark_methylation_extractor \
    --paired-end \
    --no_overlap \                 # Ignore overlapping portion of read 2
    --gzip \
    sample_pe.bam

Generate Coverage File

# bismark2bedGraph creates coverage file
bismark_methylation_extractor --bedGraph --gzip sample.bam

# Or run separately
bismark2bedGraph -o sample CpG_context_sample.txt.gz

# Coverage format: chr start end methylation_percentage count_meth count_unmeth

Convert to BigWig for Visualization

# bedGraph to BigWig (requires UCSC tools)
bedGraphToBigWig sample.bedGraph.gz chrom.sizes sample.bw

M-Bias Plot

# Check for methylation bias across read positions
bismark_methylation_extractor --paired-end \
    --mbias_only \                 # Only generate M-bias plot
    sample.bam

# Generates sample.M-bias.txt and sample.M-bias_R1.png, sample.M-bias_R2.png

Ignore End Bias

# Ignore positions with systematic bias (found from M-bias plot)
bismark_methylation_extractor \
    --paired-end \
    --ignore 2 \                   # Ignore first 2 bp of read 1
    --ignore_r2 2 \                # Ignore first 2 bp of read 2
    --ignore_3prime 2 \            # Ignore last 2 bp of read 1
    --ignore_3prime_r2 2 \         # Ignore last 2 bp of read 2
    sample.bam

Output Files

# Main output files:
# CpG_context_sample.txt.gz      - Per-read CpG methylation
# sample.bismark.cov.gz          - Coverage file
# sample.bedGraph.gz             - bedGraph for visualization
# sample.CpG_report.txt.gz       - Genome-wide CpG report (with --cytosine_report)

# Coverage file format:
# chr  start  end  methylation%  count_methylated  count_unmethylated

Parse Output in Python

import pandas as pd

cov = pd.read_csv('sample.bismark.cov.gz', sep='\t', header=None,
                   names=['chr', 'start', 'end', 'meth_pct', 'count_meth', 'count_unmeth'])
cov['coverage'] = cov['count_meth'] + cov['count_unmeth']
cov_filtered = cov[cov['coverage'] >= 10]

Key Parameters

ParameterDescription
--paired-endPaired-end mode
--gzipCompress output
--bedGraphGenerate bedGraph
--cytosine_reportFull genome cytosine report
--genome_folderPath to genome (for cytosine_report)
--CXReport CHG/CHH contexts
--no_overlapAvoid counting overlapping reads twice
--parallelParallel extraction threads
--mbias_onlyOnly M-bias analysis
--ignore NIgnore first N bp of read 1
--ignore_r2 NIgnore first N bp of read 2

Output Formats

FormatDescriptionUse Case
CpG_contextPer-read methylation callsDetailed analysis
.bismark.covPer-CpG coverage summarymethylKit input
.bedGraphMethylation trackGenome browser
.CpG_reportAll genome CpGsComprehensive analysis

Related Skills

  • bismark-alignment - Generate input BAM files
  • methylkit-analysis - Import coverage files to R
  • dmr-detection - Find differentially methylated regions
<!-- AUTHOR_SIGNATURE: 9a7f3c2e-MD-BABU-MIA-2026-MSSM-SECURE -->