Claude-skill-registry bio-small-rna-seq-differential-mirna

Perform differential expression analysis of miRNAs between conditions using DESeq2 or edgeR with small RNA-specific considerations. Use when identifying miRNAs that change between treatment groups, disease states, or developmental stages.

install

source · Clone the upstream repo

git clone https://github.com/majiayu000/claude-skill-registry

Claude Code · Install into ~/.claude/skills/

T=$(mktemp -d) && git clone --depth=1 https://github.com/majiayu000/claude-skill-registry "$T" && mkdir -p ~/.claude/skills && cp -r "$T/skills/data/differential-mirna" ~/.claude/skills/majiayu000-claude-skill-registry-bio-small-rna-seq-differential-mirna && rm -rf "$T"

manifest: skills/data/differential-mirna/SKILL.md

Differential miRNA Expression

Load miRNA Count Data

library(DESeq2)

# Load miRge3 or miRDeep2 counts
counts <- read.csv('miR.Counts.csv', row.names = 1)

# Create sample metadata
coldata <- data.frame(
    sample = colnames(counts),
    condition = factor(c('control', 'control', 'treated', 'treated')),
    row.names = colnames(counts)
)

DESeq2 Analysis

# Create DESeq2 dataset
dds <- DESeqDataSetFromMatrix(
    countData = round(counts),  # DESeq2 requires integers
    colData = coldata,
    design = ~ condition
)

# Filter low-expressed miRNAs
# miRNAs typically have fewer total counts than mRNAs
# Keep miRNAs with at least 10 reads across samples
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep, ]

# Run DESeq2
dds <- DESeq(dds)

# Get results
res <- results(dds, contrast = c('condition', 'treated', 'control'))
res <- res[order(res$padj), ]

Apply Shrinkage for Effect Sizes

# apeglm shrinkage for more accurate log2 fold changes
# Particularly important for low-count miRNAs
library(apeglm)

res_shrunk <- lfcShrink(
    dds,
    coef = 'condition_treated_vs_control',
    type = 'apeglm'
)

Filter Significant miRNAs

# Standard thresholds for miRNA DE
# padj < 0.05: FDR-corrected significance
# |log2FC| > 1: 2-fold change minimum
sig <- subset(res_shrunk, padj < 0.05 & abs(log2FoldChange) > 1)
sig <- sig[order(sig$padj), ]

# Separate up and down-regulated
up <- subset(sig, log2FoldChange > 0)
down <- subset(sig, log2FoldChange < 0)

cat('Upregulated:', nrow(up), '\n')
cat('Downregulated:', nrow(down), '\n')

edgeR Alternative

library(edgeR)

# Create DGEList
dge <- DGEList(counts = counts, group = coldata$condition)

# Filter low expression
keep <- filterByExpr(dge)
dge <- dge[keep, , keep.lib.sizes = FALSE]

# Normalize
dge <- calcNormFactors(dge)

# Design matrix
design <- model.matrix(~ condition, data = coldata)

# Estimate dispersion
dge <- estimateDisp(dge, design)

# Fit model and test
fit <- glmQLFit(dge, design)
qlf <- glmQLFTest(fit, coef = 2)

# Get results
res_edger <- topTags(qlf, n = Inf)$table

Visualization

library(ggplot2)
library(EnhancedVolcano)

# Volcano plot
EnhancedVolcano(
    res_shrunk,
    lab = rownames(res_shrunk),
    x = 'log2FoldChange',
    y = 'padj',
    pCutoff = 0.05,
    FCcutoff = 1,
    title = 'Differential miRNA Expression'
)

# MA plot
plotMA(res_shrunk, ylim = c(-4, 4))

Heatmap of DE miRNAs

library(pheatmap)

# Get normalized counts
vsd <- vst(dds, blind = FALSE)

# Select significant miRNAs
sig_mirnas <- rownames(sig)
mat <- assay(vsd)[sig_mirnas, ]

# Z-score scale rows
mat_scaled <- t(scale(t(mat)))

pheatmap(
    mat_scaled,
    annotation_col = coldata['condition'],
    cluster_rows = TRUE,
    cluster_cols = TRUE,
    show_rownames = nrow(mat) < 50
)

Export Results

# Full results with normalized counts
res_df <- as.data.frame(res_shrunk)
res_df$miRNA <- rownames(res_df)
res_df$baseMean_norm <- rowMeans(counts(dds, normalized = TRUE)[rownames(res_df), ])

write.csv(res_df, 'DE_miRNAs_full.csv', row.names = FALSE)

# Significant only
write.csv(as.data.frame(sig), 'DE_miRNAs_significant.csv')

Related Skills

mirge3-analysis - Get miRNA counts
mirdeep2-analysis - Alternative quantification
target-prediction - Predict targets of DE miRNAs
differential-expression/deseq2-basics - General DE analysis concepts