BioSkills bio-small-rna-seq-differential-mirna
Perform differential expression analysis of miRNAs between conditions using DESeq2 or edgeR with small RNA-specific considerations. Use when identifying miRNAs that change between treatment groups, disease states, or developmental stages.
git clone https://github.com/GPTomics/bioSkills
T=$(mktemp -d) && git clone --depth=1 https://github.com/GPTomics/bioSkills "$T" && mkdir -p ~/.claude/skills && cp -r "$T/small-rna-seq/differential-mirna" ~/.claude/skills/gptomics-bioskills-bio-small-rna-seq-differential-mirna && rm -rf "$T"
small-rna-seq/differential-mirna/SKILL.mdVersion Compatibility
Reference examples tested with: DESeq2 1.42+, edgeR 4.0+, ggplot2 3.5+, scanpy 1.10+
Before using code patterns, verify installed versions match. If versions differ:
- R:
thenpackageVersion('<pkg>')
to verify parameters?function_name
If code throws ImportError, AttributeError, or TypeError, introspect the installed package and adapt the example to match the actual API rather than retrying.
Differential miRNA Expression
"Find differentially expressed miRNAs between my conditions" → Perform statistical testing on miRNA count matrices to identify miRNAs with significant expression changes, accounting for small RNA-specific normalization considerations.
- R:
orDESeq2::DESeq()
on miRNA count dataedgeR::glmQLFTest()
Load miRNA Count Data
library(DESeq2) # Load miRge3 or miRDeep2 counts counts <- read.csv('miR.Counts.csv', row.names = 1) # Create sample metadata coldata <- data.frame( sample = colnames(counts), condition = factor(c('control', 'control', 'treated', 'treated')), row.names = colnames(counts) )
DESeq2 Analysis
Goal: Identify miRNAs with significant expression changes between experimental conditions, accounting for small RNA-specific normalization.
Approach: Create a DESeqDataSet from miRNA counts, filter low-expressed miRNAs using a lower threshold than mRNA (10 reads total), run the DESeq2 pipeline, and extract results with BH-corrected p-values.
# Create DESeq2 dataset dds <- DESeqDataSetFromMatrix( countData = round(counts), # DESeq2 requires integers colData = coldata, design = ~ condition ) # Filter low-expressed miRNAs # miRNAs typically have fewer total counts than mRNAs # Keep miRNAs with at least 10 reads across samples keep <- rowSums(counts(dds)) >= 10 dds <- dds[keep, ] # Run DESeq2 dds <- DESeq(dds) # Get results res <- results(dds, contrast = c('condition', 'treated', 'control')) res <- res[order(res$padj), ]
Apply Shrinkage for Effect Sizes
# apeglm shrinkage for more accurate log2 fold changes # Particularly important for low-count miRNAs library(apeglm) res_shrunk <- lfcShrink( dds, coef = 'condition_treated_vs_control', type = 'apeglm' )
Filter Significant miRNAs
# Standard thresholds for miRNA DE # padj < 0.05: FDR-corrected significance # |log2FC| > 1: 2-fold change minimum sig <- subset(res_shrunk, padj < 0.05 & abs(log2FoldChange) > 1) sig <- sig[order(sig$padj), ] # Separate up and down-regulated up <- subset(sig, log2FoldChange > 0) down <- subset(sig, log2FoldChange < 0) cat('Upregulated:', nrow(up), '\n') cat('Downregulated:', nrow(down), '\n')
edgeR Alternative
library(edgeR) # Create DGEList dge <- DGEList(counts = counts, group = coldata$condition) # Filter low expression keep <- filterByExpr(dge) dge <- dge[keep, , keep.lib.sizes = FALSE] # Normalize dge <- calcNormFactors(dge) # Design matrix design <- model.matrix(~ condition, data = coldata) # Estimate dispersion dge <- estimateDisp(dge, design) # Fit model and test fit <- glmQLFit(dge, design) qlf <- glmQLFTest(fit, coef = 2) # Get results res_edger <- topTags(qlf, n = Inf)$table
Visualization
library(ggplot2) library(EnhancedVolcano) # Volcano plot EnhancedVolcano( res_shrunk, lab = rownames(res_shrunk), x = 'log2FoldChange', y = 'padj', pCutoff = 0.05, FCcutoff = 1, title = 'Differential miRNA Expression' ) # MA plot plotMA(res_shrunk, ylim = c(-4, 4))
Heatmap of DE miRNAs
library(pheatmap) # Get normalized counts vsd <- vst(dds, blind = FALSE) # Select significant miRNAs sig_mirnas <- rownames(sig) mat <- assay(vsd)[sig_mirnas, ] # Z-score scale rows mat_scaled <- t(scale(t(mat))) pheatmap( mat_scaled, annotation_col = coldata['condition'], cluster_rows = TRUE, cluster_cols = TRUE, show_rownames = nrow(mat) < 50 )
Export Results
# Full results with normalized counts res_df <- as.data.frame(res_shrunk) res_df$miRNA <- rownames(res_df) res_df$baseMean_norm <- rowMeans(counts(dds, normalized = TRUE)[rownames(res_df), ]) write.csv(res_df, 'DE_miRNAs_full.csv', row.names = FALSE) # Significant only write.csv(as.data.frame(sig), 'DE_miRNAs_significant.csv')
Related Skills
- mirge3-analysis - Get miRNA counts
- mirdeep2-analysis - Alternative quantification
- target-prediction - Predict targets of DE miRNAs
- differential-expression/deseq2-basics - General DE analysis concepts