OpenClaw-Medical-Skills bio-workflows-metabolomics-pipeline

<!--

install
source · Clone the upstream repo
git clone https://github.com/FreedomIntelligence/OpenClaw-Medical-Skills
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/FreedomIntelligence/OpenClaw-Medical-Skills "$T" && mkdir -p ~/.claude/skills && cp -r "$T/skills/bio-workflows-metabolomics-pipeline" ~/.claude/skills/freedomintelligence-openclaw-medical-skills-bio-workflows-metabolomics-pipeline && rm -rf "$T"
OpenClaw · Install into ~/.openclaw/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/FreedomIntelligence/OpenClaw-Medical-Skills "$T" && mkdir -p ~/.openclaw/skills && cp -r "$T/skills/bio-workflows-metabolomics-pipeline" ~/.openclaw/skills/freedomintelligence-openclaw-medical-skills-bio-workflows-metabolomics-pipeline && rm -rf "$T"
manifest: skills/bio-workflows-metabolomics-pipeline/SKILL.md
source content
<!-- # COPYRIGHT NOTICE # This file is part of the "Universal Biomedical Skills" project. # Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu> # All Rights Reserved. # # This code is proprietary and confidential. # Unauthorized copying of this file, via any medium is strictly prohibited. # # Provenance: Authenticated by MD BABU MIA -->

name: bio-workflows-metabolomics-pipeline description: End-to-end metabolomics workflow from raw MS data to pathway analysis. Orchestrates XCMS preprocessing, annotation, normalization, statistical analysis, and pathway mapping. Use when processing LC-MS metabolomics data. tool_type: r primary_tool: XCMS workflow: true depends_on:

  • metabolomics/xcms-preprocessing
  • metabolomics/metabolite-annotation
  • metabolomics/normalization-qc
  • metabolomics/statistical-analysis
  • metabolomics/pathway-mapping
  • metabolomics/lipidomics
  • metabolomics/targeted-analysis
  • metabolomics/msdial-preprocessing measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes. allowed-tools:
  • read_file
  • run_shell_command

Metabolomics Pipeline

Pipeline Overview

Raw MS Data (mzML/mzXML) ──> Peak Detection ──> Feature Matrix
                                                     │
                                                     ▼
                   ┌─────────────────────────────────────────────┐
                   │          metabolomics-pipeline              │
                   ├─────────────────────────────────────────────┤
                   │  1. Peak Detection (XCMS)                   │
                   │  2. Retention Time Alignment                │
                   │  3. Feature Grouping & Gap Filling          │
                   │  4. QC & Normalization                      │
                   │  5. Statistical Analysis                    │
                   │  6. Metabolite Annotation                   │
                   │  7. Pathway Mapping                         │
                   └─────────────────────────────────────────────┘
                                                     │
                                                     ▼
                       Differential Metabolites + Enriched Pathways

Complete R Workflow

library(xcms)
library(MSnbase)
library(MetaboAnalystR)
library(ggplot2)

# === 1. LOAD DATA ===
mzml_files <- list.files('data/', pattern = '\\.mzML$', full.names = TRUE)
sample_data <- read.csv('sample_metadata.csv')

raw_data <- readMSData(mzml_files, mode = 'onDisk')

# Add sample metadata
pData(raw_data) <- sample_data

cat('Loaded', length(mzml_files), 'samples\n')

# === 2. PEAK DETECTION ===
cwp <- CentWaveParam(
    peakwidth = c(5, 30),
    ppm = 25,
    snthresh = 10,
    prefilter = c(3, 1000),
    mzdiff = 0.01,
    noise = 1000
)

xdata <- findChromPeaks(raw_data, param = cwp)
cat('Detected', nrow(chromPeaks(xdata)), 'peaks\n')

# === 3. RETENTION TIME ALIGNMENT ===
xdata <- adjustRtime(xdata, param = ObiwarpParam(binSize = 0.6))
cat('Aligned retention times\n')

# === 4. FEATURE GROUPING ===
pdp <- PeakDensityParam(
    sampleGroups = pData(xdata)$condition,
    minFraction = 0.5,
    bw = 5,
    binSize = 0.025
)

xdata <- groupChromPeaks(xdata, param = pdp)
cat('Grouped into', nrow(featureDefinitions(xdata)), 'features\n')

# === 5. GAP FILLING ===
xdata <- fillChromPeaks(xdata, param = ChromPeakAreaParam())

# === 6. EXTRACT FEATURE MATRIX ===
feature_matrix <- featureValues(xdata, value = 'into', method = 'maxint')
feature_info <- featureDefinitions(xdata)

# === 7. QC & NORMALIZATION ===
# Log2 transform
feature_matrix[feature_matrix == 0] <- NA
log_matrix <- log2(feature_matrix)

# Filter features (present in >50% of samples)
valid_features <- rowSums(!is.na(log_matrix)) > ncol(log_matrix) * 0.5
filtered_matrix <- log_matrix[valid_features, ]
cat('After filtering:', nrow(filtered_matrix), 'features\n')

# Median normalization
sample_medians <- apply(filtered_matrix, 2, median, na.rm = TRUE)
global_median <- median(sample_medians)
normalized <- sweep(filtered_matrix, 2, sample_medians - global_median)

# === 8. QC PLOTS ===
# PCA
pca <- prcomp(t(normalized), scale. = TRUE)
pca_df <- data.frame(PC1 = pca$x[, 1], PC2 = pca$x[, 2],
                     Sample = rownames(pca$x),
                     Condition = pData(xdata)$condition)

ggplot(pca_df, aes(PC1, PC2, color = Condition)) +
    geom_point(size = 3) +
    theme_bw() +
    labs(title = 'PCA of Metabolomics Data')
ggsave('qc_pca.png', width = 8, height = 6)

# === 9. STATISTICAL ANALYSIS ===
library(limma)

design <- model.matrix(~ 0 + condition, data = pData(xdata))
colnames(design) <- levels(factor(pData(xdata)$condition))

# Impute missing values for limma
imputed <- normalized
imputed[is.na(imputed)] <- min(imputed, na.rm = TRUE) - 1

fit <- lmFit(imputed, design)
contrast <- makeContrasts(Treatment - Control, levels = design)
fit2 <- contrasts.fit(fit, contrast)
fit2 <- eBayes(fit2)

results <- topTable(fit2, number = Inf, adjust.method = 'BH')
results$feature_id <- rownames(results)
results$significant <- abs(results$logFC) > 1 & results$adj.P.Val < 0.05

cat('\nSignificant features:', sum(results$significant), '\n')

# === 10. METABOLITE ANNOTATION ===
# Add m/z and RT to results
results$mz <- feature_info[results$feature_id, 'mzmed']
results$rt <- feature_info[results$feature_id, 'rtmed']

# KEGG annotation (simplified - use CAMERA for adduct annotation)
library(KEGGREST)

annotate_mz <- function(mz, ppm = 10) {
    # Query KEGG for matching compounds
    # This is simplified - real annotation uses databases
    mz_range <- c(mz * (1 - ppm/1e6), mz * (1 + ppm/1e6))
    return(NA)  # Placeholder
}

# === 11. VOLCANO PLOT ===
ggplot(results, aes(x = logFC, y = -log10(adj.P.Val), color = significant)) +
    geom_point(alpha = 0.5) +
    geom_hline(yintercept = -log10(0.05), linetype = 'dashed') +
    geom_vline(xintercept = c(-1, 1), linetype = 'dashed') +
    scale_color_manual(values = c('gray', 'red')) +
    theme_bw() +
    labs(title = 'Differential Metabolites', x = 'Log2 Fold Change', y = '-Log10(adj. p-value)')
ggsave('volcano_metabolites.png', width = 8, height = 6)

# === 12. OUTPUT ===
write.csv(results, 'differential_metabolites.csv', row.names = FALSE)
write.csv(normalized, 'normalized_feature_matrix.csv')
cat('Results saved!\n')

MetaboAnalystR Pathway Analysis

library(MetaboAnalystR)

# Initialize
mSet <- InitDataObjects('conc', 'pathora', FALSE)

# Load compound list (HMDB IDs)
sig_features <- results[results$significant, ]
compound_list <- sig_features$hmdb_id  # Requires annotation

mSet <- Setup.MapData(mSet, compound_list)
mSet <- CrossReferencing(mSet, 'hmdb')
mSet <- CreateMappingResultTable(mSet)

# Pathway analysis
mSet <- SetKEGG.PathLib(mSet, 'hsa')
mSet <- SetMetabolomeFilter(mSet, FALSE)
mSet <- CalculateOraScore(mSet, 'rbc', 'hyperg')

# View results
pathway_results <- mSet$analSet$ora.mat
head(pathway_results)

# Plot
mSet <- PlotPathSummary(mSet, 'pathway_overview', 'png', 300, 10, 10)

Alternative: MS-DIAL Preprocessing

# Load MS-DIAL exported data
msdial_export <- read.csv('msdial_alignment.csv')

# MS-DIAL already provides:
# - Peak detection
# - Alignment
# - Gap filling
# - Annotation attempts

# Continue with normalization and statistics
feature_matrix <- as.matrix(msdial_export[, grep('Area', colnames(msdial_export))])
rownames(feature_matrix) <- msdial_export$`Alignment.ID`

# Proceed with normalization and limma as above

QC Checkpoints

StageCheckAction if Failed
Peak detection>1000 featuresAdjust parameters
AlignmentRT deviation <30sCheck QC samples
Grouping>60% features groupedAdjust bw/minFraction
Missing values<30% per sampleCheck injection
QC RSD<30% for QC featuresCheck instrument
PCAGroups separateCheck batch effects

Workflow Variants

Lipidomics

# Adjust peak width for lipids
cwp_lipid <- CentWaveParam(
    peakwidth = c(10, 60),  # Broader peaks
    ppm = 15,
    snthresh = 5
)

# Use LipidMaps for annotation

Targeted Analysis

# Define target compounds
targets <- data.frame(
    name = c('Glucose', 'Lactate', 'Citrate'),
    mz = c(179.0561, 89.0244, 191.0197),
    rt = c(120, 90, 180)
)

# Extract targeted features
extractTargets <- function(xdata, targets, mz_ppm = 10, rt_tol = 30) {
    lapply(1:nrow(targets), function(i) {
        chromPeaks(xdata, mz = targets$mz[i], ppm = mz_ppm,
                   rt = c(targets$rt[i] - rt_tol, targets$rt[i] + rt_tol))
    })
}

Related Skills

  • metabolomics/xcms-preprocessing - XCMS parameters
  • metabolomics/metabolite-annotation - Compound identification
  • metabolomics/normalization-qc - QC and normalization methods
  • metabolomics/statistical-analysis - Statistical testing
  • metabolomics/pathway-mapping - KEGG/MetaboAnalyst
  • metabolomics/lipidomics - Lipid-specific analysis
  • metabolomics/targeted-analysis - Absolute quantification
  • metabolomics/msdial-preprocessing - MS-DIAL export processing
  • multi-omics-integration/mofa-integration - Integrate with other omics
<!-- AUTHOR_SIGNATURE: 9a7f3c2e-MD-BABU-MIA-2026-MSSM-SECURE -->