OpenClaw-Medical-Skills bio-workflows-proteomics-pipeline

<!--

install
source · Clone the upstream repo
git clone https://github.com/FreedomIntelligence/OpenClaw-Medical-Skills
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/FreedomIntelligence/OpenClaw-Medical-Skills "$T" && mkdir -p ~/.claude/skills && cp -r "$T/skills/bio-workflows-proteomics-pipeline" ~/.claude/skills/freedomintelligence-openclaw-medical-skills-bio-workflows-proteomics-pipeline && rm -rf "$T"
OpenClaw · Install into ~/.openclaw/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/FreedomIntelligence/OpenClaw-Medical-Skills "$T" && mkdir -p ~/.openclaw/skills && cp -r "$T/skills/bio-workflows-proteomics-pipeline" ~/.openclaw/skills/freedomintelligence-openclaw-medical-skills-bio-workflows-proteomics-pipeline && rm -rf "$T"
manifest: skills/bio-workflows-proteomics-pipeline/SKILL.md
source content
<!-- # COPYRIGHT NOTICE # This file is part of the "Universal Biomedical Skills" project. # Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu> # All Rights Reserved. # # This code is proprietary and confidential. # Unauthorized copying of this file, via any medium is strictly prohibited. # # Provenance: Authenticated by MD BABU MIA -->

name: bio-workflows-proteomics-pipeline description: End-to-end proteomics workflow from MaxQuant output to differential protein abundance. Orchestrates data import, normalization, imputation, and statistical testing with MSstats or limma. Use when processing mass spectrometry proteomics. tool_type: mixed primary_tool: MSstats workflow: true depends_on:

  • proteomics/data-import
  • proteomics/proteomics-qc
  • proteomics/quantification
  • proteomics/protein-inference
  • proteomics/differential-abundance measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes. allowed-tools:
  • read_file
  • run_shell_command

Proteomics Pipeline

Pipeline Overview

Raw MS Data (mzML) ──> MaxQuant/DIA-NN ──> proteinGroups.txt
                                                 │
                                                 ▼
                    ┌────────────────────────────────────────────┐
                    │           proteomics-pipeline              │
                    ├────────────────────────────────────────────┤
                    │  1. Data Import & Filtering                │
                    │  2. Log2 Transform & Normalization         │
                    │  3. Missing Value Imputation               │
                    │  4. QC: PCA, Correlation                   │
                    │  5. Differential Abundance (limma/MSstats) │
                    │  6. Visualization & Export                 │
                    └────────────────────────────────────────────┘
                                                 │
                                                 ▼
                          Differential Proteins + Volcano Plots

Complete R Workflow

library(limma)
library(ggplot2)
library(pheatmap)

# === 1. DATA IMPORT ===
proteins <- read.delim('proteinGroups.txt', stringsAsFactors = FALSE)
cat('Loaded', nrow(proteins), 'protein groups\n')

# Filter contaminants, reverse, only-by-site
proteins <- proteins[proteins$Potential.contaminant != '+' &
                      proteins$Reverse != '+' &
                      proteins$Only.identified.by.site != '+', ]
cat('After filtering:', nrow(proteins), 'proteins\n')

# Extract LFQ intensities
lfq_cols <- grep('^LFQ\\.intensity\\.', colnames(proteins), value = TRUE)
intensities <- proteins[, lfq_cols]
rownames(intensities) <- proteins$Majority.protein.IDs
colnames(intensities) <- gsub('LFQ\\.intensity\\.', '', colnames(intensities))

# === 2. LOG2 TRANSFORM & NORMALIZE ===
intensities[intensities == 0] <- NA
log2_int <- log2(intensities)

# Median centering
sample_medians <- apply(log2_int, 2, median, na.rm = TRUE)
global_median <- median(sample_medians)
normalized <- sweep(log2_int, 2, sample_medians - global_median)

# === 3. FILTER & IMPUTE ===
# Keep proteins with < 50% missing
valid_rows <- rowSums(is.na(normalized)) < ncol(normalized) * 0.5
filtered <- normalized[valid_rows, ]
cat('Proteins after filtering:', nrow(filtered), '\n')

# MinProb imputation (left-censored)
impute_minprob <- function(x) {
    nas <- is.na(x)
    if (all(nas)) return(x)
    x[nas] <- rnorm(sum(nas), mean = mean(x, na.rm = TRUE) - 1.8 * sd(x, na.rm = TRUE),
                    sd = 0.3 * sd(x, na.rm = TRUE))
    x
}
imputed <- as.data.frame(t(apply(filtered, 1, impute_minprob)))

# === 4. QC ===
# PCA
pca <- prcomp(t(imputed), scale. = TRUE)
pca_df <- data.frame(PC1 = pca$x[, 1], PC2 = pca$x[, 2], Sample = rownames(pca$x))

# === 5. DIFFERENTIAL ANALYSIS ===
# Load sample annotation (columns: sample, condition)
sample_info <- read.csv('sample_annotation.csv')
sample_info$condition <- factor(sample_info$condition)

design <- model.matrix(~ 0 + condition, data = sample_info)
colnames(design) <- levels(sample_info$condition)

fit <- lmFit(as.matrix(imputed), design)
contrast <- makeContrasts(Treatment - Control, levels = design)
fit2 <- contrasts.fit(fit, contrast)
fit2 <- eBayes(fit2)

results <- topTable(fit2, number = Inf, adjust.method = 'BH')
results$protein <- rownames(results)
results$significant <- abs(results$logFC) > 1 & results$adj.P.Val < 0.05

# === 6. OUTPUT ===
cat('\nResults:\n')
cat('  Significant proteins:', sum(results$significant), '\n')
cat('  Up-regulated:', sum(results$significant & results$logFC > 0), '\n')
cat('  Down-regulated:', sum(results$significant & results$logFC < 0), '\n')

write.csv(results, 'differential_proteins.csv', row.names = FALSE)

MSstats Workflow

library(MSstats)

# From MaxQuant
evidence <- read.table('evidence.txt', sep = '\t', header = TRUE)
proteinGroups <- read.table('proteinGroups.txt', sep = '\t', header = TRUE)
annotation <- read.csv('annotation.csv')

# Convert to MSstats format
msstats_input <- MaxQtoMSstatsFormat(evidence = evidence,
                                      proteinGroups = proteinGroups,
                                      annotation = annotation)

# Process data
processed <- dataProcess(msstats_input, normalization = 'equalizeMedians',
                         summaryMethod = 'TMP', censoredInt = 'NA')

# Comparison
comparison <- matrix(c(1, -1), nrow = 1)
rownames(comparison) <- 'Treatment_vs_Control'
colnames(comparison) <- c('Control', 'Treatment')

results <- groupComparison(contrast.matrix = comparison, data = processed)

QC Checkpoints

StageCheckAction if Failed
Import>1000 proteinsRe-run MaxQuant
Filter<30% removedCheck sample prep
Missing<40% per sampleCheck MS performance
PCAReplicates clusterCheck for batch effects
Stats>1% differentialAdjust thresholds

Workflow Variants

TMT/iTRAQ Isobaric Labeling

library(MSnbase)

# Load TMT data
tmt_data <- readMSnSet('tmt_psms.txt')

# Normalize with reference channel
tmt_norm <- normalize(tmt_data, method = 'center.median')

# Summarize to protein level
protein_data <- combineFeatures(tmt_norm, groupBy = fData(tmt_norm)$protein, fun = 'median')

# Then proceed with limma as above

SILAC Workflow

# SILAC ratios from MaxQuant
silac <- read.delim('proteinGroups.txt')
ratio_cols <- grep('Ratio.H.L.normalized', colnames(silac), value = TRUE)

# Log2 transform ratios
silac_log2 <- log2(silac[, ratio_cols])

# One-sample t-test against 0 (no change)
results <- apply(silac_log2, 1, function(x) t.test(x, mu = 0)$p.value)

DIA-NN Workflow

# Load DIA-NN report
diann <- read.delim('report.tsv')

# Pivot to matrix
library(tidyr)
protein_matrix <- diann %>%
    select(Protein.Group, Run, PG.MaxLFQ) %>%
    pivot_wider(names_from = Run, values_from = PG.MaxLFQ)

# Then proceed with normalization and limma

Related Skills

  • proteomics/data-import - Load MS data formats
  • proteomics/proteomics-qc - Quality control before analysis
  • proteomics/quantification - Normalization methods
  • proteomics/differential-abundance - Statistical testing details
  • proteomics/ptm-analysis - Phosphoproteomics and other PTMs
  • data-visualization/specialized-omics-plots - Volcano plots
<!-- AUTHOR_SIGNATURE: 9a7f3c2e-MD-BABU-MIA-2026-MSSM-SECURE -->