install
source · Clone the upstream repo
git clone https://github.com/FreedomIntelligence/OpenClaw-Medical-Skills
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/FreedomIntelligence/OpenClaw-Medical-Skills "$T" && mkdir -p ~/.claude/skills && cp -r "$T/skills/ngs-analysis" ~/.claude/skills/freedomintelligence-openclaw-medical-skills-ngs-analysis && rm -rf "$T"
OpenClaw · Install into ~/.openclaw/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/FreedomIntelligence/OpenClaw-Medical-Skills "$T" && mkdir -p ~/.openclaw/skills && cp -r "$T/skills/ngs-analysis" ~/.openclaw/skills/freedomintelligence-openclaw-medical-skills-ngs-analysis && rm -rf "$T"
manifest:
skills/ngs-analysis/SKILL.mdsafety · automated scan (low risk)
This is a pattern-based risk scan, not a security review. Our crawler flagged:
- shell exec via library
Always read a skill's source content before installing. Patterns alone don't mean the skill is malicious — but they warrant attention.
source content
<!--
# COPYRIGHT NOTICE
# This file is part of the "Universal Biomedical Skills" project.
# Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu>
# All Rights Reserved.
#
# This code is proprietary and confidential.
# Unauthorized copying of this file, via any medium is strictly prohibited.
#
# Provenance: Authenticated by MD BABU MIA
-->
name: ngs-analysis description: "Next-generation sequencing data analysis pipelines including bulk RNA-seq, scRNA-seq preprocessing, variant calling, and quality control. Use when working with FASTQ files, alignment (STAR, BWA), quantification (featureCounts, Salmon), DESeq2/edgeR analysis, or building NGS pipelines. Supports GEO/SRA data retrieval." license: Proprietary
NGS Data Analysis Pipelines
Data Retrieval from GEO/SRA
# Install SRA toolkit conda install -c bioconda sra-tools # Download SRA files prefetch SRR12345678 fastq-dump --split-files --gzip SRR12345678 # Parallel download with fasterq-dump fasterq-dump --split-files -e 8 SRR12345678 gzip SRR12345678_*.fastq
Quality Control
# FastQC fastqc -t 8 -o fastqc_output/ *.fastq.gz # MultiQC aggregation multiqc fastqc_output/ -o multiqc_report/ # Trimming with fastp fastp -i R1.fastq.gz -I R2.fastq.gz \ -o R1_trimmed.fastq.gz -O R2_trimmed.fastq.gz \ --detect_adapter_for_pe --thread 8 \ --html fastp_report.html
Bulk RNA-seq Pipeline
Alignment with STAR
# Build index (once) STAR --runMode genomeGenerate \ --genomeDir star_index/ \ --genomeFastaFiles genome.fa \ --sjdbGTFfile genes.gtf \ --runThreadN 16 # Alignment STAR --runThreadN 16 \ --genomeDir star_index/ \ --readFilesIn R1.fastq.gz R2.fastq.gz \ --readFilesCommand zcat \ --outFileNamePrefix sample_ \ --outSAMtype BAM SortedByCoordinate \ --quantMode GeneCounts
Quantification with featureCounts
featureCounts -T 8 -p -B -C \ -a genes.gtf \ -o counts.txt \ *.bam
Salmon Pseudo-alignment
# Index salmon index -t transcripts.fa -i salmon_index -k 31 # Quantification salmon quant -i salmon_index -l A \ -1 R1.fastq.gz -2 R2.fastq.gz \ -p 8 -o salmon_quant/
Differential Expression with DESeq2
library(DESeq2) library(tidyverse) # Load counts counts <- read.table("counts.txt", header=TRUE, row.names=1) coldata <- read.csv("sample_info.csv", row.names=1) # Create DESeq object dds <- DESeqDataSetFromMatrix( countData = counts, colData = coldata, design = ~ condition ) # Filter low counts keep <- rowSums(counts(dds) >= 10) >= 3 dds <- dds[keep,] # Run DESeq2 dds <- DESeq(dds) res <- results(dds, contrast=c("condition", "treatment", "control")) res_df <- as.data.frame(res) %>% rownames_to_column("gene") %>% filter(!is.na(padj)) %>% arrange(padj) # Significant genes sig_genes <- res_df %>% filter(padj < 0.05, abs(log2FoldChange) > 1) write.csv(res_df, "DEG_results.csv", row.names=FALSE)
Variant Calling (Somatic)
# BWA-MEM2 alignment bwa-mem2 index reference.fa bwa-mem2 mem -t 16 reference.fa R1.fq.gz R2.fq.gz | \ samtools sort -@ 8 -o aligned.bam # Mark duplicates gatk MarkDuplicates -I aligned.bam -O marked.bam -M metrics.txt # BQSR gatk BaseRecalibrator -R ref.fa -I marked.bam \ --known-sites known_sites.vcf -O recal.table gatk ApplyBQSR -R ref.fa -I marked.bam \ --bqsr-recal-file recal.table -O recal.bam # Mutect2 for somatic variants gatk Mutect2 -R ref.fa -I tumor.bam -I normal.bam \ -normal normal_sample -O somatic.vcf.gz
Python Integration
import pandas as pd import subprocess from pathlib import Path def run_pipeline(fastq_dir, output_dir, genome_index): """Run complete RNA-seq pipeline""" fastq_files = list(Path(fastq_dir).glob("*_R1.fastq.gz")) for r1 in fastq_files: sample = r1.stem.replace("_R1.fastq", "") r2 = r1.parent / f"{sample}_R2.fastq.gz" # STAR alignment cmd = f""" STAR --runThreadN 16 --genomeDir {genome_index} \ --readFilesIn {r1} {r2} --readFilesCommand zcat \ --outFileNamePrefix {output_dir}/{sample}_ \ --outSAMtype BAM SortedByCoordinate """ subprocess.run(cmd, shell=True, check=True)
See
references/conda_envs.md for environment setup.
See scripts/batch_pipeline.py for parallel processing.
<!-- AUTHOR_SIGNATURE: 9a7f3c2e-MD-BABU-MIA-2026-MSSM-SECURE -->