install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Genomics/Metagenomics/bioSkills/metagenome-visualization" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-metagenome-visuali && rm -rf "$T"
manifest:
Skills/Genomics/Metagenomics/bioSkills/metagenome-visualization/SKILL.mdsource content
<!--
# COPYRIGHT NOTICE
# This file is part of the "Universal Biomedical Skills" project.
# Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu>
# All Rights Reserved.
#
# This code is proprietary and confidential.
# Unauthorized copying of this file, via any medium is strictly prohibited.
#
# Provenance: Authenticated by MD BABU MIA
-->
name: bio-metagenomics-visualization description: Visualize metagenomic profiles using R (phyloseq, microbiome) and Python (matplotlib, seaborn). Create stacked bar plots, heatmaps, PCA plots, and diversity analyses. Use when creating publication-quality figures from MetaPhlAn, Bracken, or other taxonomic profiling output. tool_type: mixed primary_tool: phyloseq measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes. allowed-tools:
- read_file
- run_shell_command
Metagenome Visualization
Python - Stacked Bar Plot
import pandas as pd import matplotlib.pyplot as plt abundance = pd.read_csv('merged_abundance.txt', sep='\t', index_col=0) abundance = abundance[abundance.index.str.contains('s__')] abundance.index = abundance.index.str.split('|').str[-1].str.replace('s__', '') top_n = 10 top_species = abundance.sum(axis=1).nlargest(top_n).index abundance_top = abundance.loc[top_species] abundance_top.loc['Other'] = abundance.drop(top_species).sum() abundance_top.T.plot(kind='bar', stacked=True, figsize=(12, 6), colormap='tab20') plt.xlabel('Sample') plt.ylabel('Relative Abundance (%)') plt.title('Species Composition') plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left') plt.tight_layout() plt.savefig('stacked_bar.png', dpi=300)
Python - Heatmap
import pandas as pd import seaborn as sns import matplotlib.pyplot as plt abundance = pd.read_csv('merged_abundance.txt', sep='\t', index_col=0) abundance = abundance[abundance.index.str.contains('s__')] abundance.index = abundance.index.str.split('|').str[-1].str.replace('s__', '') top_species = abundance.sum(axis=1).nlargest(20).index abundance_top = abundance.loc[top_species] plt.figure(figsize=(12, 10)) sns.heatmap(abundance_top, cmap='YlOrRd', annot=False, cbar_kws={'label': 'Abundance (%)'}) plt.xlabel('Sample') plt.ylabel('Species') plt.title('Species Abundance Heatmap') plt.tight_layout() plt.savefig('heatmap.png', dpi=300)
Python - PCA
import pandas as pd import matplotlib.pyplot as plt from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler abundance = pd.read_csv('merged_abundance.txt', sep='\t', index_col=0).T scaler = StandardScaler() abundance_scaled = scaler.fit_transform(abundance) pca = PCA(n_components=2) pca_result = pca.fit_transform(abundance_scaled) plt.figure(figsize=(8, 6)) plt.scatter(pca_result[:, 0], pca_result[:, 1]) for i, sample in enumerate(abundance.index): plt.annotate(sample, (pca_result[i, 0], pca_result[i, 1])) plt.xlabel(f'PC1 ({pca.explained_variance_ratio_[0]*100:.1f}%)') plt.ylabel(f'PC2 ({pca.explained_variance_ratio_[1]*100:.1f}%)') plt.title('PCA of Sample Composition') plt.savefig('pca.png', dpi=300)
R - phyloseq Setup
library(phyloseq) library(ggplot2) library(vegan) # From MetaPhlAn merged table abundance <- read.table('merged_abundance.txt', sep = '\t', header = TRUE, row.names = 1) # Filter to species level species <- abundance[grepl('s__', rownames(abundance)), ] rownames(species) <- sapply(strsplit(rownames(species), '\\|'), tail, 1) rownames(species) <- gsub('s__', '', rownames(species)) # Create phyloseq object otu <- otu_table(as.matrix(species), taxa_are_rows = TRUE) # Sample metadata (create or load) sample_data <- data.frame( Sample = colnames(species), Group = c('Control', 'Control', 'Treatment', 'Treatment'), row.names = colnames(species) ) samp <- sample_data(sample_data) ps <- phyloseq(otu, samp)
R - Stacked Bar Plot
library(phyloseq) library(ggplot2) # Top taxa top_taxa <- names(sort(taxa_sums(ps), decreasing = TRUE))[1:10] ps_top <- prune_taxa(top_taxa, ps) # Stacked bar plot_bar(ps_top, fill = 'Species') + geom_bar(stat = 'identity', position = 'stack') + theme_minimal() + labs(x = 'Sample', y = 'Relative Abundance (%)') + theme(axis.text.x = element_text(angle = 45, hjust = 1))
R - Ordination (PCoA)
library(phyloseq) library(ggplot2) # Bray-Curtis distance ord <- ordinate(ps, method = 'PCoA', distance = 'bray') # Plot ordination plot_ordination(ps, ord, color = 'Group') + geom_point(size = 4) + stat_ellipse() + theme_minimal() + labs(title = 'PCoA of Sample Composition')
R - Alpha Diversity
library(phyloseq) library(ggplot2) # Calculate diversity metrics alpha_div <- estimate_richness(ps, measures = c('Shannon', 'Simpson', 'Observed')) # Add metadata alpha_div$Group <- sample_data(ps)$Group # Plot ggplot(alpha_div, aes(x = Group, y = Shannon, fill = Group)) + geom_boxplot() + geom_jitter(width = 0.1) + theme_minimal() + labs(title = 'Alpha Diversity by Group', y = 'Shannon Index')
R - Beta Diversity (PERMANOVA)
library(vegan) # Get abundance matrix abundance_matrix <- as(otu_table(ps), 'matrix') if (taxa_are_rows(ps)) abundance_matrix <- t(abundance_matrix) # Calculate Bray-Curtis distance dist_bc <- vegdist(abundance_matrix, method = 'bray') # PERMANOVA groups <- sample_data(ps)$Group permanova <- adonis2(dist_bc ~ groups, permutations = 999) permanova
Krona Chart
# From Kraken2 report ktImportTaxonomy -q 1 -t 5 kraken_report.txt -o krona_chart.html # From MetaPhlAn metaphlan2krona.py -p profile.txt -k krona_profile.txt ktImportText krona_profile.txt -o krona_metaphlan.html
Key Packages
Python
| Package | Purpose |
|---|---|
| matplotlib | General plotting |
| seaborn | Statistical visualizations |
| scikit-learn | PCA, clustering |
| scipy | Statistical tests |
R
| Package | Purpose |
|---|---|
| phyloseq | Microbiome data handling |
| vegan | Community ecology |
| ggplot2 | Visualization |
| microbiome | Additional analyses |
Related Skills
- kraken-classification - Generate input data
- metaphlan-profiling - Generate input data
- abundance-estimation - Process Kraken output