LLMs-Universal-Life-Science-and-Clinical-Skills- metagenome-visualization

<!--

install

source · Clone the upstream repo

git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-

Claude Code · Install into ~/.claude/skills/

T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Genomics/Metagenomics/bioSkills/metagenome-visualization" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-metagenome-visuali && rm -rf "$T"

manifest: Skills/Genomics/Metagenomics/bioSkills/metagenome-visualization/SKILL.md

source content

name: bio-metagenomics-visualization description: Visualize metagenomic profiles using R (phyloseq, microbiome) and Python (matplotlib, seaborn). Create stacked bar plots, heatmaps, PCA plots, and diversity analyses. Use when creating publication-quality figures from MetaPhlAn, Bracken, or other taxonomic profiling output. tool_type: mixed primary_tool: phyloseq measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes. allowed-tools:

read_file
run_shell_command

Metagenome Visualization

Python - Stacked Bar Plot

import pandas as pd
import matplotlib.pyplot as plt

abundance = pd.read_csv('merged_abundance.txt', sep='\t', index_col=0)
abundance = abundance[abundance.index.str.contains('s__')]
abundance.index = abundance.index.str.split('|').str[-1].str.replace('s__', '')

top_n = 10
top_species = abundance.sum(axis=1).nlargest(top_n).index
abundance_top = abundance.loc[top_species]
abundance_top.loc['Other'] = abundance.drop(top_species).sum()

abundance_top.T.plot(kind='bar', stacked=True, figsize=(12, 6), colormap='tab20')
plt.xlabel('Sample')
plt.ylabel('Relative Abundance (%)')
plt.title('Species Composition')
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left')
plt.tight_layout()
plt.savefig('stacked_bar.png', dpi=300)

Python - Heatmap

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

abundance = pd.read_csv('merged_abundance.txt', sep='\t', index_col=0)
abundance = abundance[abundance.index.str.contains('s__')]
abundance.index = abundance.index.str.split('|').str[-1].str.replace('s__', '')

top_species = abundance.sum(axis=1).nlargest(20).index
abundance_top = abundance.loc[top_species]

plt.figure(figsize=(12, 10))
sns.heatmap(abundance_top, cmap='YlOrRd', annot=False, cbar_kws={'label': 'Abundance (%)'})
plt.xlabel('Sample')
plt.ylabel('Species')
plt.title('Species Abundance Heatmap')
plt.tight_layout()
plt.savefig('heatmap.png', dpi=300)

Python - PCA

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

abundance = pd.read_csv('merged_abundance.txt', sep='\t', index_col=0).T

scaler = StandardScaler()
abundance_scaled = scaler.fit_transform(abundance)

pca = PCA(n_components=2)
pca_result = pca.fit_transform(abundance_scaled)

plt.figure(figsize=(8, 6))
plt.scatter(pca_result[:, 0], pca_result[:, 1])
for i, sample in enumerate(abundance.index):
    plt.annotate(sample, (pca_result[i, 0], pca_result[i, 1]))
plt.xlabel(f'PC1 ({pca.explained_variance_ratio_[0]*100:.1f}%)')
plt.ylabel(f'PC2 ({pca.explained_variance_ratio_[1]*100:.1f}%)')
plt.title('PCA of Sample Composition')
plt.savefig('pca.png', dpi=300)

R - phyloseq Setup

library(phyloseq)
library(ggplot2)
library(vegan)

# From MetaPhlAn merged table
abundance <- read.table('merged_abundance.txt', sep = '\t', header = TRUE, row.names = 1)

# Filter to species level
species <- abundance[grepl('s__', rownames(abundance)), ]
rownames(species) <- sapply(strsplit(rownames(species), '\\|'), tail, 1)
rownames(species) <- gsub('s__', '', rownames(species))

# Create phyloseq object
otu <- otu_table(as.matrix(species), taxa_are_rows = TRUE)

# Sample metadata (create or load)
sample_data <- data.frame(
    Sample = colnames(species),
    Group = c('Control', 'Control', 'Treatment', 'Treatment'),
    row.names = colnames(species)
)
samp <- sample_data(sample_data)

ps <- phyloseq(otu, samp)

R - Stacked Bar Plot

library(phyloseq)
library(ggplot2)

# Top taxa
top_taxa <- names(sort(taxa_sums(ps), decreasing = TRUE))[1:10]
ps_top <- prune_taxa(top_taxa, ps)

# Stacked bar
plot_bar(ps_top, fill = 'Species') +
    geom_bar(stat = 'identity', position = 'stack') +
    theme_minimal() +
    labs(x = 'Sample', y = 'Relative Abundance (%)') +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

R - Ordination (PCoA)

library(phyloseq)
library(ggplot2)

# Bray-Curtis distance
ord <- ordinate(ps, method = 'PCoA', distance = 'bray')

# Plot ordination
plot_ordination(ps, ord, color = 'Group') +
    geom_point(size = 4) +
    stat_ellipse() +
    theme_minimal() +
    labs(title = 'PCoA of Sample Composition')

R - Alpha Diversity

library(phyloseq)
library(ggplot2)

# Calculate diversity metrics
alpha_div <- estimate_richness(ps, measures = c('Shannon', 'Simpson', 'Observed'))

# Add metadata
alpha_div$Group <- sample_data(ps)$Group

# Plot
ggplot(alpha_div, aes(x = Group, y = Shannon, fill = Group)) +
    geom_boxplot() +
    geom_jitter(width = 0.1) +
    theme_minimal() +
    labs(title = 'Alpha Diversity by Group', y = 'Shannon Index')

R - Beta Diversity (PERMANOVA)

library(vegan)

# Get abundance matrix
abundance_matrix <- as(otu_table(ps), 'matrix')
if (taxa_are_rows(ps)) abundance_matrix <- t(abundance_matrix)

# Calculate Bray-Curtis distance
dist_bc <- vegdist(abundance_matrix, method = 'bray')

# PERMANOVA
groups <- sample_data(ps)$Group
permanova <- adonis2(dist_bc ~ groups, permutations = 999)
permanova

Krona Chart

# From Kraken2 report
ktImportTaxonomy -q 1 -t 5 kraken_report.txt -o krona_chart.html

# From MetaPhlAn
metaphlan2krona.py -p profile.txt -k krona_profile.txt
ktImportText krona_profile.txt -o krona_metaphlan.html

Key Packages

Python

Package	Purpose
matplotlib	General plotting
seaborn	Statistical visualizations
scikit-learn	PCA, clustering
scipy	Statistical tests

R

Package	Purpose
phyloseq	Microbiome data handling
vegan	Community ecology
ggplot2	Visualization
microbiome	Additional analyses

Related Skills

kraken-classification - Generate input data
metaphlan-profiling - Generate input data
abundance-estimation - Process Kraken output