install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Population_Genetics/epidemiological-genomics/transmission-inference" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-transmission-infer && rm -rf "$T"
manifest:
Skills/Population_Genetics/epidemiological-genomics/transmission-inference/SKILL.mdsource content
<!--
# COPYRIGHT NOTICE
# This file is part of the "Universal Biomedical Skills" project.
# Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu>
# All Rights Reserved.
#
# This code is proprietary and confidential.
# Unauthorized copying of this file, via any medium is strictly prohibited.
#
# Provenance: Authenticated by MD BABU MIA
-->
name: bio-epidemiological-genomics-transmission-inference description: Infer pathogen transmission networks and identify likely transmission pairs using TransPhylo and outbreak reconstruction algorithms. Estimate who-infected-whom from genomic and epidemiological data. Use when investigating outbreak transmission chains or identifying superspreaders. tool_type: r primary_tool: TransPhylo measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes. allowed-tools:
- read_file
- run_shell_command
Transmission Inference
TransPhylo in R
library(TransPhylo) library(ape) # Load dated phylogeny (from BEAST/TreeTime) tree <- read.nexus('dated_tree.nexus') # Convert to TransPhylo format ptree <- ptreeFromPhylo(tree, dateLastSample = 2020.5) # Estimate transmission tree # Uses MCMC to sample from posterior distribution res <- inferTTree( ptree, mcmcIterations = 100000, startNeg = 0.1, # Initial within-host effective population startOff.r = 2, # Initial R0 estimate startOff.p = 0.5, # Initial sampling probability startPi = 0.9, # Initial probability of being sampled dateT = 2020.6 # End of outbreak observation ) # Extract consensus transmission tree ttree <- extractTTree(res) # Get transmission pairs pairs <- ttree$ttree[, c('infector', 'infectee', 'time')]
Prepare Data
def prepare_for_transphylo(dated_tree_file, sample_dates, output_prefix): '''Prepare inputs for TransPhylo analysis Requirements: - Time-scaled phylogeny (from TreeTime or BEAST) - Sample collection dates - Tips must have matching names TransPhylo estimates: - Who infected whom - Unsampled cases in the transmission chain - R0 and generation time ''' from Bio import Phylo import pandas as pd tree = Phylo.read(dated_tree_file, 'nexus') # Verify all tips have dates dates_df = pd.read_csv(sample_dates, sep='\t') tip_names = {clade.name for clade in tree.get_terminals()} dated_names = set(dates_df['name']) missing = tip_names - dated_names if missing: print(f'Warning: {len(missing)} tips without dates: {missing}') return {'tree': dated_tree_file, 'dates': sample_dates}
Interpret Results
# Analyze TransPhylo output # Get median transmission tree med_tree <- medTTree(res) # Plot transmission tree plot(med_tree) # Get R0 estimate r0_samples <- res$record[, 'off.r'] cat('R0 estimate:', median(r0_samples), '\n') cat('95% CI:', quantile(r0_samples, c(0.025, 0.975)), '\n') # Identify superspreaders # Count number infected by each case infections_per_case <- table(med_tree$ttree[, 'infector']) superspreaders <- names(infections_per_case[infections_per_case > 3])
Python Alternative: outbreaker2 Wrapper
def infer_transmission_simple(distance_matrix, dates, generation_time=5): '''Simplified transmission inference Uses genomic distance and collection dates to infer likely transmission pairs. Less sophisticated than TransPhylo but doesn't require dated phylogeny. Criteria for transmission pair (A -> B): 1. A collected before B 2. Genomic distance consistent with direct transmission 3. Time difference compatible with generation time ''' import pandas as pd import numpy as np n = len(dates) transmission_pairs = [] for i in range(n): for j in range(n): if i == j: continue time_diff = dates[j] - dates[i] # Days between collection # Potential infector must be sampled first if time_diff <= 0: continue # Check if time difference is compatible # Generation time: time between infection of case and infection of secondary # Serial interval: time between symptom onset (often used as proxy) if time_diff > generation_time * 3: # Too much time continue # Check genomic distance snp_diff = distance_matrix[i, j] # Expected SNPs = rate * time # For most pathogens, direct transmission = 0-5 SNP difference expected_snps = (time_diff / 365) * 10 # Rough estimate if snp_diff <= max(5, expected_snps * 2): transmission_pairs.append({ 'infector': i, 'infectee': j, 'snp_distance': snp_diff, 'days_between': time_diff, 'confidence': 'high' if snp_diff <= 2 else 'moderate' }) return pd.DataFrame(transmission_pairs)
Network Visualization
def plot_transmission_network(pairs_df, metadata=None): '''Visualize transmission network Uses networkx to create directed graph of transmissions. ''' import networkx as nx import matplotlib.pyplot as plt G = nx.DiGraph() for _, row in pairs_df.iterrows(): G.add_edge(row['infector'], row['infectee'], weight=row.get('confidence', 1)) # Layout pos = nx.spring_layout(G) # Draw plt.figure(figsize=(12, 8)) nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=500, arrows=True, arrowsize=20) plt.title('Transmission Network') return plt.gcf()
Superspreader Analysis
def identify_superspreaders(transmission_pairs, threshold=3): '''Identify superspreading events Superspreader: Individual who infected many others Threshold typically 80/20 rule: 20% of cases cause 80% of transmission Common threshold: >3 secondary cases ''' from collections import Counter infector_counts = Counter(transmission_pairs['infector']) superspreaders = {k: v for k, v in infector_counts.items() if v >= threshold} total_transmissions = sum(infector_counts.values()) ss_transmissions = sum(superspreaders.values()) print(f'Superspreaders (>{threshold} secondary cases):') for ss, count in sorted(superspreaders.items(), key=lambda x: -x[1]): print(f' Case {ss}: {count} secondary infections') print(f'\nSuperspreading contribution: {ss_transmissions/total_transmissions:.1%}') return superspreaders
Related Skills
- epidemiological-genomics/phylodynamics - Generate dated trees
- epidemiological-genomics/pathogen-typing - Identify outbreak clones
- data-visualization/interactive-visualization - Visualize transmission