install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Immunology_Vaccines/bioSkills/neoantigen-prediction" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-neoantigen-predict && rm -rf "$T"
manifest:
Skills/Immunology_Vaccines/bioSkills/neoantigen-prediction/SKILL.md
source content
<!--
# COPYRIGHT NOTICE
# This file is part of the "Universal Biomedical Skills" project.
# Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu>
# All Rights Reserved.
#
# This code is proprietary and confidential.
# Unauthorized copying of this file, via any medium is strictly prohibited.
#
# Provenance: Authenticated by MD BABU MIA
-->
name: bio-immunoinformatics-neoantigen-prediction
description: Identify tumor neoantigens from somatic mutations using pVACtools for personalized cancer immunotherapy. Predict mutant peptides that bind patient HLA and may elicit T-cell responses. Use when identifying vaccine targets or checkpoint inhibitor response biomarkers from tumor sequencing data.
tool_type: python
primary_tool: pVACtools
measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes.
allowed-tools:
- read_file
- run_shell_command
Neoantigen Prediction
pVACtools Pipeline
# Install pVACtools pip install pvactools # Or use conda for dependencies conda create -n pvactools python=3.8 conda activate pvactools pip install pvactools # Download IEDB tools pvactools download_iedb_tools
pVACseq Workflow
# Run pVACseq on annotated VCF pvacseq run \ annotated.vcf \ sample_name \ "HLA-A*02:01,HLA-A*24:02,HLA-B*07:02,HLA-B*44:02" \ MHCflurry MHCnuggetsI \ output_dir \ -e1 8,9,10,11 \ --iedb-install-directory /path/to/iedb # Key parameters: # -e1: Epitope lengths for MHC-I (8-11) # -e2: Epitope lengths for MHC-II (15) # --binding-threshold: IC50 cutoff (default 500) # --percentile-threshold: Alternative cutoff
VCF Annotation Requirements
# pVACseq requires VEP-annotated VCF # Must include transcript and amino acid changes # Run VEP first vep -i somatic.vcf -o annotated.vcf \ --cache --offline \ --format vcf --vcf \ --plugin Downstream \ --plugin Wildtype \ --terms SO \ --symbol
Parse pVACseq Results
import pandas as pd


def parse_pvacseq_results(results_file, binding_threshold=500):
    '''Load a pVACseq results table and keep only the strong binders.

    Key columns:
    - Mutation: Gene and amino acid change
    - HLA Allele: Patient HLA presenting this peptide
    - MT Epitope Seq: Mutant peptide sequence
    - WT Epitope Seq: Wild-type peptide sequence
    - Median MT Score: Binding affinity (nM)
    - Median WT Score: WT binding (for agretopicity)
    - Tumor DNA VAF: Variant allele frequency
    - Gene Expression: If RNA-seq available

    NOTE(review): column names can differ between pVACtools releases;
    confirm them against the header of the actual results file.

    Args:
        results_file: Path or file-like object holding the tab-separated
            pVACseq output.
        binding_threshold: Exclusive upper bound on 'Median MT Score';
            rows at or above it are dropped. Defaults to 500.

    Returns:
        A new DataFrame with the rows whose 'Median MT Score' is below
        ``binding_threshold``.

    Raises:
        KeyError: If the table has no 'Median MT Score' column.
    '''
    df = pd.read_csv(results_file, sep='\t')

    # Rows with a missing score compare as False and are dropped as well.
    is_strong_binder = df['Median MT Score'] < binding_threshold

    # Return an independent copy so callers can add columns to the result
    # without pandas warning about writing to a slice of the full table.
    return df[is_strong_binder].copy()


def calculate_agretopicity(df):
    '''Add agretopicity (DAI) columns to a pVACseq results table.

    Agretopicity is computed here as the ratio of wild-type to mutant
    binding score ('Median WT Score' / 'Median MT Score'). Because a lower
    score means stronger binding, a higher ratio means the mutant peptide
    binds better than its wild-type counterpart, i.e. the mutation creates
    a new epitope.

    Interpretation of the ratio:
    - >1: Mutant binds better (favorable)
    - ~1: Similar binding (less likely immunogenic)
    - <1: WT binds better (unfavorable)

    Args:
        df: DataFrame with 'Median WT Score' and 'Median MT Score' columns.

    Returns:
        A copy of ``df`` with two extra columns: 'agretopicity' (the ratio)
        and 'dai_favorable' (True where the ratio is greater than 1). The
        input frame is not modified.
    '''
    scored = df.copy()
    scored['agretopicity'] = scored['Median WT Score'] / scored['Median MT Score']

    # High agretopicity = mutation improves binding
    scored['dai_favorable'] = scored['agretopicity'] > 1
    return scored
Prioritize Neoantigens
def prioritize_neoantigens(df, vaf_threshold=0.1, expression_threshold=1.0,
                           binding_threshold=500):
    '''Filter and rank neoantigen candidates for vaccine design.

    Criteria for good neoantigen candidates:
    1. Strong MHC binding (IC50 < 500nM, ideally < 50nM)
    2. High agretopicity (MT binds better than WT)
    3. High tumor VAF (clonal, present in most tumor cells)
    4. Expressed in tumor (if RNA-seq available)
    5. Not in tolerogenic region (self-similarity check)

    This function filters on criteria 1, 3 and 4 only. Agretopicity
    (criterion 2) contributes to the ranking score when an 'agretopicity'
    column is present but is never used as a filter, and no
    self-similarity check (criterion 5) is performed here.

    Typical pipeline returns 10-50 candidates per patient

    Args:
        df: DataFrame with a 'Median MT Score' column and, optionally,
            'Tumor DNA VAF', 'Gene Expression' and 'agretopicity' columns.
        vaf_threshold: Minimum 'Tumor DNA VAF' to keep (inclusive).
        expression_threshold: Minimum 'Gene Expression' to keep (inclusive).
        binding_threshold: Exclusive upper bound on 'Median MT Score'.
            Defaults to 500.

    Returns:
        A new DataFrame holding the surviving rows plus a 'priority_score'
        column, sorted by that score in descending order. The input frame
        is not modified.

    Raises:
        KeyError: If ``df`` has no 'Median MT Score' column.
    '''
    # Filter by binding
    candidates = df[df['Median MT Score'] < binding_threshold]

    # Filter by VAF (clonal mutations preferred); rows with a missing VAF
    # fail the comparison and are dropped.
    if 'Tumor DNA VAF' in candidates.columns:
        candidates = candidates[candidates['Tumor DNA VAF'] >= vaf_threshold]

    # Filter by expression; rows with missing expression are dropped too.
    if 'Gene Expression' in candidates.columns:
        candidates = candidates[candidates['Gene Expression'] >= expression_threshold]

    # Copy once, after filtering: the score column is then written to an
    # independent frame rather than to a slice of the caller's data, which
    # avoids pandas' SettingWithCopyWarning.
    candidates = candidates.copy()

    # Calculate priority score
    # Lower binding affinity = better
    # Higher VAF = better
    # Higher agretopicity = better
    # Absent optional columns contribute a neutral factor of 1.
    candidates['priority_score'] = (
        (1 / candidates['Median MT Score'])
        * candidates.get('Tumor DNA VAF', 1)
        * candidates.get('agretopicity', 1)
    )

    return candidates.sort_values('priority_score', ascending=False)
Alternative: Manual Neoantigen Pipeline
def manual_neoantigen_pipeline(vcf_file, hla_alleles, reference_fasta,
                               peptide_extractor=None, affinity_threshold=500):
    '''Simplified neoantigen prediction without pVACtools

    Steps:
    1. Extract coding mutations from VCF
    2. Generate mutant protein sequences
    3. Extract peptides around mutation
    4. Predict MHC binding

    Steps 1-3 depend on how the VCF was annotated, so they are delegated
    to ``peptide_extractor``. This function only iterates the variants
    that carry a 'CSQ' INFO field and runs step 4 on the peptides the
    extractor returns.

    Args:
        vcf_file: Path to the annotated VCF, passed to ``cyvcf2.VCF``.
        hla_alleles: Iterable of HLA allele names; each peptide is scored
            against each allele separately.
        reference_fasta: Not used by this function itself; kept so the
            signature stays stable. A ``peptide_extractor`` that needs the
            reference can close over it.
        peptide_extractor: Callable taking one cyvcf2 variant and returning
            an iterable of mutant peptide strings for it.
        affinity_threshold: Exclusive upper bound on predicted affinity
            for a peptide/allele pair to be reported. Defaults to 500.

    Returns:
        A list of dicts with keys 'variant' ("CHROM:POS"), 'peptide',
        'allele' and 'affinity', one per peptide/allele pair below the
        threshold.

    Raises:
        NotImplementedError: If no ``peptide_extractor`` is supplied.
    '''
    # Fail fast with an actionable message: peptide extraction is
    # annotation-specific and has no generic default implementation.
    if peptide_extractor is None:
        raise NotImplementedError(
            'manual_neoantigen_pipeline needs a peptide_extractor callable '
            'that maps a VCF variant to its mutant peptides; the parsing '
            'depends on the annotation format of the VCF.'
        )

    from cyvcf2 import VCF
    from mhcflurry import Class1PresentationPredictor

    vcf = VCF(vcf_file)
    predictor = Class1PresentationPredictor.load()

    neoantigens = []

    for variant in vcf:
        # Get amino acid change from VEP annotation. Use .get() rather than
        # `in`: membership tests on the INFO object are not a reliable way
        # to check for a key.
        if variant.INFO.get('CSQ') is None:
            continue

        # Parse consequence and extract mutant peptides
        mutant_peptides = peptide_extractor(variant)

        # For each mutant peptide, predict binding
        for peptide in mutant_peptides:
            for allele in hla_alleles:
                pred = predictor.predict(peptides=[peptide], alleles=[allele])

                # NOTE(review): the affinity column name differs between
                # mhcflurry's Python API and its command-line output, so
                # accept either — confirm against the installed version.
                if 'affinity' in pred.columns:
                    affinity_column = 'affinity'
                else:
                    affinity_column = 'mhcflurry_affinity'
                affinity = pred[affinity_column].values[0]

                if affinity < affinity_threshold:
                    neoantigens.append({
                        'variant': f'{variant.CHROM}:{variant.POS}',
                        'peptide': peptide,
                        'allele': allele,
                        'affinity': affinity
                    })

    return neoantigens
Neoantigen Quality Metrics
def assess_neoantigen_quality(neoantigen):
    '''Assess multiple quality metrics for neoantigen

    Builds a composite quality score from four sub-scores, each bounded to
    the range 0-1:
    - Binding affinity ('Median MT Score', lower IC50 = higher score)
    - Agretopicity ('agretopicity')
    - Clonality ('Tumor DNA VAF')
    - Expression ('Gene Expression', log scale)

    Self-similarity is not evaluated by this function.

    Args:
        neoantigen: Mapping-like object with a ``.get(key, default)``
            method (for example a dict or a DataFrame row). Metrics that
            are absent, None or NaN fall back to neutral defaults:
            IC50 500, agretopicity 1, VAF 0.5, expression 1.
            'Tumor DNA VAF' is expected as a fraction between 0 and 1.

    Returns:
        A ``(composite, scores)`` tuple: the weighted sum of the sub-scores
        and the dict of individual sub-scores keyed by 'binding',
        'agretopicity', 'clonality' and 'expression'.
    '''
    import math

    def _metric(key, default):
        # Missing values read from a results table arrive as NaN, which
        # .get() does not replace with the default; treat NaN (and None)
        # the same as an absent key so one gap cannot turn the whole
        # composite into NaN.
        value = neoantigen.get(key, default)
        if value is None or (isinstance(value, float) and math.isnan(value)):
            return default
        return value

    def _clamp(value):
        # Keep every sub-score inside the documented 0-1 range.
        return max(0.0, min(value, 1.0))

    scores = {}

    # Binding (0-1, lower IC50 = higher score)
    ic50 = _metric('Median MT Score', 500)
    scores['binding'] = _clamp(1 - ic50 / 5000)

    # Agretopicity (0-1); a ratio of 10 or more saturates the score
    dai = _metric('agretopicity', 1)
    scores['agretopicity'] = _clamp(dai / 10)

    # Clonality (0-1)
    vaf = _metric('Tumor DNA VAF', 0.5)
    scores['clonality'] = _clamp(vaf)

    # Expression (0-1, log scale); negative input is floored at zero so the
    # logarithm is always defined
    expr = _metric('Gene Expression', 1)
    scores['expression'] = _clamp(math.log10(max(expr, 0) + 1) / 3)

    # Composite score
    weights = {'binding': 0.3, 'agretopicity': 0.3, 'clonality': 0.2, 'expression': 0.2}
    composite = sum(scores[k] * weights[k] for k in weights)

    return composite, scores
Related Skills
- immunoinformatics/mhc-binding-prediction - MHC binding details
- immunoinformatics/immunogenicity-scoring - Prioritization
- variant-calling/variant-calling - Input somatic mutations