install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Immunology_Vaccines/bioSkills/tcr-epitope-binding" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-tcr-epitope-bindin && rm -rf "$T"
manifest:
Skills/Immunology_Vaccines/bioSkills/tcr-epitope-binding/SKILL.md
source content
<!--
# COPYRIGHT NOTICE
# This file is part of the "Universal Biomedical Skills" project.
# Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu>
# All Rights Reserved.
#
# This code is proprietary and confidential.
# Unauthorized copying of this file, via any medium is strictly prohibited.
#
# Provenance: Authenticated by MD BABU MIA
-->
name: bio-immunoinformatics-tcr-epitope-binding
description: Predict TCR-epitope specificity using ERGO-II and deep learning models for T-cell receptor antigen recognition. Match TCRs to their cognate epitopes or predict TCR targets. Use when analyzing TCR repertoire specificity or identifying antigen-reactive T-cells.
tool_type: python
primary_tool: ERGO-II
measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes.
allowed-tools:
- read_file
- run_shell_command
TCR-Epitope Binding
ERGO-II Model
# ERGO-II applies deep learning to TCR-epitope binding prediction.
# GitHub: https://github.com/IdoSpringer/ERGO-II
def setup_ergo():
    '''Print the manual setup steps for ERGO-II.

    This function installs nothing itself; it only writes the
    instructions to standard output and returns None.

    Requirements:
    - PyTorch
    - Pre-trained models from ERGO-II repository

    ERGO-II features:
    - Uses both CDR3 alpha and beta chains
    - Incorporates MHC context
    - Trained on VDJdb and IEDB data
    '''
    instructions = (
        'ERGO-II setup:',
        '1. Clone: git clone https://github.com/IdoSpringer/ERGO-II',
        '2. Install: pip install torch pandas scikit-learn',
        '3. Download models from repository',
    )
    # One print call per line, in the listed order.
    for line in instructions:
        print(line)
TCR Input Format
def parse_tcr_data(tcr_file):
    '''Load a tab-separated TCR table and keep rows with a valid CDR3 beta.

    Columns:
    - cdr3_beta: CDR3 beta chain sequence (required; the only column
      this function reads)
    - cdr3_alpha, v_beta, j_beta: optional, passed through untouched

    A CDR3 beta value is valid when it is a non-empty string made only
    of the 20 standard amino-acid letters (checked case-insensitively).
    Missing values, empty strings and non-string cells (for example a
    column that pandas parsed as numbers) are treated as invalid
    instead of raising.

    Args:
        tcr_file: Path or file-like object accepted by pandas.read_csv.

    Returns:
        A new DataFrame (a copy, safe to modify) holding only the valid
        rows. It carries an extra boolean column 'valid_beta', which is
        True for every returned row.

    Raises:
        KeyError: If the table has no 'cdr3_beta' column.
    '''
    import pandas as pd

    table = pd.read_csv(tcr_file, sep='\t')
    if 'cdr3_beta' not in table.columns:
        raise KeyError("TCR table is missing the required 'cdr3_beta' column")

    valid_aa = frozenset('ACDEFGHIKLMNPQRSTVWY')

    def is_valid_cdr3(seq):
        # The isinstance check rejects NaN/NA as well as numeric cells,
        # which have no .upper(); the emptiness check is needed because
        # a subset test on '' would otherwise succeed vacuously.
        if not isinstance(seq, str) or not seq:
            return False
        return valid_aa.issuperset(seq.upper())

    # astype(bool) keeps the mask boolean even when the table is empty,
    # so the row filter below is never mistaken for a column selection.
    table['valid_beta'] = table['cdr3_beta'].map(is_valid_cdr3).astype(bool)
    return table[table['valid_beta']].copy()
Predict TCR-Epitope Binding
def predict_binding_simple(cdr3_beta, epitope):
    '''Score TCR-epitope compatibility with a simple heuristic.

    This is a simplified heuristic. For accurate predictions, use
    ERGO-II or other deep learning models.

    Two features are computed, each clamped to the range [0, 1]:
    - length_score: 1 at the assumed optimal CDR3 length
      (len(epitope) + 5, a rough heuristic), dropping by 0.1 per
      residue of deviation.
    - charge_score: 0.5 when either sequence is net neutral, higher
      when the net charges are opposite, lower when they have the
      same sign. R, K and H count as +1; D and E count as -1.
      Residues are compared case-insensitively.

    Args:
        cdr3_beta: CDR3 beta amino-acid sequence (string).
        epitope: Epitope amino-acid sequence (string).

    Returns:
        dict with the two input strings as given ('cdr3_beta',
        'epitope'), 'length_score', 'charge_score' and 'combined',
        the mean of the two clamped scores (so also within [0, 1]).
    '''
    positive = frozenset('RKH')
    negative = frozenset('DE')

    def net_charge(seq):
        # Upper-case first so lowercase input is not scored as neutral.
        return sum((aa in positive) - (aa in negative) for aa in seq.upper())

    def clamp(value):
        return max(0, min(1, value))

    # TCRs recognizing similar epitopes often have similar CDR3 lengths.
    optimal_length = len(epitope) + 5  # Rough heuristic
    length_score = clamp(1 - abs(len(cdr3_beta) - optimal_length) / 10)

    # Opposite net charges suggest complementarity.
    charge_product = net_charge(cdr3_beta) * -net_charge(epitope)
    charge_score = clamp(0.5 + charge_product / 20)

    return {
        'cdr3_beta': cdr3_beta,
        'epitope': epitope,
        'length_score': length_score,
        'charge_score': charge_score,
        # Average the clamped values so the combined score always
        # agrees with the reported components and stays in [0, 1].
        'combined': (length_score + charge_score) / 2,
    }
Match TCRs to Known Epitopes
def match_to_vdjdb(tcr_sequences, vdjdb_path='vdjdb.tsv', min_similarity=0.9):
    '''Match TCR CDR3 sequences to known epitopes in a VDJdb export.

    VDJdb is a curated database of TCR-epitope pairs.
    Download from: https://vdjdb.cdr3.net/

    Matching is done per query, in this order:
    1. Exact: every database row whose 'cdr3' equals the query is
       summarized in ONE result row with list-valued 'epitopes' and
       'species'.
    2. Similar (only when there is no exact hit): one result row per
       database record whose difflib.SequenceMatcher ratio against the
       query is strictly greater than min_similarity. Note this is a
       similarity ratio, not an edit-distance cutoff: a single
       substitution passes the default 0.9 only for CDR3s of roughly
       11 residues or more.

    Args:
        tcr_sequences: Iterable of CDR3 strings. Non-string entries
            (e.g. NaN) are skipped.
        vdjdb_path: Path or file-like object of a tab-separated table
            with columns 'cdr3', 'antigen.epitope', 'antigen.species'.
        min_similarity: Exclusive lower bound on the similarity ratio
            for a fuzzy match. Defaults to 0.9.

    Returns:
        pandas.DataFrame with one row per match. Exact rows have
        columns query_tcr, match_type, epitopes, species; similar rows
        have query_tcr, match_type, similarity, db_tcr, epitope,
        species. The frame is empty (no columns) when nothing matches.
    '''
    import pandas as pd
    from difflib import SequenceMatcher

    vdjdb = pd.read_csv(vdjdb_path, sep='\t')

    # Materialize the fuzzy-match candidates once instead of calling
    # iterrows() per query. Rows without a string CDR3 (missing values)
    # are dropped here because SequenceMatcher cannot compare them.
    db_records = [
        (db_cdr3, epitope, species)
        for db_cdr3, epitope, species in zip(
            vdjdb['cdr3'], vdjdb['antigen.epitope'], vdjdb['antigen.species']
        )
        if isinstance(db_cdr3, str)
    ]

    matches = []
    for tcr in tcr_sequences:
        if not isinstance(tcr, str):
            continue

        # Exact match
        exact = vdjdb[vdjdb['cdr3'] == tcr]
        if len(exact) > 0:
            matches.append({
                'query_tcr': tcr,
                'match_type': 'exact',
                'epitopes': exact['antigen.epitope'].tolist(),
                'species': exact['antigen.species'].tolist()
            })
            continue

        # Fuzzy match by similarity ratio
        for db_cdr3, epitope, species in db_records:
            matcher = SequenceMatcher(None, tcr, db_cdr3)
            # real_quick_ratio() and quick_ratio() are cheap upper
            # bounds on ratio(), so failing either one rules the pair
            # out without changing which pairs are reported.
            if (matcher.real_quick_ratio() <= min_similarity
                    or matcher.quick_ratio() <= min_similarity):
                continue
            similarity = matcher.ratio()
            if similarity > min_similarity:
                matches.append({
                    'query_tcr': tcr,
                    'match_type': 'similar',
                    'similarity': similarity,
                    'db_tcr': db_cdr3,
                    'epitope': epitope,
                    'species': species
                })

    return pd.DataFrame(matches)
TCR Clustering
def _levenshtein_distance(s1, s2):
    '''Return the edit distance between two strings (two-row DP).'''
    # Keep the shorter string on the inner axis so the rows stay small.
    if len(s1) < len(s2):
        s1, s2 = s2, s1
    if not s2:
        return len(s1)

    previous_row = list(range(len(s2) + 1))
    for i, c1 in enumerate(s1, start=1):
        current_row = [i]
        for j, c2 in enumerate(s2):
            current_row.append(min(
                previous_row[j + 1] + 1,         # insertion
                current_row[j] + 1,              # deletion
                previous_row[j] + (c1 != c2),    # substitution
            ))
        previous_row = current_row
    return previous_row[-1]


def cluster_tcrs_by_specificity(tcr_sequences, method='levenshtein',
                                max_distance=3):
    '''Cluster TCRs likely to share specificity.

    TCRs recognizing the same epitope often have:
    - Similar CDR3 length
    - Shared motifs
    - Similar V gene usage

    Pairwise Levenshtein distances between the sequences are clustered
    with average-linkage hierarchical clustering, and the tree is cut
    at max_distance. Because linkage is 'average', the cut applies to
    the mean distance between clusters, not to every individual pair.

    Methods:
    - levenshtein: Edit distance clustering (the only one implemented)
    - tcrdist: TCRdist3 distance metric (not implemented)
    - deep: Deep learning embeddings (not implemented)

    Args:
        tcr_sequences: Iterable of CDR3 strings.
        method: Distance method; must be 'levenshtein'.
        max_distance: Distance threshold for cutting the cluster tree.
            Defaults to 3.

    Returns:
        dict mapping each sequence to an integer cluster label starting
        at 1. Duplicate sequences collapse to a single key. Empty input
        gives {} and a single sequence gives {sequence: 1}.

    Raises:
        NotImplementedError: For the documented but unimplemented
            methods 'tcrdist' and 'deep'.
        ValueError: For any other unknown method.
    '''
    from itertools import combinations
    from scipy.cluster.hierarchy import linkage, fcluster
    import numpy as np

    # Fail loudly rather than silently returning Levenshtein clusters
    # for a method the caller did not ask for.
    if method != 'levenshtein':
        if method in ('tcrdist', 'deep'):
            raise NotImplementedError(
                f"method '{method}' is not implemented; use 'levenshtein'"
            )
        raise ValueError(f"unknown clustering method: {method!r}")

    sequences = list(tcr_sequences)
    # linkage() needs at least two observations; handle the trivial
    # inputs here instead of letting scipy raise.
    if not sequences:
        return {}
    if len(sequences) == 1:
        return {sequences[0]: 1}

    # combinations() yields pairs in the (0,1), (0,2), ..., (1,2), ...
    # order that scipy uses for condensed distance matrices.
    condensed = np.array(
        [_levenshtein_distance(a, b) for a, b in combinations(sequences, 2)],
        dtype=float,
    )

    tree = linkage(condensed, method='average')
    labels = fcluster(tree, t=max_distance, criterion='distance')

    # Plain ints keep the result JSON-serializable.
    return {seq: int(label) for seq, label in zip(sequences, labels)}
Analyze Repertoire Specificity
def analyze_repertoire_specificity(tcr_df, epitope_db):
    '''Summarize how much of a TCR repertoire matches known epitopes.

    The unique, non-missing 'cdr3_beta' values of tcr_df are matched
    against the database with match_to_vdjdb, and the matches are
    summarized.

    Args:
        tcr_df: DataFrame with a 'cdr3_beta' column.
        epitope_db: Database location, passed to match_to_vdjdb as its
            vdjdb_path argument.

    Returns:
        dict with:
        - 'total_tcrs': number of rows in tcr_df
        - 'unique_cdr3': number of distinct cdr3_beta values
        - 'matched_epitopes': number of distinct query CDR3s with at
          least one match
        - 'epitope_distribution': {epitope: count} over all matched
          database records, most frequent first
    '''
    from collections import Counter

    results = {
        'total_tcrs': len(tcr_df),
        'unique_cdr3': tcr_df['cdr3_beta'].nunique(),
        'matched_epitopes': 0,
        'epitope_distribution': {}
    }

    # Drop missing CDR3s first: they can never match and are not valid
    # input for string comparison.
    query_cdr3s = tcr_df['cdr3_beta'].dropna().unique()

    # Match to database
    matched = match_to_vdjdb(query_cdr3s, epitope_db)
    if len(matched) == 0:
        return results

    results['matched_epitopes'] = matched['query_tcr'].nunique()

    # Similar matches carry a scalar 'epitope'; exact matches carry a
    # list in 'epitopes'. Either column is absent when no match of that
    # type occurred, so both are checked before use.
    epitope_counts = Counter()
    if 'epitope' in matched.columns:
        epitope_counts.update(matched['epitope'].dropna())
    if 'epitopes' in matched.columns:
        for epitope_list in matched['epitopes']:
            # Rows from similar matches hold NaN in this column.
            if isinstance(epitope_list, list):
                epitope_counts.update(epitope_list)

    results['epitope_distribution'] = dict(epitope_counts.most_common())
    return results
Related Skills
- tcr-bcr-analysis/mixcr-analysis - TCR repertoire sequencing analysis
- immunoinformatics/mhc-binding-prediction - Epitope context
- single-cell/clustering - Single-cell TCR analysis