LLMs-Universal-Life-Science-and-Clinical-Skills- epitope-prediction

<!--

install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Immunology_Vaccines/bioSkills/epitope-prediction" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-epitope-prediction && rm -rf "$T"
manifest: Skills/Immunology_Vaccines/bioSkills/epitope-prediction/SKILL.md
source content
<!-- # COPYRIGHT NOTICE # This file is part of the "Universal Biomedical Skills" project. # Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu> # All Rights Reserved. # # This code is proprietary and confidential. # Unauthorized copying of this file, via any medium is strictly prohibited. # # Provenance: Authenticated by MD BABU MIA -->

name: bio-immunoinformatics-epitope-prediction description: Predict B-cell and T-cell epitopes using BepiPred, IEDB tools, and structure-based methods for vaccine and antibody design. Identify immunogenic regions in antigens. Use when designing vaccines, mapping antibody binding sites, or predicting immunogenic peptides. tool_type: python primary_tool: BepiPred measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes. allowed-tools:

  • read_file
  • run_shell_command

Epitope Prediction

B-Cell Epitope Prediction

BepiPred-2.0 (Sequence-Based)

import requests

def predict_bcell_epitopes_iedb(sequence, method='bepipred2'):
    '''Predict B-cell epitopes using IEDB API

    Methods:
    - bepipred2: Deep learning (recommended)
    - bepipred: Original BepiPred
    - emini: Surface accessibility
    - kolaskar-tongaonkar: Antigenicity
    - parker: Hydrophilicity

    BepiPred-2.0 uses deep learning on crystal structures
    Threshold: >0.5 predicted as epitope (default)
    '''
    url = 'http://tools-cluster-interface.iedb.org/tools_api/bcell/'

    params = {
        'method': method,
        'sequence_text': sequence
    }

    response = requests.post(url, data=params)

    # Parse response (tab-separated)
    lines = response.text.strip().split('\n')
    header = lines[0].split('\t')
    data = [line.split('\t') for line in lines[1:]]

    return header, data

Parse BepiPred Results

import pandas as pd

def parse_bepipred_results(header, data, threshold=0.5):
    '''Parse BepiPred output and identify epitope regions

    Output columns:
    - Position: Amino acid position
    - Residue: Amino acid
    - Score: BepiPred score (higher = more likely epitope)

    Epitope threshold:
    - >0.5: Default, balanced sensitivity/specificity
    - >0.6: More stringent, fewer false positives
    - >0.4: More sensitive, more candidates
    '''
    df = pd.DataFrame(data, columns=header)
    df['Score'] = df['Score'].astype(float)
    df['Position'] = df['Position'].astype(int)

    # Identify epitope regions
    df['is_epitope'] = df['Score'] > threshold

    # Find continuous epitope regions
    epitopes = []
    current_epitope = []

    for _, row in df.iterrows():
        if row['is_epitope']:
            current_epitope.append(row)
        else:
            if len(current_epitope) >= 5:  # Minimum epitope length
                epitopes.append({
                    'start': current_epitope[0]['Position'],
                    'end': current_epitope[-1]['Position'],
                    'sequence': ''.join(r['Residue'] for r in current_epitope),
                    'avg_score': sum(r['Score'] for r in current_epitope) / len(current_epitope)
                })
            current_epitope = []

    return df, epitopes

T-Cell Epitope Prediction

def predict_tcell_epitopes_iedb(sequence, alleles, method='recommended'):
    '''Predict T-cell epitopes using IEDB

    MHC-I methods:
    - recommended: Consensus of methods
    - netmhcpan_ba: NetMHCpan binding affinity
    - netmhcpan_el: NetMHCpan eluted ligand

    MHC-II methods:
    - recommended
    - netmhciipan
    '''
    url = 'http://tools-cluster-interface.iedb.org/tools_api/mhci/'

    results = []
    for allele in alleles:
        params = {
            'method': method,
            'sequence_text': sequence,
            'allele': allele,
            'length': '9'  # Most common for MHC-I
        }

        response = requests.post(url, data=params)
        # Parse results...

    return results

Linear vs Conformational Epitopes

def classify_epitope_type(epitope_info):
    '''Classify epitope as linear or conformational

    Linear (continuous) epitopes:
    - Consecutive amino acids in primary sequence
    - ~10% of B-cell epitopes
    - Easier to predict from sequence

    Conformational (discontinuous) epitopes:
    - Non-consecutive residues brought together by folding
    - ~90% of B-cell epitopes
    - Requires structure for prediction
    '''
    pass


def predict_conformational_epitopes(pdb_file, chain='A'):
    '''Predict conformational B-cell epitopes from structure

    Uses surface accessibility and protrusion index.
    Requires 3D structure (PDB/mmCIF).

    Tools:
    - DiscoTope 2.0 (structure-based)
    - ElliPro (protrusion)
    - SEPPA 3.0
    '''
    # Structure-based prediction requires specialized tools
    # Usually accessed via web servers

    print('For conformational epitopes:')
    print('- DiscoTope: http://tools.iedb.org/discotope/')
    print('- ElliPro: http://tools.iedb.org/ellipro/')
    pass

Combine Multiple Predictions

def consensus_epitope_prediction(sequence, methods=['bepipred2', 'emini', 'parker']):
    '''Combine multiple prediction methods

    Consensus approach improves reliability:
    - Regions predicted by multiple methods more reliable
    - Different methods capture different properties

    Scoring:
    - 3/3 methods: High confidence
    - 2/3 methods: Moderate confidence
    - 1/3 methods: Low confidence
    '''
    all_results = {}

    for method in methods:
        header, data = predict_bcell_epitopes_iedb(sequence, method)
        df = pd.DataFrame(data, columns=header)
        all_results[method] = df

    # Combine scores
    consensus = all_results[methods[0]][['Position', 'Residue']].copy()

    for method in methods:
        threshold = 0.5 if method == 'bepipred2' else 0  # Method-specific thresholds
        all_results[method]['is_epitope'] = all_results[method]['Score'].astype(float) > threshold
        consensus[method] = all_results[method]['is_epitope'].astype(int)

    consensus['consensus_score'] = consensus[methods].sum(axis=1)
    consensus['confidence'] = consensus['consensus_score'].map({
        3: 'high', 2: 'moderate', 1: 'low', 0: 'none'
    })

    return consensus

Epitope Mapping from Experimental Data

def map_epitopes_from_peptide_array(array_results, overlap=11):
    '''Map epitopes from peptide array experiments

    Peptide arrays test binding of overlapping peptides
    covering the entire antigen sequence.

    Args:
        array_results: Dict mapping peptide -> signal intensity
        overlap: Overlap between consecutive peptides

    Returns:
        Epitope map with per-residue scores
    '''
    # Implementation would process experimental binding data
    pass

Related Skills

  • immunoinformatics/mhc-binding-prediction - T-cell epitope prediction
  • immunoinformatics/immunogenicity-scoring - Epitope ranking
  • structural-biology/geometric-analysis - Structure-based epitopes
<!-- AUTHOR_SIGNATURE: 9a7f3c2e-MD-BABU-MIA-2026-MSSM-SECURE -->