install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Structural_Biology/bioSkills/alphafold-predictions" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-alphafold-predicti && rm -rf "$T"
manifest:
Skills/Structural_Biology/bioSkills/alphafold-predictions/SKILL.md
source content
<!--
# COPYRIGHT NOTICE
# This file is part of the "Universal Biomedical Skills" project.
# Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu>
# All Rights Reserved.
#
# This code is proprietary and confidential.
# Unauthorized copying of this file, via any medium is strictly prohibited.
#
# Provenance: Authenticated by MD BABU MIA
-->
name: bio-structural-biology-alphafold-predictions
description: Access and analyze AlphaFold protein structure predictions. Use when predicted structures are needed for proteins without experimental structures, or for confidence scores (pLDDT).
tool_type: python
primary_tool: requests
measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes.
allowed-tools:
  - read_file
  - run_shell_command
AlphaFold Predictions
Download and analyze AlphaFold predicted protein structures from the AlphaFold Protein Structure Database.
Download Structures
Single Structure by UniProt ID
import requests


def download_alphafold(uniprot_id, output_dir='.', file_format='pdb'):
    '''Download an AlphaFold model file for a UniProt accession.

    Fetches fragment F1 of database release v4 from the AlphaFold
    Protein Structure Database and saves it under ``output_dir``.

    Args:
        uniprot_id: UniProt accession, e.g. 'P04637'.
        output_dir: Existing directory to write the file into.
        file_format: 'pdb' (default) for coordinates only, or 'cif'
            for the mmCIF file.

    Returns:
        Path of the saved file, or None when the server does not
        answer with HTTP 200.

    Raises:
        ValueError: If ``file_format`` is neither 'pdb' nor 'cif'.
    '''
    if file_format not in ('pdb', 'cif'):
        raise ValueError(
            f"file_format must be 'pdb' or 'cif', got {file_format!r}"
        )

    filename = f'AF-{uniprot_id}-F1-model_v4.{file_format}'
    url = f'https://alphafold.ebi.ac.uk/files/{filename}'

    # requests has no default timeout; without one a stalled connection
    # blocks the caller indefinitely.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        return None

    output_path = f'{output_dir}/{filename}'
    # Save the raw bytes so the file on disk is exactly what the server
    # sent, independent of charset guessing or newline translation.
    with open(output_path, 'wb') as f:
        f.write(response.content)
    return output_path


pdb_file = download_alphafold('P04637')  # Human p53
Check Availability
def check_alphafold_exists(uniprot_id):
    '''Check whether the AlphaFold DB API knows a UniProt accession.

    Args:
        uniprot_id: UniProt accession, e.g. 'P04637'.

    Returns:
        True if the prediction endpoint answers with HTTP 200,
        otherwise False.
    '''
    url = f'https://alphafold.ebi.ac.uk/api/prediction/{uniprot_id}'
    # requests has no default timeout; set one so an unresponsive server
    # cannot hang the check forever.
    response = requests.get(url, timeout=30)
    return response.status_code == 200


if check_alphafold_exists('P04637'):
    print('AlphaFold structure available')
Get Metadata
def get_alphafold_info(uniprot_id):
    '''Fetch AlphaFold prediction metadata for a UniProt accession.

    Args:
        uniprot_id: UniProt accession, e.g. 'P04637'.

    Returns:
        The first entry of the JSON list returned by the prediction
        endpoint, or None when the request does not return HTTP 200
        or the list is empty.
    '''
    url = f'https://alphafold.ebi.ac.uk/api/prediction/{uniprot_id}'
    # requests has no default timeout; set one so a stalled connection
    # cannot block indefinitely.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        return None

    entries = response.json()
    # Guard the [0] lookup: an empty list would otherwise raise IndexError
    # instead of yielding the documented None.
    return entries[0] if entries else None


info = get_alphafold_info('P04637')
if info is None:
    # The lookup can return None, so do not subscript it blindly.
    print('No AlphaFold prediction metadata found')
else:
    print(f"Gene: {info['gene']}")
    print(f"Organism: {info['organismScientificName']}")
    print(f"Model version: {info['latestVersion']}")
File Types Available
Database version v4 (current as of 2025). The version number refers to the database release, not the AlphaFold model version.
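| File | URL Pattern | Description |
|---|---|---|
| PDB | `https://alphafold.ebi.ac.uk/files/AF-{uniprot_id}-F1-model_v4.pdb` | Structure coordinates |
| mmCIF | `https://alphafold.ebi.ac.uk/files/AF-{uniprot_id}-F1-model_v4.cif` | Structure with metadata |
| PAE JSON | `https://alphafold.ebi.ac.uk/files/AF-{uniprot_id}-F1-predicted_aligned_error_v4.json` | Predicted aligned error |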
def download_pae(uniprot_id, output_dir='.'):
    '''Download the PAE (predicted aligned error) JSON for an accession.

    Args:
        uniprot_id: UniProt accession, e.g. 'P04637'.
        output_dir: Existing directory to write the file into.

    Returns:
        Path of the saved file (named ``AF-<id>-F1-pae.json``), or None
        when the server does not answer with HTTP 200.
    '''
    url = (
        'https://alphafold.ebi.ac.uk/files/'
        f'AF-{uniprot_id}-F1-predicted_aligned_error_v4.json'
    )
    # requests has no default timeout; without one a stalled connection
    # blocks the caller indefinitely.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        return None

    output_path = f'{output_dir}/AF-{uniprot_id}-F1-pae.json'
    # Save the raw bytes so the JSON on disk is exactly what the server
    # sent, independent of charset guessing or newline translation.
    with open(output_path, 'wb') as f:
        f.write(response.content)
    return output_path
Analyze pLDDT Confidence Scores
Extract from PDB B-factors
AlphaFold stores pLDDT scores in the B-factor column.
from Bio.PDB import PDBParser


def extract_plddt(pdb_file):
    '''Read per-residue pLDDT values from the B-factor column of a PDB file.

    Args:
        pdb_file: Path to the PDB file to parse.

    Returns:
        Dict mapping residue sequence number to the B-factor of that
        residue's CA atom (or of its first atom when there is no CA).
        Residues whose hetero flag is not blank are skipped.
    '''
    structure = PDBParser(QUIET=True).get_structure('protein', pdb_file)

    scores = {}
    # get_residues() walks models, then chains, then residues.
    for residue in structure.get_residues():
        hetero_flag, seq_number = residue.id[0], residue.id[1]
        if hetero_flag != ' ':
            continue  # not a standard residue

        if 'CA' in residue:
            atom = residue['CA']
        else:
            atom = list(residue.get_atoms())[0]
        scores[seq_number] = atom.get_bfactor()
    return scores


plddt = extract_plddt('AF-P04637-F1-model_v4.pdb')
avg_plddt = sum(plddt.values()) / len(plddt)
print(f'Average pLDDT: {avg_plddt:.1f}')
Confidence Interpretation
| pLDDT | Confidence | Interpretation |
|---|---|---|
| >90 | Very high | High accuracy, often comparable to an experimental structure |
| 70-90 | Confident | Good backbone, may have sidechain errors |
| 50-70 | Low | Caution, may be disordered |
| <50 | Very low | Likely disordered or wrong |
Plot pLDDT per Residue
import matplotlib.pyplot as plt


def plot_plddt(plddt_dict, output='plddt_plot.png'):
    '''Plot pLDDT against residue number and save the figure.

    Args:
        plddt_dict: Mapping of residue number to pLDDT score.
        output: Path of the image file to write.
    '''
    positions = sorted(plddt_dict.keys())
    values = [plddt_dict[pos] for pos in positions]

    fig, ax = plt.subplots(figsize=(12, 4))
    ax.fill_between(positions, values, alpha=0.3)
    ax.plot(positions, values)

    # Dashed guides at the confidence band boundaries.
    ax.axhline(y=70, color='orange', linestyle='--', label='Confident threshold')
    ax.axhline(y=90, color='green', linestyle='--', label='Very high threshold')

    ax.set_xlabel('Residue')
    ax.set_ylabel('pLDDT')
    ax.set_ylim(0, 100)
    ax.legend()

    fig.savefig(output)
    plt.close(fig)


plot_plddt(plddt)
Analyze PAE (Predicted Aligned Error)
import json

import numpy as np
import matplotlib.pyplot as plt


def load_pae(pae_file):
    '''Load the PAE matrix from an AlphaFold JSON file.

    Accepts both layouts handled by this snippet: a list whose first
    element holds the record (AlphaFold v4 format) and a bare dict
    (older format).

    Args:
        pae_file: Path to the PAE JSON file.

    Returns:
        numpy array built from the 'predicted_aligned_error' entry.

    Raises:
        KeyError: If the record has no 'predicted_aligned_error' key.
    '''
    with open(pae_file) as f:
        data = json.load(f)

    # Decide on the container type before indexing: data[0] on a dict
    # raises KeyError, which would make the bare-dict layout unreadable.
    record = data[0] if isinstance(data, list) else data
    return np.array(record['predicted_aligned_error'])


def plot_pae(pae_matrix, output='pae_plot.png'):
    '''Render a PAE matrix as a heat map and save it to ``output``.

    Args:
        pae_matrix: 2-D array-like of predicted aligned errors.
        output: Path of the image file to write.
    '''
    plt.figure(figsize=(8, 8))
    # Reversed green colormap: low error is dark green, high error is pale.
    plt.imshow(pae_matrix, cmap='Greens_r', vmin=0, vmax=30)
    plt.colorbar(label='Expected position error (A)')
    plt.xlabel('Scored residue')
    plt.ylabel('Aligned residue')
    plt.title('Predicted Aligned Error')
    plt.savefig(output)
    plt.close()


pae = load_pae('AF-P04637-F1-pae.json')
plot_pae(pae)
PAE Interpretation
- Low PAE (green): Residues have well-defined relative positions
- High PAE (white): Uncertain relative positions (e.g., across flexible linkers or between independently positioned domains)
- Diagonal blocks: Distinct structural domains
Batch Download
def batch_download_alphafold(uniprot_ids, output_dir='.'):
    '''Download AlphaFold structures for several UniProt IDs.

    Creates ``output_dir`` if needed, calls ``download_alphafold`` for
    each ID in order, and prints one status line per ID.

    Args:
        uniprot_ids: Iterable of UniProt IDs.
        output_dir: Directory to store the downloaded files in.

    Returns:
        Dict mapping each ID to the value returned by
        ``download_alphafold`` for it (a falsy value is reported as
        not found).
    '''
    import os

    os.makedirs(output_dir, exist_ok=True)

    downloaded = {}
    for accession in uniprot_ids:
        saved_path = download_alphafold(accession, output_dir)
        downloaded[accession] = saved_path
        print(
            f'Downloaded: {accession}' if saved_path
            else f'Not found: {accession}'
        )
    return downloaded


# NOTE(review): 'P53_HUMAN' looks like a UniProt entry name rather than an
# accession; presumably it is here to show the "Not found" path - confirm.
ids = ['P04637', 'P53_HUMAN', 'Q9Y6K9']
files = batch_download_alphafold(ids, 'alphafold_structures')
Compare with Experimental Structure
from Bio.PDB import PDBParser, Superimposer


def compare_structures(alphafold_pdb, experimental_pdb):
    '''Superimpose two structures on their CA atoms and return the RMSD.

    CA atoms are collected from every residue of the first model of each
    file. Both lists are truncated to the shorter length and paired by
    position in the list.

    NOTE(review): pairing is purely positional, so the result is only
    meaningful when both files list the same residues in the same order.

    Args:
        alphafold_pdb: Path to the AlphaFold PDB file.
        experimental_pdb: Path to the experimental PDB file.

    Returns:
        RMSD reported by the Superimposer after fitting the AlphaFold
        atoms onto the experimental atoms.
    '''
    parser = PDBParser(QUIET=True)

    def first_model_ca_atoms(label, path):
        # CA atoms of all residues in model 0, in file order.
        model = parser.get_structure(label, path)[0]
        return [res['CA'] for res in model.get_residues() if 'CA' in res]

    predicted = first_model_ca_atoms('af', alphafold_pdb)
    reference = first_model_ca_atoms('exp', experimental_pdb)

    # Superimposer needs equally long lists, so trim to the common length.
    n_paired = min(len(predicted), len(reference))

    aligner = Superimposer()
    # First argument is the fixed set, second the set being moved.
    aligner.set_atoms(reference[:n_paired], predicted[:n_paired])
    return aligner.rms
Related Skills
- structural-biology/structure-io - Load and parse PDB/mmCIF files
- structural-biology/geometric-analysis - RMSD, superimposition
- database-access/uniprot-access - Get UniProt IDs for proteins
- structural-biology/structure-navigation - Navigate structure hierarchy