install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Structural_Biology/bioSkills/geometric-analysis" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-geometric-analysis && rm -rf "$T"
manifest:
Skills/Structural_Biology/bioSkills/geometric-analysis/SKILL.mdsource content
<!--
# COPYRIGHT NOTICE
# This file is part of the "Universal Biomedical Skills" project.
# Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu>
# All Rights Reserved.
#
# This code is proprietary and confidential.
# Unauthorized copying of this file, via any medium is strictly prohibited.
#
# Provenance: Authenticated by MD BABU MIA
-->
name: bio-pdb-geometric-analysis description: Perform geometric calculations on protein structures using Biopython Bio.PDB. Use when measuring distances, angles, and dihedrals, superimposing structures, calculating RMSD, or computing solvent accessible surface area (SASA). tool_type: python primary_tool: Bio.PDB measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes. allowed-tools:
- read_file
- run_shell_command
Geometric Analysis
Measure distances, angles, and dihedrals. Superimpose structures and calculate RMSD. Find neighbor atoms and contacts.
Required Imports
from Bio.PDB import PDBParser, NeighborSearch, Superimposer from Bio.PDB import calc_angle, calc_dihedral import numpy as np
Distance Between Atoms
from Bio.PDB import PDBParser parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb') chain = structure[0]['A'] atom1 = chain[100]['CA'] atom2 = chain[200]['CA'] # Direct subtraction gives distance distance = atom1 - atom2 print(f'Distance: {distance:.2f} Angstroms') # Or use numpy import numpy as np distance = np.linalg.norm(atom1.coord - atom2.coord)
Distance Matrix
import numpy as np from Bio.PDB import PDBParser parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb') ca_atoms = [r['CA'] for r in structure.get_residues() if r.has_id('CA') and r.id[0] == ' '] n = len(ca_atoms) dist_matrix = np.zeros((n, n)) for i in range(n): for j in range(i + 1, n): dist = ca_atoms[i] - ca_atoms[j] dist_matrix[i, j] = dist dist_matrix[j, i] = dist print(f'Distance matrix shape: {dist_matrix.shape}')
Angle Between Three Atoms
from Bio.PDB import PDBParser, calc_angle parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb') residue = structure[0]['A'][100] n = residue['N'] ca = residue['CA'] c = residue['C'] # calc_angle takes Vector objects (atom.coord returns array, use atom.get_vector()) angle_rad = calc_angle(n.get_vector(), ca.get_vector(), c.get_vector()) angle_deg = np.degrees(angle_rad) print(f'N-CA-C angle: {angle_deg:.1f} degrees')
Dihedral Angles
from Bio.PDB import PDBParser, calc_dihedral import numpy as np parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb') chain = structure[0]['A'] # Calculate phi angle (C-N-CA-C) res_prev = chain[99] res_curr = chain[100] phi = calc_dihedral( res_prev['C'].get_vector(), res_curr['N'].get_vector(), res_curr['CA'].get_vector(), res_curr['C'].get_vector() ) print(f'Phi: {np.degrees(phi):.1f} degrees') # Calculate psi angle (N-CA-C-N) res_next = chain[101] psi = calc_dihedral( res_curr['N'].get_vector(), res_curr['CA'].get_vector(), res_curr['C'].get_vector(), res_next['N'].get_vector() ) print(f'Psi: {np.degrees(psi):.1f} degrees')
Ramachandran Angles for All Residues
from Bio.PDB import PDBParser, PPBuilder, calc_dihedral import numpy as np parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb') ppb = PPBuilder() phi_psi = [] for pp in ppb.build_peptides(structure): angles = pp.get_phi_psi_list() for residue, (phi, psi) in zip(pp, angles): if phi is not None and psi is not None: phi_psi.append((residue.resname, np.degrees(phi), np.degrees(psi))) for name, phi, psi in phi_psi[:10]: print(f'{name}: phi={phi:.1f}, psi={psi:.1f}')
Finding Neighbor Atoms
from Bio.PDB import PDBParser, NeighborSearch parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb') # Build search tree from all atoms all_atoms = list(structure.get_atoms()) ns = NeighborSearch(all_atoms) # Find atoms within radius of a point center = structure[0]['A'][100]['CA'].coord radius = 5.0 # Angstroms neighbors = ns.search(center, radius) print(f'Found {len(neighbors)} atoms within {radius}A') # Find atoms within radius of another atom ca_atom = structure[0]['A'][100]['CA'] neighbors = ns.search(ca_atom.coord, 5.0)
Finding Residue Contacts
from Bio.PDB import PDBParser, NeighborSearch parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb') all_atoms = list(structure.get_atoms()) ns = NeighborSearch(all_atoms) # Find all atom pairs within distance contact_distance = 4.0 contacts = ns.search_all(contact_distance, level='R') # R = residue level print(f'Found {len(contacts)} residue contacts within {contact_distance}A') for res1, res2 in contacts[:10]: print(f' {res1.resname}{res1.id[1]} - {res2.resname}{res2.id[1]}')
Contact Levels
from Bio.PDB import NeighborSearch # search_all returns pairs at specified level # Level codes: A=atom, R=residue, C=chain, M=model, S=structure atom_contacts = ns.search_all(4.0, level='A') # Atom pairs residue_contacts = ns.search_all(4.0, level='R') # Residue pairs chain_contacts = ns.search_all(10.0, level='C') # Chain pairs
Superimposing Structures
from Bio.PDB import PDBParser, Superimposer parser = PDBParser(QUIET=True) ref_structure = parser.get_structure('ref', 'reference.pdb') mobile_structure = parser.get_structure('mobile', 'mobile.pdb') # Get CA atoms from both structures ref_atoms = [r['CA'] for r in ref_structure.get_residues() if r.has_id('CA') and r.id[0] == ' '] mobile_atoms = [r['CA'] for r in mobile_structure.get_residues() if r.has_id('CA') and r.id[0] == ' '] # Ensure same number of atoms n = min(len(ref_atoms), len(mobile_atoms)) ref_atoms = ref_atoms[:n] mobile_atoms = mobile_atoms[:n] # Superimpose sup = Superimposer() sup.set_atoms(ref_atoms, mobile_atoms) print(f'RMSD before: {sup.rms:.2f} Angstroms') # Apply transformation to all atoms in mobile structure sup.apply(mobile_structure.get_atoms())
Calculating RMSD
from Bio.PDB import PDBParser, Superimposer import numpy as np parser = PDBParser(QUIET=True) struct1 = parser.get_structure('s1', 'structure1.pdb') struct2 = parser.get_structure('s2', 'structure2.pdb') atoms1 = [r['CA'] for r in struct1.get_residues() if r.has_id('CA') and r.id[0] == ' '] atoms2 = [r['CA'] for r in struct2.get_residues() if r.has_id('CA') and r.id[0] == ' '] # Using Superimposer (with alignment) sup = Superimposer() sup.set_atoms(atoms1, atoms2) rmsd_aligned = sup.rms print(f'RMSD (aligned): {rmsd_aligned:.2f} A') # Raw RMSD (no alignment) coords1 = np.array([a.coord for a in atoms1]) coords2 = np.array([a.coord for a in atoms2]) rmsd_raw = np.sqrt(np.mean(np.sum((coords1 - coords2) ** 2, axis=1))) print(f'RMSD (raw): {rmsd_raw:.2f} A')
CEAligner for Dissimilar Structures
from Bio.PDB import PDBParser, CEAligner parser = PDBParser(QUIET=True) ref = parser.get_structure('ref', 'reference.pdb') mobile = parser.get_structure('mobile', 'query.pdb') # CE alignment works for structures with different sequences aligner = CEAligner() aligner.set_reference(ref) aligner.align(mobile) print(f'RMSD: {aligner.rms:.2f} A') # CEAligner automatically modifies mobile structure coordinates
Center of Mass
from Bio.PDB import PDBParser import numpy as np parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb') # Unweighted center (geometric center) coords = np.array([a.coord for a in structure.get_atoms()]) center = coords.mean(axis=0) print(f'Geometric center: {center}') # Mass-weighted center (approximate - uses atom count) # For accurate mass-weighted, use element masses atoms = list(structure.get_atoms()) masses = {'C': 12.0, 'N': 14.0, 'O': 16.0, 'S': 32.0, 'H': 1.0} total_mass = 0 weighted_sum = np.zeros(3) for atom in atoms: mass = masses.get(atom.element, 12.0) weighted_sum += mass * atom.coord total_mass += mass center_of_mass = weighted_sum / total_mass print(f'Center of mass: {center_of_mass}')
Radius of Gyration
import numpy as np from Bio.PDB import PDBParser parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb') coords = np.array([a.coord for a in structure.get_atoms()]) center = coords.mean(axis=0) # Radius of gyration rg = np.sqrt(np.mean(np.sum((coords - center) ** 2, axis=1))) print(f'Radius of gyration: {rg:.2f} A')
Finding Surface Residues
from Bio.PDB import PDBParser, NeighborSearch parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb') # Simple approach: residues with few neighbors all_atoms = list(structure.get_atoms()) ns = NeighborSearch(all_atoms) surface_residues = [] for residue in structure.get_residues(): if residue.id[0] != ' ': continue if not residue.has_id('CA'): continue ca = residue['CA'] neighbors = ns.search(ca.coord, 10.0, level='R') if len(neighbors) < 15: # Threshold for surface surface_residues.append(residue) print(f'Surface residues: {len(surface_residues)}')
Vector Operations
from Bio.PDB import PDBParser from Bio.PDB.vectors import Vector, rotaxis parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb') # Get vectors from atoms atom1 = structure[0]['A'][100]['CA'] atom2 = structure[0]['A'][101]['CA'] v1 = atom1.get_vector() v2 = atom2.get_vector() # Vector operations diff = v2 - v1 print(f'Distance vector: {diff}') print(f'Length: {diff.norm():.2f}') # Normalize unit = diff.normalized() # Cross product cross = v1 ** v2 # Dot product dot = v1 * v2
Chi Angles (Sidechain Dihedrals)
from Bio.PDB import PDBParser, calc_dihedral import numpy as np parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb') # Chi1 angle for a residue (N-CA-CB-CG) residue = structure[0]['A'][100] if residue.has_id('CB') and residue.has_id('CG'): chi1 = calc_dihedral( residue['N'].get_vector(), residue['CA'].get_vector(), residue['CB'].get_vector(), residue['CG'].get_vector() ) print(f'Chi1: {np.degrees(chi1):.1f} degrees')
Solvent Accessible Surface Area (SASA)
from Bio.PDB import PDBParser from Bio.PDB.SASA import ShrakeRupley parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb') # Calculate SASA using Shrake-Rupley algorithm sr = ShrakeRupley() sr.compute(structure, level='S') # S=structure, M=model, C=chain, R=residue, A=atom # Access total SASA print(f'Total SASA: {structure.sasa:.2f} A^2') # Access per-residue SASA for residue in structure.get_residues(): if hasattr(residue, 'sasa'): print(f'{residue.resname}{residue.id[1]}: {residue.sasa:.2f} A^2')
SASA with Custom Parameters
from Bio.PDB import PDBParser from Bio.PDB.SASA import ShrakeRupley parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb') # Higher precision (more points = slower but more accurate) sr = ShrakeRupley(probe_radius=1.4, n_points=960) sr.compute(structure, level='R') # Per-atom SASA for atom in structure.get_atoms(): print(f'{atom.name}: {atom.sasa:.2f} A^2')
Identifying Buried vs Exposed Residues
from Bio.PDB import PDBParser from Bio.PDB.SASA import ShrakeRupley parser = PDBParser(QUIET=True) structure = parser.get_structure('protein', 'protein.pdb') sr = ShrakeRupley() sr.compute(structure, level='R') buried = [] exposed = [] for residue in structure.get_residues(): if residue.id[0] != ' ': continue if hasattr(residue, 'sasa'): if residue.sasa < 10.0: # Threshold in A^2 buried.append(residue) else: exposed.append(residue) print(f'Buried: {len(buried)}, Exposed: {len(exposed)}')
Related Skills
- structure-io - Parse and write structure files
- structure-navigation - Access chains, residues, atoms
- structure-modification - Transform coordinates, modify structures
- alignment/pairwise-alignment - Sequence alignment for structure comparison