install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Clinical/Clinical_Databases/somatic-signatures" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-somatic-signatures && rm -rf "$T"
manifest:
Skills/Clinical/Clinical_Databases/somatic-signatures/SKILL.md
source content
<!--
# COPYRIGHT NOTICE
# This file is part of the "Universal Biomedical Skills" project.
# Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu>
# All Rights Reserved.
#
# This code is proprietary and confidential.
# Unauthorized copying of this file, via any medium is strictly prohibited.
#
# Provenance: Authenticated by MD BABU MIA
-->
name: bio-clinical-databases-somatic-signatures
description: Extract and analyze mutational signatures from somatic variants using SigProfiler or MutationalPatterns to characterize mutagenic processes. Use when identifying DNA damage mechanisms or etiology in cancer genomes.
tool_type: mixed
primary_tool: SigProfilerExtractor
measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes.
allowed-tools:
- read_file
- run_shell_command
Somatic Mutational Signatures
SigProfiler Workflow
Install and Generate Matrix
from SigProfilerMatrixGenerator import install as genInstall
from SigProfilerMatrixGenerator.scripts import SigProfilerMatrixGeneratorFunc as matGen

# Install reference genome (one-time; downloads the reference files)
genInstall.install('GRCh38')

# Generate mutational matrix from VCF
# Input: Directory containing VCF files
# Output: SBS96 matrix (96 trinucleotide contexts), written under
#         <input directory>/output/SBS/<project>.SBS96.all
#
# The project name, genome build and input directory are passed
# positionally: older releases named these parameters `genome` / `vcfFiles`,
# newer ones `reference_genome` / `path_to_input_files`, so keyword
# arguments break on one side or the other. The positional order is stable.
matrices = matGen.SigProfilerMatrixGeneratorFunc(
    'my_project',              # project name (used in output file names)
    'GRCh38',                  # reference genome build
    '/path/to/vcf_directory',  # directory containing the input VCF files
    plot=True,
    exome=False,  # Set True for WES
)
Extract Signatures
from SigProfilerExtractor import sigpro as sig


def main():
    '''Run de novo signature extraction on the SBS96 matrix.

    The optimal number of signatures is determined automatically within
    the [minimum_signatures, maximum_signatures] range.
    '''
    sig.sigProfilerExtractor(
        input_type='matrix',
        output='extraction_output',
        # SigProfilerMatrixGenerator writes its matrices under the VCF input
        # directory, i.e. <vcf_directory>/output/SBS/<project>.SBS96.all
        input_data='/path/to/vcf_directory/output/SBS/my_project.SBS96.all',
        reference_genome='GRCh38',
        minimum_signatures=1,
        maximum_signatures=10,
        nmf_replicates=100,
        cpu=-1,  # Use all cores
    )


# The extractor spawns worker processes; the guard keeps them from
# re-running the extraction when the module is re-imported on platforms
# that use the "spawn" start method (Windows, macOS).
if __name__ == '__main__':
    main()
Decompose to COSMIC Signatures
from SigProfilerAssignment import Analyzer as Analyze

# Fit to known COSMIC signatures
#
# `signature_database` is intentionally left at its default: the parameter is
# a path to a custom tab-delimited signature matrix, not a database name.
# When it is omitted, the COSMIC reference set matching `genome_build` is
# used.
Analyze.cosmic_fit(
    # Matrix produced by SigProfilerMatrixGenerator, which writes under the
    # VCF input directory: <vcf_directory>/output/SBS/<project>.SBS96.all
    samples='/path/to/vcf_directory/output/SBS/my_project.SBS96.all',
    output='assignment_output',
    input_type='matrix',
    genome_build='GRCh38',
)
MutationalPatterns (R)
Load and Analyze
library(MutationalPatterns)
library(BSgenome.Hsapiens.UCSC.hg38)

ref_genome <- 'BSgenome.Hsapiens.UCSC.hg38'

# Load VCF files
vcf_files <- list.files('vcf_dir', pattern = '\\.vcf$', full.names = TRUE)

# Strip only a literal trailing ".vcf". The previous pattern '.vcf' treated
# "." as a regex wildcard and matched anywhere in the name, so a file such as
# "xvcf_tumor.vcf" would have lost characters from the middle of its name.
sample_names <- sub('\\.vcf$', '', basename(vcf_files))

# The genome is passed positionally: the third parameter of
# read_vcfs_as_granges() is named `genome`, so `ref_genome = ...` is rejected
# as an unused argument.
vcfs <- read_vcfs_as_granges(vcf_files, sample_names, ref_genome)

# Generate 96-context mutation matrix
mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome)

# Visualize spectrum
plot_96_profile(mut_mat)
Fit to COSMIC Signatures
# Load COSMIC signatures (v3.2)
# Reference signatures are genome-build specific and get_known_signatures()
# defaults to GRCh37; the samples in this workflow are hg38, so request the
# GRCh38 versions explicitly.
cosmic_sigs <- get_known_signatures(muttype = 'snv', genome = 'GRCh38')

# Fit samples to signatures
fit_result <- fit_to_signatures(mut_mat, cosmic_sigs)

# Plot contribution (absolute mode needs the signature matrix to scale counts)
plot_contribution(fit_result$contribution, cosmic_sigs, mode = 'absolute')

# Relative contribution
plot_contribution(fit_result$contribution, cosmic_sigs, mode = 'relative')
De Novo Extraction
# Extract de novo signatures using NMF
library(NMF)

# NMF rejects rows that are entirely zero, so add a small pseudocount
# (as recommended in the MutationalPatterns vignette).
mut_mat_nmf <- mut_mat + 0.0001

# Determine optimal rank
# MutationalPatterns has no rank-estimation helper; running NMF::nmf() over a
# range of ranks yields the quality measures that plot() displays.
estimate <- nmf(mut_mat_nmf, rank = 2:8, method = 'brunet',
                nrun = 50, seed = 123456, .opt = 'v-p')
plot(estimate)

# Extract signatures (choose `rank` from the estimate plot above)
nmf_res <- extract_signatures(mut_mat_nmf, rank = 4, nrun = 100)

# Compare to COSMIC
cos_sim <- cos_sim_matrix(nmf_res$signatures, cosmic_sigs)
plot_cosine_heatmap(cos_sim)
COSMIC Signature Etiology
# Common COSMIC signatures and their etiologies
SIGNATURE_ETIOLOGY = {
    'SBS1': 'Spontaneous deamination (age-related)',
    'SBS2': 'APOBEC activity',
    'SBS3': 'Defective HR/BRCA1/2',
    'SBS4': 'Tobacco smoking',
    'SBS5': 'Unknown (age-related)',
    'SBS6': 'MMR deficiency',
    'SBS7a': 'UV exposure',
    'SBS7b': 'UV exposure',
    'SBS10a': 'POLE mutation',
    'SBS10b': 'POLE mutation',
    'SBS13': 'APOBEC activity',
    'SBS15': 'MMR deficiency',
    'SBS17a': 'Unknown',
    'SBS17b': 'Unknown',
    'SBS18': 'ROS damage',
    'SBS22': 'Aristolochic acid',
    'SBS26': 'MMR deficiency',
    # Platinum signatures are listed in the category table of this document,
    # so they are included here to keep the two in agreement.
    'SBS31': 'Platinum chemotherapy',
    'SBS35': 'Platinum chemotherapy',
    'SBS44': 'MMR deficiency',
}


def interpret_signatures(contributions, threshold=0.05):
    '''Interpret signature contributions.

    Args:
        contributions: Mapping of signature name (e.g. 'SBS1') to its
            contribution, expressed as a fraction.
        threshold: Signatures whose contribution is not strictly greater
            than this value are dropped. Defaults to 0.05 (5%).

    Returns:
        A list of dicts with keys 'signature', 'contribution' and
        'etiology', sorted by contribution in descending order. Signatures
        missing from SIGNATURE_ETIOLOGY get the etiology 'Unknown'.
    '''
    interpretations = [
        {
            'signature': sig,
            'contribution': contrib,
            'etiology': SIGNATURE_ETIOLOGY.get(sig, 'Unknown'),
        }
        for sig, contrib in contributions.items()
        if contrib > threshold
    ]
    # sorted() is stable, so ties keep their input order.
    return sorted(interpretations, key=lambda x: x['contribution'], reverse=True)
Signature Categories
| Category | Signatures | Mechanism |
|---|---|---|
| Age-related | SBS1, SBS5 | Spontaneous deamination, clock-like |
| APOBEC | SBS2, SBS13 | Cytidine deaminase activity |
| MMR deficiency | SBS6, SBS15, SBS26, SBS44 | Mismatch repair defects |
| HR deficiency | SBS3 | BRCA1/2, homologous recombination |
| POLE mutation | SBS10a, SBS10b | Proofreading defects |
| UV damage | SBS7a, SBS7b | Pyrimidine dimers |
| Smoking | SBS4 | Tobacco carcinogens |
| Platinum therapy | SBS31, SBS35 | Treatment-related |
Cosine Similarity
import numpy as np


def cosine_similarity(sig1, sig2):
    '''Calculate cosine similarity between two signatures.

    Args:
        sig1: First signature profile (array-like of numbers).
        sig2: Second signature profile, same length as sig1.

    Returns:
        The cosine of the angle between the two profiles. If either profile
        has zero norm (e.g. an all-zero vector) the similarity is undefined
        and 0.0 is returned instead of NaN.
    '''
    vec1 = np.asarray(sig1, dtype=float)
    vec2 = np.asarray(sig2, dtype=float)
    denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    # Guard against division by zero for empty/all-zero profiles.
    if denominator == 0:
        return 0.0
    return np.dot(vec1, vec2) / denominator


# Threshold: >0.8 considered similar
#            >0.9 considered same signature
Clinical Applications
def signature_clinical_implications(dominant_signatures):
    '''Map dominant mutational signatures to their clinical implications.

    Each signature that belongs to a known group (HR deficiency, MMR
    deficiency, APOBEC) yields one dict with the keys 'signature',
    'implication' and 'testing'. Signatures outside those groups are
    skipped. Results follow the order of the input.
    '''
    # (member signatures, implication text, suggested follow-up test)
    rules = (
        (
            ('SBS3',),
            'HR deficiency - may respond to PARP inhibitors',
            'Consider BRCA1/2 testing',
        ),
        (
            ('SBS6', 'SBS15', 'SBS26', 'SBS44'),
            'MMR deficiency - may respond to immunotherapy',
            'Consider MSI testing',
        ),
        (
            ('SBS2', 'SBS13'),
            'APOBEC activity - associated with high TMB',
            'Consider TMB assessment',
        ),
    )

    findings = []
    for name in dominant_signatures:
        for members, implication_text, follow_up in rules:
            if name in members:
                findings.append({
                    'signature': name,
                    'implication': implication_text,
                    'testing': follow_up,
                })
                break  # a signature belongs to at most one group
    return findings
Related Skills
- clinical-databases/tumor-mutational-burden - TMB calculation
- variant-calling/somatic-variant-calling - Input variants
- data-visualization/heatmaps-clustering - Signature visualization