Claude-skill-registry biological-expert
Expert-level biology, biotechnology, genetics, bioinformatics, and computational biology
install
source · Clone the upstream repo
git clone https://github.com/majiayu000/claude-skill-registry
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/majiayu000/claude-skill-registry "$T" && mkdir -p ~/.claude/skills && cp -r "$T/skills/data/biological-expert" ~/.claude/skills/majiayu000-claude-skill-registry-biological-expert && rm -rf "$T"
manifest:
skills/data/biological-expert/SKILL.md — source content
Biological Sciences Expert
Expert guidance for biology, biotechnology, genetics, bioinformatics, and computational biology applications.
Core Concepts
Molecular Biology
- DNA, RNA, and protein structure
- Central dogma (transcription, translation)
- Gene expression and regulation
- Genetic mutations and variations
- CRISPR and gene editing
- Protein folding and structure
Genomics & Bioinformatics
- DNA sequencing (Sanger, NGS, long-read)
- Genome assembly and annotation
- Sequence alignment (BLAST, BLAT)
- Variant calling and analysis
- RNA-seq analysis
- Phylogenetic analysis
Systems Biology
- Metabolic pathways
- Protein-protein interactions
- Gene regulatory networks
- Mathematical modeling
- Pathway analysis
- Network biology
DNA Sequence Analysis
from Bio import SeqIO, Seq
from Bio.Seq import Seq  # NOTE: rebinds `Seq` from the module imported above to the Seq class
from Bio.SeqUtils import gc_fraction, molecular_weight
from typing import Dict, List


class DNAAnalyzer:
    """Analyze a single DNA sequence held as an upper-cased Biopython ``Seq``."""

    def __init__(self, sequence: str):
        """Store *sequence* upper-cased so later comparisons are case-insensitive."""
        self.sequence = Seq(sequence.upper())

    def basic_stats(self) -> Dict:
        """Calculate basic sequence statistics.

        Returns:
            A dict with keys ``"length"``, ``"gc_content"`` (the
            ``gc_fraction`` result multiplied by 100), ``"molecular_weight"``
            (computed with sequence type ``"DNA"``) and
            ``"nucleotide_counts"``.
        """
        return {
            "length": len(self.sequence),
            "gc_content": gc_fraction(self.sequence) * 100,
            "molecular_weight": molecular_weight(self.sequence, "DNA"),
            "nucleotide_counts": self._count_nucleotides()
        }

    def _count_nucleotides(self) -> Dict[str, int]:
        """Count occurrences of A, T, G and C (other symbols are not counted)."""
        return {
            'A': self.sequence.count('A'),
            'T': self.sequence.count('T'),
            'G': self.sequence.count('G'),
            'C': self.sequence.count('C')
        }

    def transcribe(self) -> str:
        """Transcribe DNA to RNA and return it as a plain string."""
        return str(self.sequence.transcribe())

    def translate(self, table: int = 1) -> str:
        """Translate DNA to protein using the given codon *table* id."""
        return str(self.sequence.translate(table=table))

    def reverse_complement(self) -> str:
        """Return the reverse complement as a plain string."""
        return str(self.sequence.reverse_complement())

    def find_orfs(self, min_length: int = 100) -> List[Dict]:
        """Find Open Reading Frames on both strands in all three frames.

        An ORF here runs from an ``M`` in the translated frame up to (not
        including) the next ``*``. Every ``M`` that is followed by a stop is
        reported, so ORFs sharing a stop codon may be nested.

        Args:
            min_length: Minimum ORF length in nucleotides (stop codon
                excluded) for an ORF to be reported.

        Returns:
            A list of dicts with keys ``"strand"`` (+1 or -1), ``"frame"``
            (0-2), ``"start"``, ``"end"``, ``"length"`` and ``"protein"``.
            ``"start"`` and ``"end"`` are nucleotide offsets into the strand
            that was scanned; for strand -1 that is the reverse complement,
            not the original sequence. ``"end"`` is the offset of the first
            base of the stop codon.
        """
        orfs = []
        strands = ((+1, self.sequence), (-1, self.sequence.reverse_complement()))
        for strand, seq in strands:
            for frame in range(3):
                framed = seq[frame:]
                # Drop the trailing partial codon: translating a length that
                # is not a multiple of three makes Biopython emit a warning.
                usable = len(framed) - len(framed) % 3
                protein = str(framed[:usable].translate(to_stop=False))
                for i, aa in enumerate(protein):
                    if aa != 'M':  # Start codon
                        continue
                    j = protein.find('*', i + 1)  # Next stop codon
                    if j == -1:
                        continue  # No stop downstream: not a closed ORF
                    orf_len = (j - i) * 3
                    if orf_len >= min_length:
                        orfs.append({
                            "strand": strand,
                            "frame": frame,
                            "start": i * 3 + frame,
                            "end": j * 3 + frame,
                            "length": orf_len,
                            "protein": protein[i:j]
                        })
        return orfs

    def find_motif(self, motif: str) -> List[int]:
        """Find all (possibly overlapping) 0-based start positions of *motif*.

        The motif is upper-cased before matching, mirroring the upper-cased
        stored sequence.
        """
        motif = motif.upper()
        text = str(self.sequence)  # Convert once instead of per position
        positions = []
        pos = text.find(motif)
        while pos != -1:
            positions.append(pos)
            # Advance by one, not by len(motif), so overlapping hits are kept.
            pos = text.find(motif, pos + 1)
        return positions
Sequence Alignment
from Bio import pairwise2
from Bio.pairwise2 import format_alignment
import numpy as np


class SequenceAligner:
    """Perform pairwise sequence alignments with ``Bio.pairwise2``."""

    @staticmethod
    def global_alignment(seq1: str, seq2: str,
                         match: float = 2, mismatch: float = -1,
                         gap_open: float = -0.5, gap_extend: float = -0.1) -> dict:
        """Perform global alignment (Needleman-Wunsch).

        Args:
            seq1, seq2: Sequences to align.
            match, mismatch: Scores for matching / mismatching characters.
            gap_open, gap_extend: Gap opening / extension penalties.

        Returns:
            A dict with ``"aligned_seq1"``, ``"aligned_seq2"``, ``"score"``
            and ``"identity"`` (percent) for the first alignment returned.
            If no alignment is returned, the aligned strings are empty and
            score and identity are 0.0.
        """
        alignments = pairwise2.align.globalms(
            seq1, seq2, match, mismatch, gap_open, gap_extend
        )
        return SequenceAligner._summarize(alignments)

    @staticmethod
    def local_alignment(seq1: str, seq2: str,
                        match: float = 2, mismatch: float = -1,
                        gap_open: float = -0.5, gap_extend: float = -0.1) -> dict:
        """Perform local alignment (Smith-Waterman).

        Takes the same arguments and returns the same dict shape as
        ``global_alignment``, including the empty result when no alignment
        is returned.
        """
        alignments = pairwise2.align.localms(
            seq1, seq2, match, mismatch, gap_open, gap_extend
        )
        return SequenceAligner._summarize(alignments)

    @staticmethod
    def _summarize(alignments) -> dict:
        """Turn a pairwise2 result list into the public result dict."""
        if not alignments:
            # Previously this case raised IndexError on alignments[0].
            return {
                "aligned_seq1": "",
                "aligned_seq2": "",
                "score": 0.0,
                "identity": 0.0
            }
        best = alignments[0]
        return {
            "aligned_seq1": best.seqA,
            "aligned_seq2": best.seqB,
            "score": best.score,
            "identity": SequenceAligner._calculate_identity(best.seqA, best.seqB)
        }

    @staticmethod
    def _calculate_identity(seq1: str, seq2: str) -> float:
        """Calculate sequence identity percentage.

        Counts columns where both strings hold the same non-gap character and
        divides by the length of the shorter string. Returns 0.0 when either
        string is empty instead of dividing by zero.
        """
        shortest = min(len(seq1), len(seq2))
        if shortest == 0:
            return 0.0
        matches = sum(1 for a, b in zip(seq1, seq2) if a == b and a != '-')
        return (matches / shortest) * 100
Genomic Variant Analysis
from dataclasses import dataclass
from typing import Dict, Optional


@dataclass
class Variant:
    """A single called variant."""

    chromosome: str
    position: int
    reference: str  # reference allele string
    alternate: str  # alternate allele string
    quality: float
    genotype: str
    depth: int
    allele_frequency: Optional[float] = None  # None means "not known"


class VariantAnnotator:
    """Annotate genetic variants."""

    def __init__(self):
        # Placeholder for gene annotations; nothing in this class reads it yet.
        self.gene_annotations = {}

    def annotate_variant(self, variant: Variant) -> Dict:
        """Annotate variant with functional consequences.

        Returns:
            A dict with ``"variant"`` (``chrom:posREF>ALT``), ``"type"``,
            ``"effect"``, ``"quality"`` and ``"depth"``. When the variant
            carries an allele frequency (including 0.0), ``"allele_frequency"``
            and ``"rarity"`` are added as well.
        """
        annotation = {
            "variant": f"{variant.chromosome}:{variant.position}{variant.reference}>{variant.alternate}",
            "type": self._classify_variant_type(variant),
            "effect": self._predict_effect(variant),
            "quality": variant.quality,
            "depth": variant.depth
        }

        # Compare against None explicitly: a frequency of 0.0 is a real value
        # and must not be treated as missing.
        if variant.allele_frequency is not None:
            annotation["allele_frequency"] = variant.allele_frequency
            annotation["rarity"] = self._classify_rarity(variant.allele_frequency)

        return annotation

    def _classify_variant_type(self, variant: Variant) -> str:
        """Classify variant type from the reference/alternate allele lengths."""
        ref_len = len(variant.reference)
        alt_len = len(variant.alternate)

        if ref_len == 1 and alt_len == 1:
            return "SNV"  # Single Nucleotide Variant
        elif ref_len < alt_len:
            return "INSERTION"
        elif ref_len > alt_len:
            return "DELETION"
        else:
            # Equal lengths other than 1/1 land here.
            # NOTE(review): such substitutions are often labelled MNV rather
            # than INDEL; label kept for backward compatibility.
            return "INDEL"

    def _predict_effect(self, variant: Variant) -> str:
        """Predict variant effect on protein (simplified placeholder)."""
        # Simplified effect prediction
        if self._classify_variant_type(variant) == "SNV":
            # Would check if it's in coding region, causes stop codon, etc.
            return "MISSENSE"
        return "UNKNOWN"

    def _classify_rarity(self, af: float) -> str:
        """Classify variant rarity: >0.05 COMMON, >0.01 LOW_FREQUENCY, else RARE."""
        if af > 0.05:
            return "COMMON"
        elif af > 0.01:
            return "LOW_FREQUENCY"
        else:
            return "RARE"
RNA-seq Analysis
import pandas as pd
import numpy as np
from scipy import stats
from typing import List


class RNASeqAnalyzer:
    """Analyze RNA-seq expression data."""

    def __init__(self, counts_matrix: pd.DataFrame):
        """
        counts_matrix: genes x samples matrix of raw counts
        """
        self.counts = counts_matrix
        self.normalized = None
        # Most recent differential_expression() table, used by
        # identify_marker_genes().
        self._last_results = None

    def normalize_counts(self, method: str = "tpm"):
        """Normalize count data and cache the result in ``self.normalized``.

        Args:
            method: ``"tpm"`` (or its alias ``"cpm"``) scales every sample
                column to sum to one million. No gene-length correction is
                applied, so this is counts-per-million; the ``"tpm"`` name is
                kept for backward compatibility. ``"log2"`` returns
                ``log2(counts + 1)``.

        Raises:
            ValueError: If *method* is not one of the supported names.
        """
        if method in ("tpm", "cpm"):
            self.normalized = (self.counts / self.counts.sum(axis=0)) * 1e6
        elif method == "log2":
            # Log2 transformation with a +1 offset so zero counts are defined
            self.normalized = np.log2(self.counts + 1)
        else:
            raise ValueError(f"Unsupported normalization method: {method!r}")

        return self.normalized

    def differential_expression(self, condition1: List[str],
                                condition2: List[str],
                                method: str = "ttest",
                                pseudocount: float = 1.0) -> pd.DataFrame:
        """Perform differential expression analysis between two sample groups.

        Args:
            condition1: Column names of the baseline samples.
            condition2: Column names of the comparison samples.
            method: Statistical test; only ``"ttest"`` is supported.
            pseudocount: Added to both group means before taking the ratio so
                the fold change is symmetric and finite for zero means.

        Returns:
            One row per gene with columns ``gene``, ``mean_condition1``,
            ``mean_condition2``, ``fold_change`` (condition2 / condition1),
            ``log2_fold_change``, ``p_value``, ``significant``
            (raw ``p_value < 0.05`` and ``|log2_fold_change| > 1``) and
            ``adjusted_p_value`` (Benjamini-Hochberg).

        Raises:
            ValueError: If *method* is not ``"ttest"``.
        """
        if method != "ttest":
            raise ValueError(f"Unsupported differential expression method: {method!r}")

        group1 = self.counts.loc[:, condition1]
        group2 = self.counts.loc[:, condition2]
        mean1 = group1.mean(axis=1).to_numpy(dtype=float)
        mean2 = group2.mean(axis=1).to_numpy(dtype=float)

        if self.counts.shape[0] == 0:
            pvalues = np.empty(0, dtype=float)
        else:
            # One vectorized test across all genes (rows).
            _, pvalues = stats.ttest_ind(
                group1.to_numpy(dtype=float),
                group2.to_numpy(dtype=float),
                axis=1,
            )
            pvalues = np.asarray(pvalues, dtype=float)

        # Pseudocount on BOTH means: adding it to the denominator only biases
        # the ratio and yields log2(0) when condition2 has zero mean.
        fold_change = (mean2 + pseudocount) / (mean1 + pseudocount)
        log2fc = np.log2(fold_change)

        results = pd.DataFrame({
            "gene": list(self.counts.index),
            "mean_condition1": mean1,
            "mean_condition2": mean2,
            "fold_change": fold_change,
            "log2_fold_change": log2fc,
            "p_value": pvalues,
            "significant": (pvalues < 0.05) & (np.abs(log2fc) > 1),
            "adjusted_p_value": self._benjamini_hochberg(pvalues),
        })
        self._last_results = results
        return results

    @staticmethod
    def _benjamini_hochberg(pvalues) -> np.ndarray:
        """Return Benjamini-Hochberg adjusted p-values; NaN inputs stay NaN."""
        pvalues = np.asarray(pvalues, dtype=float)
        adjusted = np.full(pvalues.shape, np.nan)
        valid = ~np.isnan(pvalues)
        n_tests = int(valid.sum())
        if n_tests == 0:
            return adjusted
        observed = pvalues[valid]
        order = np.argsort(observed)
        scaled = observed[order] * n_tests / np.arange(1, n_tests + 1)
        # Enforce monotonicity walking down from the largest p-value.
        scaled = np.minimum.accumulate(scaled[::-1])[::-1]
        restored = np.empty(n_tests, dtype=float)
        restored[order] = np.clip(scaled, 0.0, 1.0)
        adjusted[valid] = restored
        return adjusted

    def identify_marker_genes(self, threshold_fc: float = 2,
                              threshold_pval: float = 0.05) -> List[str]:
        """Identify significantly differentially expressed genes.

        Filters the table from the most recent ``differential_expression``
        call on this instance.

        Args:
            threshold_fc: Fold-change cutoff (linear scale); a gene passes
                when ``|log2_fold_change| > |log2(threshold_fc)|``.
            threshold_pval: Raw p-value cutoff (``p_value < threshold_pval``).

        Returns:
            Gene identifiers passing both cutoffs, in table order. Empty if
            ``differential_expression`` has not been run yet.

        Raises:
            ValueError: If *threshold_fc* is not positive.
        """
        if threshold_fc <= 0:
            raise ValueError("threshold_fc must be positive")

        results = getattr(self, "_last_results", None)
        if results is None or results.empty:
            return []

        min_abs_log2fc = abs(np.log2(threshold_fc))
        passing = (
            (results["p_value"] < threshold_pval)
            & (results["log2_fold_change"].abs() > min_abs_log2fc)
        )
        return results.loc[passing, "gene"].tolist()
Best Practices
Data Analysis
- Use appropriate statistical tests
- Account for multiple testing correction
- Validate results with independent methods
- Document data preprocessing steps
- Use version control for analysis scripts
- Maintain reproducible workflows
Sequence Analysis
- Quality control of sequencing data
- Use appropriate reference genomes
- Validate variant calls
- Consider batch effects
- Use established bioinformatics tools
- Benchmark against known datasets
Computational Biology
- Use efficient data structures for large datasets
- Parallelize computationally intensive tasks
- Validate biological interpretations
- Consult domain experts
- Document assumptions clearly
- Use standardized file formats (FASTA, VCF, BAM)
Anti-Patterns
- ❌ No quality control of input data
- ❌ Ignoring batch effects
- ❌ No multiple testing correction
- ❌ Over-interpreting correlations
- ❌ Inadequate sample sizes
- ❌ Not validating computational predictions
- ❌ Ignoring biological context
Resources
- Biopython: https://biopython.org/
- NCBI Resources: https://www.ncbi.nlm.nih.gov/
- Ensembl: https://www.ensembl.org/
- Galaxy Project: https://galaxyproject.org/
- Bioconductor: https://www.bioconductor.org/