install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/3D_Genomics/hi-c-analysis/tad-detection" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-tad-detection && rm -rf "$T"
manifest:
Skills/3D_Genomics/hi-c-analysis/tad-detection/SKILL.mdsource content
<!--
# COPYRIGHT NOTICE
# This file is part of the "Universal Biomedical Skills" project.
# Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu>
# All Rights Reserved.
#
# This code is proprietary and confidential.
# Unauthorized copying of this file, via any medium is strictly prohibited.
#
# Provenance: Authenticated by MD BABU MIA
-->
name: bio-hi-c-analysis-tad-detection description: Call topologically associating domains (TADs) from Hi-C data using insulation score, HiCExplorer, and other methods. Identify domain boundaries and hierarchical domain structure. Use when calling TADs from Hi-C insulation scores. tool_type: mixed primary_tool: cooltools measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes. allowed-tools:
- read_file
- run_shell_command
TAD Detection
Call topologically associating domains from Hi-C contact matrices.
Required Imports
import cooler import cooltools import numpy as np import pandas as pd import matplotlib.pyplot as plt import bioframe
Compute Insulation Score
clr = cooler.Cooler('matrix.mcool::resolutions/10000') view_df = bioframe.make_viewframe(clr.chromsizes) # Compute insulation score insulation = cooltools.insulation( clr, window_bp=[100000, 200000, 500000], # Multiple window sizes ignore_diags=2, ) print(insulation.head()) # Columns include: chrom, start, end, log2_insulation_score_100000, etc.
Call TAD Boundaries
# Find boundaries (local minima in insulation score) boundaries = cooltools.find_insulation( clr, window_bp=200000, # Single window ignore_diags=2, min_dist_bad_bin=0, ) # Filter significant boundaries boundaries['is_boundary'] = boundaries['boundary_strength'] > 0.1 strong_boundaries = boundaries[boundaries['is_boundary']] print(f'Found {len(strong_boundaries)} TAD boundaries')
Extract TAD Regions
def boundaries_to_tads(boundaries_df, chrom): '''Convert boundary positions to TAD intervals''' chr_bounds = boundaries_df[ (boundaries_df['chrom'] == chrom) & (boundaries_df['is_boundary']) ].sort_values('start') tads = [] starts = [0] + list(chr_bounds['start']) ends = list(chr_bounds['start']) + [boundaries_df[boundaries_df['chrom'] == chrom]['end'].max()] for start, end in zip(starts, ends): if end > start: tads.append({'chrom': chrom, 'start': start, 'end': end}) return pd.DataFrame(tads) tads_chr1 = boundaries_to_tads(boundaries, 'chr1') print(f'chr1 TADs: {len(tads_chr1)}') print(tads_chr1.head())
Using HiCExplorer (CLI)
# Compute TADs with HiCExplorer hicFindTADs \ -m matrix.cool \ --outPrefix tads \ --correctForMultipleTesting fdr \ --minDepth 60000 \ --maxDepth 200000 \ --step 10000 \ --thresholdComparisons 0.05 # Output files: # tads_domains.bed - TAD intervals # tads_boundaries.bed - Boundary positions # tads_score.bedgraph - Insulation score track
Using HiCExplorer in Python
# After running hicFindTADs tads = pd.read_csv('tads_domains.bed', sep='\t', header=None, names=['chrom', 'start', 'end']) boundaries = pd.read_csv('tads_boundaries.bed', sep='\t', header=None, names=['chrom', 'start', 'end', 'score']) print(f'TADs: {len(tads)}') print(f'Boundaries: {len(boundaries)}')
TAD Statistics
# Calculate TAD sizes tads['size'] = tads['end'] - tads['start'] print('TAD size statistics:') print(f' Mean: {tads["size"].mean() / 1000:.0f} kb') print(f' Median: {tads["size"].median() / 1000:.0f} kb') print(f' Min: {tads["size"].min() / 1000:.0f} kb') print(f' Max: {tads["size"].max() / 1000:.0f} kb') # Size distribution plt.hist(tads['size'] / 1000, bins=50) plt.xlabel('TAD size (kb)') plt.ylabel('Count') plt.title('TAD size distribution') plt.savefig('tad_sizes.png', dpi=150)
Plot Insulation Score
fig, ax = plt.subplots(figsize=(15, 3)) chr_data = insulation[insulation['chrom'] == 'chr1'] ax.plot(chr_data['start'] / 1e6, chr_data['log2_insulation_score_200000']) # Mark boundaries bounds = chr_data[chr_data['is_boundary']] ax.scatter(bounds['start'] / 1e6, bounds['log2_insulation_score_200000'], color='red', s=20, zorder=5) ax.set_xlabel('Position (Mb)') ax.set_ylabel('Insulation score (log2)') ax.set_title('chr1 insulation score (red = boundaries)') plt.tight_layout() plt.savefig('insulation_track.png', dpi=150)
Compare TAD Boundaries Between Conditions
# Load boundaries from two conditions bounds1 = pd.read_csv('condition1_boundaries.bed', sep='\t', names=['chrom', 'start', 'end']) bounds2 = pd.read_csv('condition2_boundaries.bed', sep='\t', names=['chrom', 'start', 'end']) # Find overlapping boundaries (within tolerance) tolerance = 50000 # 50kb def find_overlaps(df1, df2, tol): overlaps = [] for _, b1 in df1.iterrows(): matches = df2[ (df2['chrom'] == b1['chrom']) & (abs(df2['start'] - b1['start']) <= tol) ] if len(matches) > 0: overlaps.append(b1) return pd.DataFrame(overlaps) shared = find_overlaps(bounds1, bounds2, tolerance) print(f'Shared boundaries: {len(shared)}') print(f'Condition 1 specific: {len(bounds1) - len(shared)}') print(f'Condition 2 specific: {len(bounds2) - len(shared)}')
Hierarchical TADs
# Compute insulation at multiple scales windows = [100000, 200000, 500000, 1000000] insulation_multi = cooltools.insulation(clr, window_bp=windows, ignore_diags=2) # Boundaries at each scale represent different hierarchy levels for w in windows: col = f'is_boundary_{w}' n_bounds = insulation_multi[col].sum() print(f'Window {w/1000:.0f}kb: {n_bounds} boundaries')
Export TADs
# Save as BED tads[['chrom', 'start', 'end']].to_csv( 'tads.bed', sep='\t', index=False, header=False ) # Save boundaries as BED boundaries[boundaries['is_boundary']][['chrom', 'start', 'end', 'boundary_strength']].to_csv( 'boundaries.bed', sep='\t', index=False, header=False ) # Save insulation as bedGraph insulation[['chrom', 'start', 'end', 'log2_insulation_score_200000']].to_csv( 'insulation.bedgraph', sep='\t', index=False, header=False )
Related Skills
- hic-data-io - Load Hi-C matrices
- hic-visualization - Visualize TADs on contact matrices
- compartment-analysis - Compartments operate at larger scale than TADs