LLMs-Universal-Life-Science-and-Clinical-Skills- tad-detection

<!--

install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/3D_Genomics/hi-c-analysis/tad-detection" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-tad-detection && rm -rf "$T"
manifest: Skills/3D_Genomics/hi-c-analysis/tad-detection/SKILL.md
source content
<!-- # COPYRIGHT NOTICE # This file is part of the "Universal Biomedical Skills" project. # Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu> # All Rights Reserved. # # This code is proprietary and confidential. # Unauthorized copying of this file, via any medium is strictly prohibited. # # Provenance: Authenticated by MD BABU MIA -->

name: bio-hi-c-analysis-tad-detection description: Call topologically associating domains (TADs) from Hi-C data using insulation score, HiCExplorer, and other methods. Identify domain boundaries and hierarchical domain structure. Use when calling TADs from Hi-C insulation scores. tool_type: mixed primary_tool: cooltools measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes. allowed-tools:

  • read_file
  • run_shell_command

TAD Detection

Call topologically associating domains from Hi-C contact matrices.

Required Imports

import cooler
import cooltools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import bioframe

Compute Insulation Score

clr = cooler.Cooler('matrix.mcool::resolutions/10000')
view_df = bioframe.make_viewframe(clr.chromsizes)

# Compute insulation score
insulation = cooltools.insulation(
    clr,
    window_bp=[100000, 200000, 500000],  # Multiple window sizes
    ignore_diags=2,
)

print(insulation.head())
# Columns include: chrom, start, end, log2_insulation_score_100000, etc.

Call TAD Boundaries

# Find boundaries (local minima in insulation score)
boundaries = cooltools.find_insulation(
    clr,
    window_bp=200000,  # Single window
    ignore_diags=2,
    min_dist_bad_bin=0,
)

# Filter significant boundaries
boundaries['is_boundary'] = boundaries['boundary_strength'] > 0.1
strong_boundaries = boundaries[boundaries['is_boundary']]
print(f'Found {len(strong_boundaries)} TAD boundaries')

Extract TAD Regions

def boundaries_to_tads(boundaries_df, chrom):
    '''Convert boundary positions to TAD intervals'''
    chr_bounds = boundaries_df[
        (boundaries_df['chrom'] == chrom) &
        (boundaries_df['is_boundary'])
    ].sort_values('start')

    tads = []
    starts = [0] + list(chr_bounds['start'])
    ends = list(chr_bounds['start']) + [boundaries_df[boundaries_df['chrom'] == chrom]['end'].max()]

    for start, end in zip(starts, ends):
        if end > start:
            tads.append({'chrom': chrom, 'start': start, 'end': end})

    return pd.DataFrame(tads)

tads_chr1 = boundaries_to_tads(boundaries, 'chr1')
print(f'chr1 TADs: {len(tads_chr1)}')
print(tads_chr1.head())

Using HiCExplorer (CLI)

# Compute TADs with HiCExplorer
hicFindTADs \
    -m matrix.cool \
    --outPrefix tads \
    --correctForMultipleTesting fdr \
    --minDepth 60000 \
    --maxDepth 200000 \
    --step 10000 \
    --thresholdComparisons 0.05

# Output files:
# tads_domains.bed - TAD intervals
# tads_boundaries.bed - Boundary positions
# tads_score.bedgraph - Insulation score track

Using HiCExplorer in Python

# After running hicFindTADs
tads = pd.read_csv('tads_domains.bed', sep='\t', header=None,
                   names=['chrom', 'start', 'end'])
boundaries = pd.read_csv('tads_boundaries.bed', sep='\t', header=None,
                         names=['chrom', 'start', 'end', 'score'])

print(f'TADs: {len(tads)}')
print(f'Boundaries: {len(boundaries)}')

TAD Statistics

# Calculate TAD sizes
tads['size'] = tads['end'] - tads['start']

print('TAD size statistics:')
print(f'  Mean: {tads["size"].mean() / 1000:.0f} kb')
print(f'  Median: {tads["size"].median() / 1000:.0f} kb')
print(f'  Min: {tads["size"].min() / 1000:.0f} kb')
print(f'  Max: {tads["size"].max() / 1000:.0f} kb')

# Size distribution
plt.hist(tads['size'] / 1000, bins=50)
plt.xlabel('TAD size (kb)')
plt.ylabel('Count')
plt.title('TAD size distribution')
plt.savefig('tad_sizes.png', dpi=150)

Plot Insulation Score

fig, ax = plt.subplots(figsize=(15, 3))

chr_data = insulation[insulation['chrom'] == 'chr1']
ax.plot(chr_data['start'] / 1e6, chr_data['log2_insulation_score_200000'])

# Mark boundaries
bounds = chr_data[chr_data['is_boundary']]
ax.scatter(bounds['start'] / 1e6, bounds['log2_insulation_score_200000'],
           color='red', s=20, zorder=5)

ax.set_xlabel('Position (Mb)')
ax.set_ylabel('Insulation score (log2)')
ax.set_title('chr1 insulation score (red = boundaries)')
plt.tight_layout()
plt.savefig('insulation_track.png', dpi=150)

Compare TAD Boundaries Between Conditions

# Load boundaries from two conditions
bounds1 = pd.read_csv('condition1_boundaries.bed', sep='\t',
                      names=['chrom', 'start', 'end'])
bounds2 = pd.read_csv('condition2_boundaries.bed', sep='\t',
                      names=['chrom', 'start', 'end'])

# Find overlapping boundaries (within tolerance)
tolerance = 50000  # 50kb

def find_overlaps(df1, df2, tol):
    overlaps = []
    for _, b1 in df1.iterrows():
        matches = df2[
            (df2['chrom'] == b1['chrom']) &
            (abs(df2['start'] - b1['start']) <= tol)
        ]
        if len(matches) > 0:
            overlaps.append(b1)
    return pd.DataFrame(overlaps)

shared = find_overlaps(bounds1, bounds2, tolerance)
print(f'Shared boundaries: {len(shared)}')
print(f'Condition 1 specific: {len(bounds1) - len(shared)}')
print(f'Condition 2 specific: {len(bounds2) - len(shared)}')

Hierarchical TADs

# Compute insulation at multiple scales
windows = [100000, 200000, 500000, 1000000]
insulation_multi = cooltools.insulation(clr, window_bp=windows, ignore_diags=2)

# Boundaries at each scale represent different hierarchy levels
for w in windows:
    col = f'is_boundary_{w}'
    n_bounds = insulation_multi[col].sum()
    print(f'Window {w/1000:.0f}kb: {n_bounds} boundaries')

Export TADs

# Save as BED
tads[['chrom', 'start', 'end']].to_csv(
    'tads.bed', sep='\t', index=False, header=False
)

# Save boundaries as BED
boundaries[boundaries['is_boundary']][['chrom', 'start', 'end', 'boundary_strength']].to_csv(
    'boundaries.bed', sep='\t', index=False, header=False
)

# Save insulation as bedGraph
insulation[['chrom', 'start', 'end', 'log2_insulation_score_200000']].to_csv(
    'insulation.bedgraph', sep='\t', index=False, header=False
)

Related Skills

  • hic-data-io - Load Hi-C matrices
  • hic-visualization - Visualize TADs on contact matrices
  • compartment-analysis - Compartments operate at larger scale than TADs
<!-- AUTHOR_SIGNATURE: 9a7f3c2e-MD-BABU-MIA-2026-MSSM-SECURE -->