Claude-skill-registry bio-hi-c-analysis-hic-data-io
Load, convert, and manipulate Hi-C contact matrices using cooler format. Read .cool/.mcool files, convert from .hic format, access matrix data, and export to different formats. Use when loading or converting Hi-C contact matrices.
install
source · Clone the upstream repo
git clone https://github.com/majiayu000/claude-skill-registry
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/majiayu000/claude-skill-registry "$T" && mkdir -p ~/.claude/skills && cp -r "$T/skills/data/hic-data-io" ~/.claude/skills/majiayu000-claude-skill-registry-bio-hi-c-analysis-hic-data-io && rm -rf "$T"
manifest:
skills/data/hic-data-io/SKILL.md
source content
Hi-C Data I/O
Load and manipulate Hi-C contact matrices in cooler format.
Required Imports
```python
import cooler
import numpy as np
import pandas as pd
```
Load a Cooler File
```python
# Load a .cool file
clr = cooler.Cooler('matrix.cool')

# Basic info
print(f'Chromosomes: {clr.chromnames}')
print(f'Bin size: {clr.binsize}')
print(f'Number of bins: {clr.info["nbins"]}')
print(f'Sum of counts: {clr.info["sum"]}')
```
Load Multi-Resolution Cooler (.mcool)
```python
# List available resolutions
resolutions = cooler.fileops.list_coolers('matrix.mcool')
print(f'Available resolutions: {resolutions}')

# Load specific resolution
clr = cooler.Cooler('matrix.mcool::resolutions/10000')
print(f'Loaded at {clr.binsize}bp resolution')
```
Access Bin Information
```python
# Get bin table (genomic coordinates)
bins = clr.bins()[:]
print(bins.head())
# Columns: chrom, start, end, weight (if balanced)

# Get bins for a chromosome
chr1_bins = clr.bins().fetch('chr1')
print(f'chr1 has {len(chr1_bins)} bins')
```
Access Pixel (Contact) Information
```python
# Get all contacts as DataFrame
pixels = clr.pixels()[:]
print(pixels.head())
# Columns: bin1_id, bin2_id, count

# Get contacts for a region
region_pixels = clr.pixels().fetch('chr1:0-10000000')
```
Extract Contact Matrix
```python
# Get matrix for a chromosome
matrix = clr.matrix(balance=True).fetch('chr1')
print(f'Matrix shape: {matrix.shape}')

# Get matrix for a region
region_matrix = clr.matrix(balance=True).fetch('chr1:50000000-60000000')

# Get raw (unbalanced) matrix
raw_matrix = clr.matrix(balance=False).fetch('chr1')

# Sparse matrix for memory efficiency
from scipy import sparse
sparse_matrix = clr.matrix(balance=True, sparse=True).fetch('chr1')
```
Extract Submatrix (Two Regions)
```python
# Get contacts between two regions
region1 = 'chr1:50000000-60000000'
region2 = 'chr1:70000000-80000000'
submatrix = clr.matrix(balance=True).fetch(region1, region2)
print(f'Submatrix shape: {submatrix.shape}')

# Inter-chromosomal contacts
inter_matrix = clr.matrix(balance=True).fetch('chr1', 'chr2')
```
Convert from .hic to Cooler
```bash
# Using hic2cool CLI
hic2cool convert input.hic output.mcool -r 0  # All resolutions

# Specific resolution
hic2cool convert input.hic output.cool -r 10000
```
```python
# Python alternative using hic2cool
import hic2cool
hic2cool.hic2cool_convert('input.hic', 'output.mcool', resolution=0)
```
Convert from Text Formats
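```python
# From pairs file to cooler
# First create bins
import bioframe
chromsizes = bioframe.fetch_chromsizes('hg38')
bins = cooler.binnify(chromsizes, binsize=10000)

# Then store already-binned pixels.
# NOTE: cooler.create_cooler() does not read a pairs file itself, so
# pixels=None will not work. `binned_pixels` must be a DataFrame (or an
# iterable of DataFrame chunks) with columns bin1_id, bin2_id, count.
cooler.create_cooler(
    'output.cool',
    bins,
    binned_pixels,
    dtypes={'count': int},
)
```

```bash
# To aggregate a raw pairs file into bins, use cooler cload
cooler cload pairs -c1 2 -p1 3 -c2 4 -p2 5 chromsizes.txt:10000 pairs.txt output.cool
```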
Create Cooler from Matrix
```python
import bioframe
import cooler
import numpy as np
import pandas as pd

# Create bins
chromsizes = bioframe.fetch_chromsizes('hg38')
bins = cooler.binnify(chromsizes, binsize=10000)
n_bins = len(bins)

# `matrix` must be your own (n_bins, n_bins) array of contact counts.
# A dense array of this size is only practical for small genomes or large bins.
# matrix = np.random.poisson(1, (n_bins, n_bins))  # Your matrix here

# Keep the upper triangle (diagonal included) and collect the positive entries.
# np.nonzero returns indices in row-major order, i.e. sorted by bin1_id, then bin2_id.
upper = np.triu(matrix)
bin1_id, bin2_id = np.nonzero(upper > 0)
pixels_df = pd.DataFrame({
    'bin1_id': bin1_id,
    'bin2_id': bin2_id,
    'count': upper[bin1_id, bin2_id],
})

# Create cooler
cooler.create_cooler('new.cool', bins, pixels_df)
```
Merge Cooler Files
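```python
# Merge multiple cooler files (all inputs must share the same bin table)
# mergebuf is required: the maximum number of pixels processed per merge step
cooler.merge_coolers(
    'merged.cool',
    ['sample1.cool', 'sample2.cool'],
    mergebuf=20_000_000,
)
```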
Coarsen Resolution
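```python
# Create lower resolution from high resolution
# chunksize is required: the number of pixels processed per chunk
cooler.coarsen_cooler('hires.cool', 'lowres.cool', factor=10, chunksize=10_000_000)  # 10x coarser

# Or using zoomify for multiple resolutions
# (each resolution must be an integer multiple of the base bin size)
cooler.zoomify_cooler(
    'input.cool',
    'output.mcool',
    resolutions=[10000, 50000, 100000, 500000],
    chunksize=10_000_000,
)
```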
Export to Other Formats
```python
# Export matrix to numpy
matrix = clr.matrix(balance=True).fetch('chr1')
np.save('chr1_matrix.npy', matrix)

# Export to text
np.savetxt('chr1_matrix.txt', matrix, delimiter='\t')

# Export pixels to CSV
pixels = clr.pixels()[:]
pixels.to_csv('pixels.csv', index=False)
```
Dump to Pairs Format
```bash
# Using cooler dump
cooler dump -t pixels --join matrix.cool > pairs.txt

# Dump bins
cooler dump -t bins matrix.cool > bins.txt
```
Access Metadata
```python
# Get all metadata
print(clr.info)

# Specific metadata
print(f'Genome assembly: {clr.info.get("genome-assembly", "Unknown")}')
print(f'Creation date: {clr.info.get("creation-date", "Unknown")}')

# Check if balanced
if 'weight' in clr.bins().columns:
    print('Matrix has balancing weights')
```
List Cooler Contents
```python
# For mcool
coolers = cooler.fileops.list_coolers('multi.mcool')
print(f'Available: {coolers}')

# Check if valid cooler
is_valid = cooler.fileops.is_cooler('file.cool')
print(f'Valid cooler: {is_valid}')
```
Related Skills
- matrix-operations - Balance and normalize matrices
- hic-visualization - Visualize contact matrices
- contact-pairs - Process raw Hi-C pairs