install
source · Clone the upstream repo
git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Imaging_Analysis/imaging-mass-cytometry/phenotyping" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-phenotyping && rm -rf "$T"
manifest:
Skills/Imaging_Analysis/imaging-mass-cytometry/phenotyping/SKILL.md
source content
<!--
# COPYRIGHT NOTICE
# This file is part of the "Universal Biomedical Skills" project.
# Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu>
# All Rights Reserved.
#
# This code is proprietary and confidential.
# Unauthorized copying of this file, via any medium is strictly prohibited.
#
# Provenance: Authenticated by MD BABU MIA
-->
name: bio-imaging-mass-cytometry-phenotyping
description: Cell type assignment from marker expression in IMC data. Covers manual gating, clustering, and automated classification approaches. Use when assigning cell types to segmented IMC cells based on protein marker expression or when phenotyping cells in multiplexed imaging data.
tool_type: python
primary_tool: scanpy
measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes.
allowed-tools:
- read_file
- run_shell_command
Cell Phenotyping for IMC
Load Single-Cell Data
import anndata as ad
import scanpy as sc
import pandas as pd
import numpy as np

# Option 1: read an existing AnnData file.
adata = ad.read_h5ad('imc_segmented.h5ad')

# Option 2: build the AnnData object from two CSV tables.
# NOTE: when both options run in sequence, `adata` ends up holding the
# CSV-derived object, because this assignment replaces the one above.
intensities = pd.read_csv('cell_intensities.csv')
cell_info = pd.read_csv('cell_info.csv')

adata = ad.AnnData(X=intensities.to_numpy())
adata.var_names = intensities.columns  # one variable per intensity column
adata.obs = cell_info                  # per-cell metadata table
Data Transformation
# Arcsinh transformation (standard for cytometry)
def arcsinh_transform(adata, cofactor=5):
    """Replace ``adata.X`` with ``arcsinh(adata.X / cofactor)``.

    Args:
        adata: Object exposing the expression matrix as ``adata.X``.
        cofactor: Strictly positive divisor applied before the arcsinh.

    Returns:
        The same ``adata`` object (modified in place), to allow chaining.

    Raises:
        ValueError: If ``cofactor`` is zero, negative or NaN. Such values
            would otherwise silently produce inf/NaN or sign-flipped
            intensities.
    """
    # `not (x > 0)` also rejects NaN, which `x <= 0` would let through.
    if not cofactor > 0:
        raise ValueError(f'cofactor must be positive, got {cofactor!r}')
    adata.X = np.arcsinh(adata.X / cofactor)
    return adata


adata = arcsinh_transform(adata)

# Z-score normalization; scaled values are clipped at max_value.
sc.pp.scale(adata, max_value=10)
Clustering-Based Phenotyping
# PCA and neighbors.
# The number of components is capped by the data: PCA cannot return
# min(n_obs, n_vars) or more components, so a fixed value of 15 fails on
# panels with 15 or fewer markers.
n_pcs = min(15, adata.n_obs - 1, adata.n_vars - 1)
sc.pp.pca(adata, n_comps=n_pcs)
# Use exactly the components computed above for the neighbor graph.
sc.pp.neighbors(adata, n_neighbors=15, n_pcs=n_pcs)

# Clustering
sc.tl.leiden(adata, resolution=0.5)

# UMAP for visualization
sc.tl.umap(adata)

# Plot
sc.pl.umap(adata, color='leiden', save='_clusters.png')
Manual Gating
def gate_cells(adata, marker, threshold, above=True):
    """Gate cells based on the expression of a single marker.

    Args:
        adata: AnnData-like object; ``adata[:, marker].X`` must yield the
            expression values of ``marker`` for every cell.
        marker: Name of the variable (marker) to gate on.
        threshold: Cut-off compared against the values stored in ``X``.
        above: If True, select cells strictly above the threshold;
            otherwise select cells strictly below it.

    Returns:
        1-D boolean NumPy array with one entry per cell.
    """
    values = adata[:, marker].X
    # Sparse matrices expose no ``flatten``; densify them first.
    if hasattr(values, 'toarray'):
        values = values.toarray()
    # ``asarray(...).ravel()`` always yields a true 1-D array, including for
    # ``np.matrix`` input where ``flatten`` would stay two-dimensional.
    values = np.asarray(values).ravel()
    return values > threshold if above else values < threshold


# Example gating strategy for T cells.
# The thresholds are compared against whatever currently sits in adata.X.
adata.obs['CD45_pos'] = gate_cells(adata, 'CD45', 1.5)
adata.obs['CD3_pos'] = gate_cells(adata, 'CD3', 1.0)
adata.obs['CD8_pos'] = gate_cells(adata, 'CD8', 0.8)
adata.obs['CD4_pos'] = gate_cells(adata, 'CD4', 0.8)


# Assign cell types
def assign_cell_type(row):
    """Map one row of gate flags to a cell-type label.

    The gates are evaluated hierarchically (CD45, then CD3, then CD8, then
    CD4), so a cell positive for both CD8 and CD4 is labelled 'CD8 T cell'.
    """
    if not row['CD45_pos']:
        return 'Other'
    if not row['CD3_pos']:
        return 'Non-T immune'
    if row['CD8_pos']:
        return 'CD8 T cell'
    if row['CD4_pos']:
        return 'CD4 T cell'
    return 'T cell (other)'


adata.obs['cell_type'] = adata.obs.apply(assign_cell_type, axis=1)
Cluster Annotation
# Find marker genes per cluster
sc.tl.rank_genes_groups(adata, 'leiden', method='wilcoxon')
sc.pl.rank_genes_groups_heatmap(adata, n_genes=5, save='_markers.png')

# Manual annotation based on markers
cluster_annotation = {
    '0': 'Epithelial',
    '1': 'CD8 T cell',
    '2': 'CD4 T cell',
    '3': 'Macrophage',
    '4': 'Stromal',
    '5': 'B cell',
}

# The number of Leiden clusters depends on the data and the resolution, so
# clusters missing from the dict above are labelled 'Unassigned' explicitly.
# A plain ``.map(dict)`` would turn them into NaN, and those cells would then
# disappear from downstream plots and frequency tables.
adata.obs['cell_type'] = (
    adata.obs['leiden']
    .astype(str)
    .map(lambda cluster: cluster_annotation.get(cluster, 'Unassigned'))
    .astype('category')
)
SOM-Based Clustering (FlowSOM-Style)
# FlowSOM-style clustering using minisom
# Note: For authentic FlowSOM, use the R CATALYST package which wraps FlowSOM
# This Python approach approximates the SOM + meta-clustering concept
from minisom import MiniSom
from sklearn.cluster import AgglomerativeClustering

# Markers for clustering
phenotype_markers = ['CD45', 'CD3', 'CD8', 'CD4', 'CD20', 'CD68', 'E-cadherin']
X = adata[:, phenotype_markers].X
# MiniSom works on plain NumPy arrays: densify sparse input and detach from
# the AnnData view.
if hasattr(X, 'toarray'):
    X = X.toarray()
X = np.asarray(X, dtype=float)

# Grid and clustering parameters, defined once so that the node-index
# arithmetic below can never disagree with the SOM that was trained.
som_rows, som_cols = 10, 10
n_meta_clusters = 10
random_seed = 0  # fixed seed makes initialisation and training reproducible

# Self-Organizing Map
som = MiniSom(
    som_rows,
    som_cols,
    X.shape[1],
    sigma=1.5,
    learning_rate=0.5,
    random_seed=random_seed,
)
som.random_weights_init(X)
som.train_random(X, 1000)

# Get cluster assignments: (row, col) of the best-matching node for each cell,
# converted to a flat row-major node index. This matches the ordering that
# ``reshape(-1, n_features)`` gives the node weights below.
winner_coordinates = np.array([som.winner(x) for x in X])
som_clusters = np.ravel_multi_index(
    (winner_coordinates[:, 0], winner_coordinates[:, 1]),
    (som_rows, som_cols),
)

# Meta-clustering of the SOM node weight vectors
meta_clustering = AgglomerativeClustering(n_clusters=n_meta_clusters)
node_weights = som.get_weights().reshape(-1, X.shape[1])
meta_labels = meta_clustering.fit_predict(node_weights)

# Assign to cells: look up each cell's node in the meta-cluster labels.
adata.obs['som_cluster'] = meta_labels[som_clusters]
Automated Annotation
# Use reference-based annotation (similar to CellTypist)
from sklearn.neighbors import KNeighborsClassifier

# If you have a reference dataset with known labels
ref_data = ad.read_h5ad('reference_imc.h5ad')

# Align features by name. Fitting on ref_data.X and predicting on adata.X
# directly is only valid when both objects hold the same markers in the same
# column order; a different order would give silently wrong labels.
query_markers = set(adata.var_names)
shared_markers = [m for m in ref_data.var_names if m in query_markers]
if not shared_markers:
    raise ValueError('Reference and query data share no marker names')
# NOTE(review): the reference values should be on the same scale as the
# query (same transformation/normalisation) - confirm for your data.

# Train classifier
knn = KNeighborsClassifier(n_neighbors=15)
knn.fit(ref_data[:, shared_markers].X, ref_data.obs['cell_type'])

# Predict. The neighbour search runs once; the label is the class with the
# highest neighbour fraction and the stored score is that fraction.
class_probabilities = knn.predict_proba(adata[:, shared_markers].X)
adata.obs['predicted_type'] = knn.classes_[class_probabilities.argmax(axis=1)]
adata.obs['prediction_prob'] = class_probabilities.max(axis=1)
Visualize Phenotypes
import matplotlib.pyplot as plt

# UMAP colored by cell type
sc.pl.umap(adata, color='cell_type', save='_celltypes.png')

# Heatmap of markers by cell type
sc.pl.matrixplot(
    adata,
    phenotype_markers,
    groupby='cell_type',
    dendrogram=True,
    cmap='RdBu_r',
    save='_heatmap.png',
)

# Spatial plot colored by cell type.
# Prefer coordinates stored in obsm['spatial']. Objects built from CSV tables
# have no such entry, so fall back to the centroid columns in obs.
if 'spatial' in adata.obsm:
    spatial = np.asarray(adata.obsm['spatial'])
else:
    spatial = adata.obs[['centroid_x', 'centroid_y']].to_numpy()

fig, ax = plt.subplots(figsize=(10, 10))
# dropna(): cells without a label would otherwise add an empty 'nan' entry
# to the legend, because ``== NaN`` never matches any row.
for ct in adata.obs['cell_type'].dropna().unique():
    # Convert to a plain boolean array before indexing the coordinate array.
    mask = (adata.obs['cell_type'] == ct).to_numpy()
    ax.scatter(spatial[mask, 0], spatial[mask, 1], s=1, label=ct, alpha=0.7)
ax.legend(markerscale=5)
ax.set_aspect('equal')
# Save through the figure handle so the right figure is written even if
# another figure has become current in the meantime.
fig.savefig('spatial_celltypes.png', dpi=150)
Cell Type Frequencies
# Count cells for every (image_id, cell_type) pair, then pivot the cell types
# into columns; pairs that never occur are filled with 0.
counts = adata.obs.groupby(['image_id', 'cell_type']).size()
freq = counts.unstack(fill_value=0)

# Convert each row (one image/ROI) to percentages of its own total.
row_totals = freq.sum(axis=1)
freq_pct = freq.div(row_totals, axis=0) * 100

# Stacked bar chart with one bar per image/ROI.
freq_pct.plot(kind='bar', stacked=True, figsize=(12, 6))
plt.ylabel('Percentage')
plt.title('Cell Type Composition')
plt.tight_layout()
plt.savefig('celltype_frequencies.png')
Save Results
# Write the AnnData object, including the annotations stored in obs, to disk.
adata.write('imc_phenotyped.h5ad')

# Export the selected per-cell columns as a CSV table without the index.
export_columns = ['cell_id', 'cell_type', 'centroid_x', 'centroid_y']
cell_table = adata.obs[export_columns]
cell_table.to_csv('cell_phenotypes.csv', index=False)
Related Skills
- cell-segmentation - Generate single-cell data
- spatial-analysis - Analyze spatial patterns of cell types
- single-cell/cell-annotation - Similar annotation concepts