LLMs-Universal-Life-Science-and-Clinical-Skills- phenotyping

<!--

install

source · Clone the upstream repo

git clone https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills-

Claude Code · Install into ~/.claude/skills/

T=$(mktemp -d) && git clone --depth=1 https://github.com/mdbabumiamssm/LLMs-Universal-Life-Science-and-Clinical-Skills- "$T" && mkdir -p ~/.claude/skills && cp -r "$T/Skills/Imaging_Analysis/imaging-mass-cytometry/phenotyping" ~/.claude/skills/mdbabumiamssm-llms-universal-life-science-and-clinical-skills-phenotyping && rm -rf "$T"

manifest: Skills/Imaging_Analysis/imaging-mass-cytometry/phenotyping/SKILL.md

source content

name: bio-imaging-mass-cytometry-phenotyping
description: Cell type assignment from marker expression in IMC data. Covers manual gating, clustering, and automated classification approaches. Use when assigning cell types to segmented IMC cells based on protein marker expression or when phenotyping cells in multiplexed imaging data.
tool_type: python
primary_tool: scanpy
measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes.
allowed-tools:
  - read_file
  - run_shell_command

Cell Phenotyping for IMC

Load Single-Cell Data

import anndata as ad
import scanpy as sc
import pandas as pd
import numpy as np

# Load from h5ad
adata = ad.read_h5ad('imc_segmented.h5ad')

# Or create from CSVs
# NOTE: this is an alternative to the h5ad load above. If both parts are run
# in sequence, the AnnData built here rebinds `adata` and replaces the object
# that was read from disk.
intensities = pd.read_csv('cell_intensities.csv')
cell_info = pd.read_csv('cell_info.csv')

# Every column of the intensity table becomes one variable of the expression
# matrix (its column names are used as var_names); the second table becomes
# the per-observation metadata.
# NOTE(review): assumes the rows of both CSVs describe the same cells in the
# same order -- confirm before relying on the pairing.
adata = ad.AnnData(X=intensities.values)
adata.var_names = intensities.columns
adata.obs = cell_info

Data Transformation

# Arcsinh transformation (standard for cytometry)
def arcsinh_transform(adata, cofactor=5):
    '''Apply an inverse hyperbolic sine transform to the expression matrix.

    Replaces ``adata.X`` with ``arcsinh(adata.X / cofactor)``. The object is
    modified in place and also returned so the call can be chained or
    re-assigned.

    Args:
        adata: Object exposing the expression matrix as ``.X``.
        cofactor: Positive scaling factor applied before the transform.

    Returns:
        The same ``adata`` object, with ``.X`` transformed.

    Raises:
        ValueError: If ``cofactor`` is not strictly positive.
    '''
    # A zero cofactor would fill the matrix with inf/NaN and a negative one
    # would flip the sign of every value, so reject both up front.
    if not cofactor > 0:
        raise ValueError(f'cofactor must be positive, got {cofactor!r}')
    adata.X = np.arcsinh(adata.X / cofactor)
    return adata

# arcsinh_transform rewrites adata.X in place and returns the same object, so
# the re-assignment keeps `adata` pointing at the transformed data.
adata = arcsinh_transform(adata)

# Z-score normalization
# Runs after the arcsinh step, so the scaling sees the transformed values.
# NOTE(review): max_value=10 presumably caps extreme scaled values -- confirm
# against the scanpy documentation for sc.pp.scale.
sc.pp.scale(adata, max_value=10)

Clustering-Based Phenotyping

# PCA and neighbors
# Antibody panels can be small: cap the number of components so PCA is never
# asked for more than the data can provide (it has to stay below both the
# number of markers and the number of cells), and reuse the same value for the
# neighbor graph so n_pcs never exceeds what was computed.
n_comps = min(15, adata.n_vars - 1, adata.n_obs - 1)
sc.pp.pca(adata, n_comps=n_comps)
sc.pp.neighbors(adata, n_neighbors=15, n_pcs=n_comps)

# Clustering (labels are written to adata.obs['leiden'])
sc.tl.leiden(adata, resolution=0.5)

# UMAP for visualization
sc.tl.umap(adata)

# Plot
sc.pl.umap(adata, color='leiden', save='_clusters.png')

Manual Gating

def gate_cells(adata, marker, threshold, above=True):
    '''Gate cells based on marker expression.

    Args:
        adata: Annotated data object; ``adata[:, marker].X`` must yield the
            expression values of a single marker (dense or sparse).
        marker: Name of the marker (variable) to gate on.
        threshold: Expression cutoff, on the same scale as ``adata.X``.
        above: If True, select values strictly greater than ``threshold``;
            otherwise select values strictly smaller. Values equal to the
            threshold are never selected.

    Returns:
        1-D boolean NumPy array with one entry per row of ``adata``.
    '''
    values = adata[:, marker].X
    # Sparse matrices have no ``flatten``; densify the single column first.
    if hasattr(values, 'toarray'):
        values = values.toarray()
    # Going through a plain ndarray guarantees a 1-D result whatever array
    # flavour the column came back as.
    values = np.asarray(values).ravel()
    return values > threshold if above else values < threshold

# Example gating strategy for T cells:
# (output column in adata.obs, marker to gate on, expression cutoff)
t_cell_gates = (
    ('CD45_pos', 'CD45', 1.5),
    ('CD3_pos', 'CD3', 1.0),
    ('CD8_pos', 'CD8', 0.8),
    ('CD4_pos', 'CD4', 0.8),
)
for gate_column, gate_marker, gate_cutoff in t_cell_gates:
    adata.obs[gate_column] = gate_cells(adata, gate_marker, gate_cutoff)

# Assign cell types
def assign_cell_type(row):
    '''Turn the boolean gate flags of one cell into a cell-type label.

    The gates are checked in a fixed order and the first rule that fires
    decides the label: CD45-negative cells are 'Other', CD45-positive but
    CD3-negative cells are 'Non-T immune', and the remaining cells are split
    by CD8 (checked first) and then CD4.

    Args:
        row: Mapping-like record with the flags 'CD45_pos', 'CD3_pos',
            'CD8_pos' and 'CD4_pos'. Flags are only read when reached.

    Returns:
        The cell-type label as a string.
    '''
    # (flag column, flag state that triggers the rule, resulting label)
    rules = (
        ('CD45_pos', False, 'Other'),
        ('CD3_pos', False, 'Non-T immune'),
        ('CD8_pos', True, 'CD8 T cell'),
        ('CD4_pos', True, 'CD4 T cell'),
    )
    for column, trigger, label in rules:
        if bool(row[column]) is trigger:
            return label
    return 'T cell (other)'

# Run assign_cell_type once per row of adata.obs (axis=1) and store the
# returned labels in the 'cell_type' column.
adata.obs['cell_type'] = adata.obs.apply(assign_cell_type, axis=1)

Cluster Annotation

# Rank the markers that distinguish each Leiden cluster
sc.tl.rank_genes_groups(adata, 'leiden', method='wilcoxon')
sc.pl.rank_genes_groups_heatmap(adata, n_genes=5, save='_markers.png')

# Manual annotation based on markers (cluster id -> cell type).
# These labels are an example; adapt them to the markers ranked above.
cluster_annotation = {
    '0': 'Epithelial',
    '1': 'CD8 T cell',
    '2': 'CD4 T cell',
    '3': 'Macrophage',
    '4': 'Stromal',
    '5': 'B cell',
}

# Leiden can return clusters that are missing from the mapping. Label those
# explicitly instead of leaving NaN, which would otherwise silently drop cells
# from grouped summaries. The cast to str comes first because a categorical
# column cannot be filled with a label that is not among its categories.
# NOTE: this overwrites any 'cell_type' column written by manual gating.
adata.obs['cell_type'] = (
    adata.obs['leiden']
    .astype(str)
    .map(cluster_annotation)
    .fillna('Unassigned')
    .astype('category')
)

SOM-Based Clustering (FlowSOM-Style)

# FlowSOM-style clustering using minisom
# Note: For authentic FlowSOM, use the R CATALYST package which wraps FlowSOM
# This Python approach approximates the SOM + meta-clustering concept
from minisom import MiniSom
from sklearn.cluster import AgglomerativeClustering

# Markers for clustering
phenotype_markers = ['CD45', 'CD3', 'CD8', 'CD4', 'CD20', 'CD68', 'E-cadherin']
X = adata[:, phenotype_markers].X
# The SOM is trained and queried row by row on plain NumPy vectors, so densify
# sparse storage and drop any array-view wrapper first.
if hasattr(X, 'toarray'):
    X = X.toarray()
X = np.asarray(X, dtype=float)

# Self-Organizing Map
# The grid size is named once so the node-index arithmetic below cannot drift
# from the map that was actually built; the fixed seed makes the weight
# initialisation and the random training order reproducible between runs.
som_rows, som_cols = 10, 10
som = MiniSom(som_rows, som_cols, X.shape[1], sigma=1.5, learning_rate=0.5,
              random_seed=0)
som.random_weights_init(X)
som.train_random(X, 1000)

# Get cluster assignments
# Flatten each (row, col) winner into the row-major node index so it lines up
# with the reshape(-1, n_features) of the weight grid used for meta-clustering.
winner_coordinates = np.array([som.winner(x) for x in X])
som_clusters = winner_coordinates[:, 0] * som_cols + winner_coordinates[:, 1]

# Meta-clustering
meta_clustering = AgglomerativeClustering(n_clusters=10)
meta_labels = meta_clustering.fit_predict(som.get_weights().reshape(-1, X.shape[1]))

# Assign to cells (vectorized lookup: SOM node -> meta-cluster label)
adata.obs['som_cluster'] = meta_labels[som_clusters]

Automated Annotation

# Use reference-based annotation (similar to CellTypist)
from sklearn.neighbors import KNeighborsClassifier

# If you have a reference dataset with known labels
ref_data = ad.read_h5ad('reference_imc.h5ad')

# The classifier compares feature columns by position, so both matrices must
# contain the same markers in the same order. Restrict them to the shared
# panel instead of trusting that the two files happen to line up.
# NOTE(review): the reference is assumed to be transformed and scaled the same
# way as `adata` -- confirm before trusting the predictions.
shared_markers = [m for m in ref_data.var_names if m in adata.var_names]
if not shared_markers:
    raise ValueError('Reference and query data share no markers')
ref_X = ref_data[:, shared_markers].X
query_X = adata[:, shared_markers].X

# Train classifier
knn = KNeighborsClassifier(n_neighbors=15)
knn.fit(ref_X, ref_data.obs['cell_type'])

# Predict: label plus the probability of the winning class for each cell
adata.obs['predicted_type'] = knn.predict(query_X)
adata.obs['prediction_prob'] = knn.predict_proba(query_X).max(axis=1)

Visualize Phenotypes

import matplotlib.pyplot as plt

# UMAP embedding with one colour per annotated cell type
sc.pl.umap(adata, color='cell_type', save='_celltypes.png')

# Marker-by-cell-type heatmap; the options are collected first so the call
# itself stays short
matrixplot_options = dict(
    groupby='cell_type',
    dendrogram=True,
    cmap='RdBu_r',
    save='_heatmap.png',
)
sc.pl.matrixplot(adata, phenotype_markers, **matrixplot_options)

# Spatial plot colored by cell type
# Prefer the coordinates stored in obsm['spatial']; data assembled from CSV
# tables does not populate that slot, so fall back to the centroid columns
# kept in adata.obs.
if 'spatial' in adata.obsm:
    spatial = np.asarray(adata.obsm['spatial'])
else:
    spatial = adata.obs[['centroid_x', 'centroid_y']].to_numpy()

fig, ax = plt.subplots(figsize=(10, 10))
cell_types = adata.obs['cell_type']
# Skip missing labels: they would match no cell and only add an empty 'nan'
# entry to the legend.
for ct in cell_types.dropna().unique():
    # Plain boolean array so the mask indexes the coordinate array by position
    mask = (cell_types == ct).to_numpy()
    ax.scatter(spatial[mask, 0], spatial[mask, 1], s=1, label=ct, alpha=0.7)
ax.legend(markerscale=5)
ax.set_aspect('equal')
plt.savefig('spatial_celltypes.png', dpi=150)

Cell Type Frequencies

# Count cells for every (image, cell type) pair, then pivot the cell types
# into columns so each row describes one image/ROI
cells_per_group = adata.obs.groupby(['image_id', 'cell_type']).size()
freq = cells_per_group.unstack(fill_value=0)

# Convert the counts into percentages of each image's total
cells_per_image = freq.sum(axis=1)
freq_pct = freq.div(cells_per_image, axis=0) * 100

# Stacked bar chart, one bar per image
freq_ax = freq_pct.plot(kind='bar', stacked=True, figsize=(12, 6))
freq_ax.set_ylabel('Percentage')
freq_ax.set_title('Cell Type Composition')
freq_ax.figure.tight_layout()
freq_ax.figure.savefig('celltype_frequencies.png')

Save Results

# Write the annotated AnnData object (including the new obs columns) to disk
adata.write('imc_phenotyped.h5ad')

# Export one row per cell with its identifier, label and centroid position
export_columns = ['cell_id', 'cell_type', 'centroid_x', 'centroid_y']
phenotype_table = adata.obs[export_columns]
phenotype_table.to_csv('cell_phenotypes.csv', index=False)

Related Skills

cell-segmentation - Generate single-cell data
spatial-analysis - Analyze spatial patterns of cell types
single-cell/cell-annotation - Similar annotation concepts