install
source · Clone the upstream repo
git clone https://github.com/FreedomIntelligence/OpenClaw-Medical-Skills
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/FreedomIntelligence/OpenClaw-Medical-Skills "$T" && mkdir -p ~/.claude/skills && cp -r "$T/skills/bio-expression-matrix-gene-id-mapping" ~/.claude/skills/freedomintelligence-openclaw-medical-skills-bio-expression-matrix-gene-id-mappin && rm -rf "$T"
OpenClaw · Install into ~/.openclaw/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/FreedomIntelligence/OpenClaw-Medical-Skills "$T" && mkdir -p ~/.openclaw/skills && cp -r "$T/skills/bio-expression-matrix-gene-id-mapping" ~/.openclaw/skills/freedomintelligence-openclaw-medical-skills-bio-expression-matrix-gene-id-mappin && rm -rf "$T"
manifest:
skills/bio-expression-matrix-gene-id-mapping/SKILL.mdsource content
<!--
# COPYRIGHT NOTICE
# This file is part of the "Universal Biomedical Skills" project.
# Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu>
# All Rights Reserved.
#
# This code is proprietary and confidential.
# Unauthorized copying of this file, via any medium is strictly prohibited.
#
# Provenance: Authenticated by MD BABU MIA
-->
name: bio-expression-matrix-gene-id-mapping description: Convert between gene identifier systems including Ensembl, Entrez, HGNC symbols, and UniProt. Use when mapping IDs for pathway analysis or matching different data sources. tool_type: mixed primary_tool: biomaRt measurable_outcome: Execute skill workflow successfully with valid output within 15 minutes. allowed-tools:
- read_file
- run_shell_command
Gene ID Mapping
Python: mygene
import mygene import pandas as pd mg = mygene.MyGeneInfo() # Ensembl to Symbol ensembl_ids = ['ENSG00000141510', 'ENSG00000012048', 'ENSG00000141736'] results = mg.querymany(ensembl_ids, scopes='ensembl.gene', fields='symbol', species='human') mapping = {r['query']: r.get('symbol', None) for r in results} # {'ENSG00000141510': 'TP53', 'ENSG00000012048': 'BRCA1', 'ENSG00000141736': 'ERBB2'} # Symbol to Entrez symbols = ['TP53', 'BRCA1', 'ERBB2'] results = mg.querymany(symbols, scopes='symbol', fields='entrezgene', species='human') mapping = {r['query']: r.get('entrezgene', None) for r in results} # Ensembl to multiple fields results = mg.querymany(ensembl_ids, scopes='ensembl.gene', fields=['symbol', 'entrezgene', 'uniprot'], species='human')
Python: pyensembl
from pyensembl import EnsemblRelease # Load Ensembl release (downloads automatically first time) ensembl = EnsemblRelease(110, species='human') # or 'mouse' # Gene ID to symbol gene = ensembl.gene_by_id('ENSG00000141510') print(gene.gene_name) # TP53 # Symbol to gene ID gene = ensembl.genes_by_name('TP53')[0] print(gene.gene_id) # ENSG00000141510 # Batch conversion def ensembl_to_symbol(ensembl_ids, release=110): ens = EnsemblRelease(release, species='human') mapping = {} for eid in ensembl_ids: try: gene = ens.gene_by_id(eid.split('.')[0]) # Remove version mapping[eid] = gene.gene_name except ValueError: mapping[eid] = None return mapping
Python: gseapy
import gseapy as gp # Ensembl to Symbol using Enrichr gene_list = ['ENSG00000141510', 'ENSG00000012048'] converted = gp.biomart.ensembl2name(gene_list, organism='hsapiens')
R: biomaRt
library(biomaRt) # Connect to Ensembl ensembl <- useEnsembl(biomart='genes', dataset='hsapiens_gene_ensembl') # Ensembl to Symbol ensembl_ids <- c('ENSG00000141510', 'ENSG00000012048', 'ENSG00000141736') results <- getBM( attributes=c('ensembl_gene_id', 'hgnc_symbol', 'entrezgene_id'), filters='ensembl_gene_id', values=ensembl_ids, mart=ensembl ) # Symbol to Ensembl symbols <- c('TP53', 'BRCA1', 'ERBB2') results <- getBM( attributes=c('hgnc_symbol', 'ensembl_gene_id'), filters='hgnc_symbol', values=symbols, mart=ensembl ) # All available attributes listAttributes(ensembl)
R: org.db Packages
library(org.Hs.eg.db) # Human library(AnnotationDbi) # Ensembl to Symbol ensembl_ids <- c('ENSG00000141510', 'ENSG00000012048') symbols <- mapIds(org.Hs.eg.db, keys=ensembl_ids, keytype='ENSEMBL', column='SYMBOL') # Symbol to Entrez symbols <- c('TP53', 'BRCA1') entrez <- mapIds(org.Hs.eg.db, keys=symbols, keytype='SYMBOL', column='ENTREZID') # Available keytypes keytypes(org.Hs.eg.db) # ENSEMBL, ENSEMBLPROT, ENSEMBLTRANS, ENTREZID, SYMBOL, UNIPROT, etc.
Apply Mapping to Count Matrix
import pandas as pd import mygene def map_count_matrix_ids(counts, from_type='ensembl.gene', to_type='symbol', species='human'): '''Map gene IDs in count matrix index.''' mg = mygene.MyGeneInfo() # Remove version numbers from Ensembl IDs clean_ids = [g.split('.')[0] for g in counts.index] # Query mygene results = mg.querymany(clean_ids, scopes=from_type, fields=to_type, species=species) # Build mapping mapping = {} for r in results: if to_type in r: mapping[r['query']] = r[to_type] # Apply mapping new_index = [mapping.get(g.split('.')[0], g) for g in counts.index] counts_mapped = counts.copy() counts_mapped.index = new_index # Handle duplicates (sum) counts_mapped = counts_mapped.groupby(counts_mapped.index).sum() return counts_mapped # Usage counts_symbols = map_count_matrix_ids(counts, 'ensembl.gene', 'symbol')
R Equivalent
library(biomaRt) map_count_matrix_ids <- function(counts, from_type='ensembl_gene_id', to_type='hgnc_symbol') { ensembl <- useEnsembl(biomart='genes', dataset='hsapiens_gene_ensembl') # Remove version numbers clean_ids <- gsub('\\..*', '', rownames(counts)) # Get mapping mapping <- getBM( attributes=c(from_type, to_type), filters=from_type, values=clean_ids, mart=ensembl ) # Merge and aggregate duplicates counts$gene_id <- clean_ids merged <- merge(counts, mapping, by.x='gene_id', by.y=from_type, all.x=TRUE) merged$gene_id <- NULL # Use symbol as rowname, sum duplicates rownames(merged) <- merged[[to_type]] merged[[to_type]] <- NULL counts_mapped <- aggregate(. ~ rownames(merged), data=merged, FUN=sum) rownames(counts_mapped) <- counts_mapped[,1] counts_mapped <- counts_mapped[,-1] return(counts_mapped) }
Handle Unmapped IDs
def robust_id_mapping(gene_ids, from_type, to_type, species='human'): '''Map IDs with fallback for unmapped genes.''' import mygene mg = mygene.MyGeneInfo() clean_ids = [g.split('.')[0] for g in gene_ids] results = mg.querymany(clean_ids, scopes=from_type, fields=to_type, species=species) mapping = {} unmapped = [] for r in results: original = gene_ids[clean_ids.index(r['query'])] if to_type in r: mapping[original] = r[to_type] else: mapping[original] = original # Keep original if unmapped unmapped.append(original) print(f'Mapped: {len(gene_ids) - len(unmapped)}/{len(gene_ids)}') print(f'Unmapped: {len(unmapped)}') return mapping, unmapped
Common ID Types
| Type | Example | Use Case |
|---|---|---|
| Ensembl Gene | ENSG00000141510 | RNA-seq, GTF files |
| Ensembl Transcript | ENST00000269305 | Transcript-level analysis |
| Entrez Gene | 7157 | NCBI databases, KEGG |
| HGNC Symbol | TP53 | Human readable |
| UniProt | P04637 | Protein databases |
| RefSeq | NM_000546 | NCBI RefSeq |
Related Skills
- expression-matrix/counts-ingest - Load count data
- expression-matrix/metadata-joins - Add annotations
- pathway-analysis/go-enrichment - Requires Entrez IDs
- pathway-analysis/kegg-pathways - Requires Entrez IDs