Skills · huggingface
Install
source · Clone the upstream repo
git clone https://github.com/TerminalSkills/skills
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/TerminalSkills/skills "$T" && mkdir -p ~/.claude/skills && cp -r "$T/skills/huggingface" ~/.claude/skills/terminalskills-skills-huggingface && rm -rf "$T"
manifest:
skills/huggingface/SKILL.md
safety · automated scan (low risk)
This is a pattern-based risk scan, not a security review. Our crawler flagged:
- pip install
Always read a skill's source content before installing. Patterns alone don't mean the skill is malicious — but they warrant attention.
source content
Hugging Face
Installation
# Install core libraries
pip install transformers datasets tokenizers accelerate huggingface_hub
# Login to Hugging Face Hub
huggingface-cli login

# Or set token as environment variable
export HF_TOKEN="hf_xxxxxxxxxxxxxxxxxxxx"
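You can also authenticate from Python instead of the CLI; a minimal sketch using huggingface_hub's login() helper, assuming HF_TOKEN has already been exported as above (the filename is illustrative):

# login_programmatic.py: log in from Python using an existing token
import os
from huggingface_hub import login

login(token=os.environ["HF_TOKEN"])  # validates the token and caches it locally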
Quick Inference with Pipelines
# quick_inference.py — Run inference using built-in pipelines
from transformers import pipeline

# Text classification
classifier = pipeline("sentiment-analysis")
result = classifier("I love using Hugging Face!")
print(result)  # [{'label': 'POSITIVE', 'score': 0.9998}]

# Text generation
generator = pipeline("text-generation", model="meta-llama/Llama-2-7b-chat-hf")
output = generator("Explain transformers in one sentence:", max_new_tokens=50)
print(output[0]["generated_text"])

# Named entity recognition
ner = pipeline("ner", grouped_entities=True)
entities = ner("Hugging Face is based in New York City.")
print(entities)
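Pipelines run on CPU by default. A short sketch, assuming a CUDA GPU at index 0, of pinning a pipeline to a device and passing a list of inputs so the pipeline batches them internally:

# pipeline_device.py: GPU placement and batched inputs (illustrative)
from transformers import pipeline

classifier = pipeline("sentiment-analysis", device=0)  # device=0 is the first GPU; -1 forces CPU
texts = ["Great library!", "This release broke my code."]
results = classifier(texts, batch_size=2)  # lists are accepted and batched
print(results)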
Loading Models and Tokenizers
# load_model.py — Load a model and tokenizer manually for more control
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",   # Automatically distribute across GPUs
    load_in_4bit=True,   # Quantized loading for less VRAM
)

inputs = tokenizer("What is machine learning?", return_tensors="pt").to(model.device)
outputs = model.generate(
    **inputs,
    max_new_tokens=100,
    do_sample=True,      # temperature only takes effect when sampling is enabled
    temperature=0.7,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
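Recent transformers versions deprecate the bare load_in_4bit flag in favor of an explicit BitsAndBytesConfig; a hedged sketch of the same 4-bit load via quantization_config (requires the bitsandbytes package):

# load_model_bnb.py: 4-bit loading through BitsAndBytesConfig (newer API)
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
import torch

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # dtype used for compute on top of 4-bit weights
)
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2",
    quantization_config=bnb_config,
    device_map="auto",
)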
Tokenizer Usage
# tokenizer_basics.py — Understand tokenization for debugging and preprocessing
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
text = "Hugging Face makes NLP easy!"

encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
print(f"Input IDs: {encoded['input_ids']}")
print(f"Tokens: {tokenizer.convert_ids_to_tokens(encoded['input_ids'][0])}")
print(f"Token count: {len(encoded['input_ids'][0])}")

# Batch encoding
texts = ["First sentence.", "Second longer sentence here."]
batch = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
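The reverse direction is just as useful when debugging; a short sketch decoding the ids back to text, with and without the special tokens BERT inserts:

# tokenizer_decode.py: round-trip ids back to text
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
ids = tokenizer("Hugging Face makes NLP easy!")["input_ids"]
print(tokenizer.decode(ids))                            # includes [CLS] / [SEP]
print(tokenizer.decode(ids, skip_special_tokens=True))  # just the text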
Fine-Tuning with Trainer API
# fine_tune.py — Fine-tune a model on a custom dataset using the Trainer API
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
)
from datasets import load_dataset
import numpy as np
from sklearn.metrics import accuracy_score

model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

dataset = load_dataset("imdb")

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=256)

tokenized = dataset.map(tokenize, batched=True)

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, preds)}

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    weight_decay=0.01,
    logging_steps=100,
    fp16=True,
    push_to_hub=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized["train"].select(range(5000)),
    eval_dataset=tokenized["test"].select(range(1000)),
    compute_metrics=compute_metrics,
)

trainer.train()
trainer.evaluate()
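To reuse the result, save it and point a pipeline at the directory. A sketch continuing fine_tune.py above; "./results/final" is an illustrative path, not something the Trainer creates on its own:

# use_finetuned.py: continues fine_tune.py, so `trainer` and `tokenizer` already exist
from transformers import pipeline

trainer.save_model("./results/final")         # writes model weights + config
tokenizer.save_pretrained("./results/final")  # keep the tokenizer alongside

clf = pipeline("sentiment-analysis", model="./results/final")
print(clf("A surprisingly good movie."))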
LoRA / PEFT Fine-Tuning
# lora_finetune.py — Parameter-efficient fine-tuning with LoRA
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model, TaskType
import torch

model_name = "meta-llama/Llama-2-7b-hf"
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
# trainable params: 4,194,304 || all params: 6,742,609,920 || trainable%: 0.062
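After training, only the small adapter needs to be saved; it can later be re-attached to a freshly loaded base model. A sketch continuing lora_finetune.py, with "./llama2-lora" as an illustrative path:

# lora_save_load.py: continues lora_finetune.py, so `model` is the PEFT model
model.save_pretrained("./llama2-lora")  # writes just the LoRA weights (a few MB)

from peft import PeftModel
from transformers import AutoModelForCausalLM
import torch

base = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf", torch_dtype=torch.float16, device_map="auto"
)
model = PeftModel.from_pretrained(base, "./llama2-lora")  # base weights + adapter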
Datasets Library
# datasets_usage.py — Load, process, and create custom datasets
from datasets import load_dataset, Dataset

# Load from Hub
squad = load_dataset("squad", split="train[:1000]")
print(squad[0])

# Load custom CSV/JSON
dataset = Dataset.from_dict({
    "text": ["Hello world", "Goodbye world"],
    "label": [1, 0],
})

# Save and load locally
dataset.save_to_disk("./my_dataset")
loaded = Dataset.load_from_disk("./my_dataset")

# Push to Hub
dataset.push_to_hub("my-username/my-dataset", private=True)
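For corpora too large to download, the library can stream records on demand instead; a minimal sketch using streaming=True, which returns an IterableDataset:

# datasets_streaming.py: iterate a dataset without downloading it
from datasets import load_dataset

stream = load_dataset("imdb", split="train", streaming=True)  # IterableDataset
for example in stream.take(3):  # take() limits an IterableDataset
    print(example["label"], example["text"][:60])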
Publishing Models to Hub
# push_to_hub.py — Save and share your trained model on Hugging Face Hub
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_name = "my-org/my-fine-tuned-model"

# After training (e.g. fine_tune.py above), push model and tokenizer
model.push_to_hub(model_name, private=True)
tokenizer.push_to_hub(model_name, private=True)

# Create a model card
from huggingface_hub import ModelCard, ModelCardData

card = ModelCard.from_template(
    card_data=ModelCardData(license="mit", datasets=["imdb"], language="en"),
    template_str="# My Model\n\nFine-tuned DistilBERT on IMDB.\n\n{{ card_data }}",
)
card.push_to_hub(model_name)
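If the checkpoint is already saved locally, huggingface_hub's HfApi can upload the whole folder directly. A sketch reusing the repo id above; the local path is illustrative:

# upload_folder.py: push a saved checkpoint directory with HfApi
from huggingface_hub import HfApi

api = HfApi()  # uses the token cached by `huggingface-cli login`
api.create_repo("my-org/my-fine-tuned-model", private=True, exist_ok=True)
api.upload_folder(folder_path="./results/final", repo_id="my-org/my-fine-tuned-model")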
Inference API
# inference_api.py — Use the serverless Inference API for quick model access
from huggingface_hub import InferenceClient

client = InferenceClient(token="hf_xxxxxxxxxxxx")

# Text generation
response = client.text_generation(
    "The future of AI is",
    model="mistralai/Mistral-7B-Instruct-v0.2",
    max_new_tokens=100,
)
print(response)

# Image generation
image = client.text_to_image("A cat wearing a space suit")
image.save("space_cat.png")

# Embeddings
embeddings = client.feature_extraction("Hello world", model="sentence-transformers/all-MiniLM-L6-v2")
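Recent huggingface_hub versions also expose an OpenAI-style chat endpoint on the same client; a hedged sketch using chat_completion():

# chat_completion.py: chat-style call through InferenceClient (newer huggingface_hub)
from huggingface_hub import InferenceClient

client = InferenceClient(token="hf_xxxxxxxxxxxx")
resp = client.chat_completion(
    messages=[{"role": "user", "content": "Explain transformers in one sentence."}],
    model="mistralai/Mistral-7B-Instruct-v0.2",
    max_tokens=100,
)
print(resp.choices[0].message.content)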
Key Concepts
- AutoClasses: AutoModel and AutoTokenizer automatically detect the right architecture from a checkpoint's config (see the sketch after this list)
- device_map="auto": Distributes model layers across available GPUs/CPU
- load_in_4bit/8bit: Quantization via bitsandbytes for reduced memory usage
- PEFT/LoRA: Train only a small fraction of parameters, which is faster and cheaper
- Datasets streaming: load_dataset(..., streaming=True) iterates over huge datasets without downloading them
- Trainer: High-level API handling training loops, evaluation, checkpointing, and logging
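A quick illustration of the AutoClasses bullet: the concrete classes are resolved from the checkpoint's config, not from the class you call:

# autoclass_demo.py: AutoModel resolves the architecture from the checkpoint
from transformers import AutoModel, AutoTokenizer

model = AutoModel.from_pretrained("bert-base-uncased")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
print(type(model).__name__)      # BertModel
print(type(tokenizer).__name__)  # BertTokenizerFast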