Claude-skill-registry-data instructor

Structured outputs with Instructor. Extract typed data from LLMs using Pydantic models and validation. Use for data extraction, structured generation, and type-safe LLM responses.

install
source · Clone the upstream repo
git clone https://github.com/majiayu000/claude-skill-registry-data
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/majiayu000/claude-skill-registry-data "$T" && mkdir -p ~/.claude/skills && cp -r "$T/data/instructor-housegarofalo-claude-code-base" ~/.claude/skills/majiayu000-claude-skill-registry-data-instructor && rm -rf "$T"
manifest: data/instructor-housegarofalo-claude-code-base/SKILL.md
source content

Instructor

Expert guidance for structured LLM outputs with Pydantic validation.

Triggers

Use this skill when:

  • Extracting structured data from LLM responses
  • Building type-safe LLM applications
  • Validating LLM outputs with Pydantic
  • Implementing data extraction pipelines
  • Working with structured generation
  • Keywords: instructor, structured output, pydantic, data extraction, validation, typed responses

Installation

pip install instructor

Quick Start

import instructor
from pydantic import BaseModel
from openai import OpenAI

# Patch OpenAI client
client = instructor.from_openai(OpenAI())

class User(BaseModel):
    name: str
    age: int

# Extract structured data
user = client.chat.completions.create(
    model="gpt-4o",
    response_model=User,
    messages=[
        {"role": "user", "content": "John is 25 years old"}
    ]
)

print(user)  # User(name='John', age=25)

Pydantic Models

Basic Models

from pydantic import BaseModel, Field
from typing import Optional, List
from enum import Enum

class Priority(str, Enum):
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"

class Task(BaseModel):
    title: str = Field(description="Task title")
    description: str = Field(description="Detailed description")
    priority: Priority = Field(description="Task priority level")
    due_date: Optional[str] = Field(None, description="Due date in YYYY-MM-DD format")
    tags: List[str] = Field(default_factory=list, description="Related tags")

task = client.chat.completions.create(
    model="gpt-4o",
    response_model=Task,
    messages=[
        {"role": "user", "content": "Create a task for reviewing the Q4 report, high priority, due next Friday"}
    ]
)

Nested Models

from pydantic import BaseModel
from typing import List

class Address(BaseModel):
    street: str
    city: str
    country: str
    postal_code: str

class Person(BaseModel):
    name: str
    email: str
    address: Address
    phone_numbers: List[str]

person = client.chat.completions.create(
    model="gpt-4o",
    response_model=Person,
    messages=[
        {"role": "user", "content": """
        Extract: John Smith, john@email.com, lives at 123 Main St,
        New York, USA 10001. Phone: 555-1234, 555-5678
        """}
    ]
)

Validation

Field Validators

from pydantic import BaseModel, field_validator
from typing import List

class SearchQuery(BaseModel):
    query: str
    filters: List[str]

    @field_validator('query')
    @classmethod
    def query_not_empty(cls, v):
        if not v.strip():
            raise ValueError("Query cannot be empty")
        return v.strip()

    @field_validator('filters')
    @classmethod
    def validate_filters(cls, v):
        valid = ["date", "author", "category"]
        for f in v:
            if f not in valid:
                raise ValueError(f"Invalid filter: {f}")
        return v

Model Validators

from pydantic import BaseModel, model_validator

class DateRange(BaseModel):
    start_date: str  # ISO format (YYYY-MM-DD), so string comparison matches chronological order
    end_date: str

    @model_validator(mode='after')
    def validate_dates(self):
        if self.start_date > self.end_date:
            raise ValueError("Start date must be before end date")
        return self

Retries

import instructor
from openai import OpenAI

client = instructor.from_openai(
    OpenAI(),
    mode=instructor.Mode.TOOLS
)

# Automatic retries on validation failure
user = client.chat.completions.create(
    model="gpt-4o",
    response_model=User,
    max_retries=3,
    messages=[{"role": "user", "content": "Extract user info"}]
)

Streaming

from instructor import from_openai
from pydantic import BaseModel
from typing import List

class Report(BaseModel):
    title: str
    sections: List[str]
    summary: str

client = from_openai(OpenAI())

# Stream partial results
for partial in client.chat.completions.create_partial(
    model="gpt-4o",
    response_model=Report,
    messages=[{"role": "user", "content": "Write a report on AI trends"}],
):
    print(partial)  # Partial[Report] with available fields

Iterable Extraction

from instructor import from_openai
from pydantic import BaseModel
from typing import Iterable

class Product(BaseModel):
    name: str
    price: float
    category: str

client = from_openai(OpenAI())

products: Iterable[Product] = client.chat.completions.create_iterable(
    model="gpt-4o",
    response_model=Product,
    messages=[
        {"role": "user", "content": """
        Extract products:
        - iPhone 15 Pro: $999, Electronics
        - Nike Air Max: $150, Footwear
        - MacBook Pro: $2499, Electronics
        """}
    ]
)

for product in products:
    print(product)

Multiple Providers

Anthropic

import instructor
from anthropic import Anthropic

client = instructor.from_anthropic(Anthropic())

user = client.messages.create(
    model="claude-3-5-sonnet-20241022",
    max_tokens=1024,
    response_model=User,
    messages=[{"role": "user", "content": "John is 25"}]
)

Ollama

import instructor
from openai import OpenAI

client = instructor.from_openai(
    OpenAI(base_url="http://localhost:11434/v1", api_key="ollama"),
    mode=instructor.Mode.JSON
)

user = client.chat.completions.create(
    model="llama3.1",
    response_model=User,
    messages=[{"role": "user", "content": "John is 25"}]
)

LiteLLM

import instructor
import litellm

client = instructor.from_litellm(litellm.completion)

user = client.chat.completions.create(
    model="gpt-4o",
    response_model=User,
    messages=[{"role": "user", "content": "John is 25"}]
)

Advanced Patterns

Chain of Thought

from pydantic import BaseModel, Field

class Reasoning(BaseModel):
    chain_of_thought: str = Field(
        description="Step by step reasoning before the answer"
    )
    answer: str = Field(description="Final answer")

result = client.chat.completions.create(
    model="gpt-4o",
    response_model=Reasoning,
    messages=[
        {"role": "user", "content": "What is 25 * 47?"}
    ]
)
print(result.chain_of_thought)
print(result.answer)

Classification

from enum import Enum

class Sentiment(str, Enum):
    POSITIVE = "positive"
    NEGATIVE = "negative"
    NEUTRAL = "neutral"

class SentimentAnalysis(BaseModel):
    text: str
    sentiment: Sentiment
    confidence: float = Field(ge=0, le=1)

analysis = client.chat.completions.create(
    model="gpt-4o",
    response_model=SentimentAnalysis,
    messages=[
        {"role": "user", "content": "Analyze: 'I love this product!'"}
    ]
)

Resources