Vibe-Skills hypothesis-testing

install

source · Clone the upstream repo

git clone https://github.com/foryourhealth111-pixel/Vibe-Skills

Claude Code · Install into ~/.claude/skills/

T=$(mktemp -d) && git clone --depth=1 https://github.com/foryourhealth111-pixel/Vibe-Skills "$T" && mkdir -p ~/.claude/skills && cp -r "$T/bundled/skills/hypothesis-testing" ~/.claude/skills/foryourhealth111-pixel-vibe-skills-hypothesis-testing && rm -rf "$T"

manifest: bundled/skills/hypothesis-testing/SKILL.md

Hypothesis Property-Based Testing

Hypothesis is a powerful property-based testing library that automatically generates test cases to find edge cases and validate properties of your code.

Core Concept

Traditional example-based testing:

def test_addition():
    assert add(2, 3) == 5
    assert add(0, 0) == 0
    assert add(-1, 1) == 0

Property-based testing with Hypothesis:

from hypothesis import given
import hypothesis.strategies as st

@given(st.integers(), st.integers())
def test_addition_commutative(a, b):
    """Addition is commutative for ALL integers."""
    assert add(a, b) == add(b, a)

Hypothesis generates hundreds of test cases automatically, including edge cases you might not think of.

Installation

# Install hypothesis with pytest integration
uv add --dev hypothesis pytest

# Optional plugins
uv add --dev hypothesis[numpy]      # NumPy strategies
uv add --dev hypothesis[pandas]     # Pandas strategies
uv add --dev hypothesis[django]     # Django model strategies

Configuration

pyproject.toml Configuration

[tool.pytest.ini_options]
# Hypothesis settings
addopts = [
    "--hypothesis-show-statistics",  # Show test statistics
    "--hypothesis-seed=0",            # Reproducible tests (optional)
]

[tool.hypothesis]
# Maximum number of examples to generate
max_examples = 200  # Default: 100, CI: 200+

# Deadline for each test case (milliseconds)
deadline = 1000  # Default: 200ms, None to disable

# Verbosity level (quiet, normal, verbose, debug)
verbosity = "normal"

# Fail fast on first error
derandomize = false  # Set to true for deterministic tests

# Database for example storage
database = ".hypothesis/examples"  # Store found failures

# Profile-specific settings
[tool.hypothesis.profiles.dev]
max_examples = 50
deadline = 1000
verbosity = "normal"

[tool.hypothesis.profiles.ci]
max_examples = 500
deadline = 5000
verbosity = "verbose"

[tool.hypothesis.profiles.debug]
max_examples = 10
deadline = null
verbosity = "debug"

Activate Profile

# tests/conftest.py
from hypothesis import settings, Verbosity

# Set default profile based on environment
import os
if os.getenv("CI"):
    settings.load_profile("ci")
else:
    settings.load_profile("dev")

# Or configure programmatically
settings.register_profile("custom", max_examples=100, deadline=500)
settings.load_profile("custom")

Basic Usage

Simple Property Tests

from hypothesis import given, example
import hypothesis.strategies as st

# Test numeric properties
@given(st.integers())
def test_absolute_value_non_negative(x):
    """abs(x) is always non-negative."""
    assert abs(x) >= 0

@given(st.integers(), st.integers())
def test_addition_associative(a, b, c):
    """Addition is associative: (a + b) + c == a + (b + c)."""
    assert (a + b) + c == a + (b + c)

# Test string properties
@given(st.text())
def test_string_length(s):
    """Length of reversed string equals original."""
    assert len(s[::-1]) == len(s)

@given(st.text(), st.text())
def test_string_concatenation(s1, s2):
    """String concatenation length is sum of lengths."""
    result = s1 + s2
    assert len(result) == len(s1) + len(s2)

# Add explicit examples alongside generated ones
@given(st.integers())
@example(0)
@example(-1)
@example(2**31 - 1)
def test_with_explicit_examples(x):
    """Test with both generated and explicit examples."""
    assert process(x) is not None

Testing Functions

from hypothesis import given, assume
import hypothesis.strategies as st

def safe_divide(a: float, b: float) -> float:
    """Divide a by b, avoiding division by zero."""
    if b == 0:
        raise ValueError("Division by zero")
    return a / b

@given(st.floats(allow_nan=False, allow_infinity=False),
       st.floats(allow_nan=False, allow_infinity=False))
def test_safe_divide(a, b):
    """Test safe_divide with all valid floats."""
    assume(b != 0)  # Skip cases where b is zero

    result = safe_divide(a, b)

    # Properties to verify
    assert isinstance(result, float)
    assert result * b == pytest.approx(a)  # Inverse operation

@given(st.floats())
def test_divide_by_zero_raises(a):
    """Division by zero raises ValueError."""
    with pytest.raises(ValueError, match="Division by zero"):
        safe_divide(a, 0)

Strategies

Built-in Strategies

import hypothesis.strategies as st

# Primitives
st.none()                    # None
st.booleans()                # True/False
st.integers()                # Any integer
st.integers(min_value=0, max_value=100)  # Bounded integers
st.floats()                  # Any float
st.floats(min_value=0.0, max_value=1.0)  # Bounded floats
st.decimals()                # Decimal numbers
st.fractions()               # Fraction objects
st.complex_numbers()         # Complex numbers

# Text and bytes
st.text()                    # Unicode strings
st.text(alphabet="abc")      # Limited alphabet
st.text(min_size=1, max_size=10)  # Bounded length
st.binary()                  # Bytes
st.characters()              # Single characters

# Collections
st.lists(st.integers())      # Lists of integers
st.lists(st.text(), min_size=1, max_size=10)  # Bounded lists
st.tuples(st.integers(), st.text())  # Fixed-size tuples
st.sets(st.integers())       # Sets
st.frozensets(st.text())     # Frozen sets
st.dictionaries(keys=st.text(), values=st.integers())  # Dicts

# Special types
st.uuids()                   # UUID objects
st.datetimes()               # datetime objects
st.dates()                   # date objects
st.times()                   # time objects
st.timedeltas()              # timedelta objects

# Constrained types
st.emails()                  # Valid email addresses
st.ip_addresses()            # IP addresses (v4 and v6)
st.urls()                    # Valid URLs

Composite Strategies

from hypothesis import given
from hypothesis.strategies import composite
import hypothesis.strategies as st

# Define custom strategy
@composite
def users(draw):
    """Generate user objects."""
    return {
        "id": draw(st.integers(min_value=1)),
        "name": draw(st.text(min_size=1, max_size=50)),
        "email": draw(st.emails()),
        "age": draw(st.integers(min_value=0, max_value=120)),
        "active": draw(st.booleans())
    }

@given(users())
def test_user_validation(user):
    """Test user validation with generated users."""
    assert user["id"] > 0
    assert len(user["name"]) > 0
    assert "@" in user["email"]
    assert 0 <= user["age"] <= 120

# Complex composite strategy
@composite
def http_requests(draw):
    """Generate HTTP request objects."""
    method = draw(st.sampled_from(["GET", "POST", "PUT", "DELETE"]))
    path = draw(st.text(alphabet="abcdefghijklmnopqrstuvwxyz/", min_size=1))
    headers = draw(st.dictionaries(
        keys=st.text(alphabet="abcdefghijklmnopqrstuvwxyz-", min_size=1),
        values=st.text()
    ))

    body = None
    if method in ["POST", "PUT"]:
        body = draw(st.one_of(st.none(), st.text(), st.binary()))

    return {
        "method": method,
        "path": f"/{path}",
        "headers": headers,
        "body": body
    }

@given(http_requests())
def test_request_handler(request):
    """Test HTTP request handler with various requests."""
    response = handle_request(request)
    assert response.status_code in [200, 201, 400, 404, 500]

Data Classes and Models

from dataclasses import dataclass
from hypothesis import given
from hypothesis.strategies import builds
import hypothesis.strategies as st

@dataclass
class Point:
    x: float
    y: float

# Generate Point instances
@given(builds(Point, x=st.floats(), y=st.floats()))
def test_point_distance(point):
    """Test distance calculation for points."""
    origin = Point(0.0, 0.0)
    distance = calculate_distance(origin, point)
    assert distance >= 0

# More complex model
@dataclass
class User:
    id: int
    name: str
    email: str
    age: int

# Strategy with validation
def valid_users():
    return builds(
        User,
        id=st.integers(min_value=1),
        name=st.text(min_size=1, max_size=100),
        email=st.emails(),
        age=st.integers(min_value=0, max_value=120)
    )

@given(valid_users())
def test_user_serialization(user):
    """Test user serialization round-trip."""
    json_data = user.to_json()
    restored = User.from_json(json_data)
    assert restored == user

Strategy Combinators

import hypothesis.strategies as st

# one_of: Choose from multiple strategies
st.one_of(st.none(), st.integers(), st.text())

# sampled_from: Sample from a list
st.sampled_from(["admin", "user", "guest"])

# just: Always return a specific value
st.just(42)

# lists with constraints
st.lists(
    st.integers(min_value=0),
    min_size=1,
    max_size=10,
    unique=True  # No duplicates
)

# dictionaries with constraints
st.dictionaries(
    keys=st.text(min_size=1),
    values=st.integers(),
    min_size=1,
    max_size=5
)

# tuples with mixed types
st.tuples(st.integers(), st.text(), st.booleans())

# fixed_dictionaries: Dictionary with specific keys
st.fixed_dictionaries({
    "id": st.integers(min_value=1),
    "name": st.text(),
    "optional": st.one_of(st.none(), st.text())
})

# recursive: Generate recursive structures
json_strategy = st.recursive(
    st.one_of(st.none(), st.booleans(), st.floats(), st.text()),
    lambda children: st.lists(children) | st.dictionaries(st.text(), children),
    max_leaves=10
)

Advanced Patterns

Stateful Testing

from hypothesis.stateful import RuleBasedStateMachine, rule, invariant
import hypothesis.strategies as st

class ShoppingCartMachine(RuleBasedStateMachine):
    """Test shopping cart with stateful operations."""

    def __init__(self):
        super().__init__()
        self.cart = ShoppingCart()
        self.items_added = []

    @rule(item=st.text(min_size=1), quantity=st.integers(min_value=1, max_value=10))
    def add_item(self, item, quantity):
        """Add item to cart."""
        self.cart.add(item, quantity)
        self.items_added.append((item, quantity))

    @rule(item=st.text())
    def remove_item(self, item):
        """Remove item from cart."""
        try:
            self.cart.remove(item)
            self.items_added = [(i, q) for i, q in self.items_added if i != item]
        except ValueError:
            # Item not in cart, expected
            pass

    @rule()
    def clear_cart(self):
        """Clear entire cart."""
        self.cart.clear()
        self.items_added = []

    @invariant()
    def cart_is_consistent(self):
        """Cart item count matches what we've added."""
        expected_items = {item: qty for item, qty in self.items_added}
        actual_items = self.cart.get_items()
        assert expected_items == actual_items

    @invariant()
    def total_is_non_negative(self):
        """Cart total is always non-negative."""
        assert self.cart.get_total() >= 0

# Run the state machine
TestShoppingCart = ShoppingCartMachine.TestCase

Shrinking and Example Database

from hypothesis import given, settings, example
import hypothesis.strategies as st

@given(st.lists(st.integers()))
def test_list_processing(items):
    """Test list processing - Hypothesis will shrink failing examples."""
    result = process_list(items)
    assert result is not None

# When a test fails, Hypothesis:
# 1. Finds the failing input
# 2. Shrinks it to the simplest failing case
# 3. Stores it in .hypothesis/examples database
# 4. Replays it on future runs

# Example: If test fails on [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# Hypothesis shrinks to smallest failing case: [1, 2]

# Disable shrinking for debugging
@given(st.lists(st.integers()))
@settings(max_examples=100, phases=["generate"])  # Skip shrink phase
def test_without_shrinking(items):
    """Test without shrinking for faster debugging."""
    assert process_list(items) is not None

Targeted Property Testing

from hypothesis import given, target
import hypothesis.strategies as st

@given(st.lists(st.integers()))
def test_sort_with_targeting(items):
    """Guide Hypothesis toward larger lists."""
    # Target larger lists to test scalability
    target(float(len(items)))

    sorted_items = sorted(items)
    assert all(sorted_items[i] <= sorted_items[i+1]
               for i in range(len(sorted_items) - 1))

@given(st.floats(min_value=0.0, max_value=1.0))
def test_with_edge_targeting(probability):
    """Guide Hypothesis toward edge values (0.0 and 1.0)."""
    # Target values close to edges
    target(abs(probability - 0.5))  # Prefer extreme values

    result = simulate_with_probability(probability)
    assert 0 <= result <= 1

Hypothesis with Async Code

import pytest
from hypothesis import given
import hypothesis.strategies as st

# Async property test
@pytest.mark.asyncio
@given(st.integers())
async def test_async_function(value):
    """Test async function with property-based testing."""
    result = await async_process(value)
    assert result is not None

# Async with composite strategies
@pytest.mark.asyncio
@given(st.lists(st.integers(), min_size=1))
async def test_async_batch_processing(items):
    """Test async batch processing."""
    results = await process_batch(items)
    assert len(results) == len(items)
    assert all(r is not None for r in results)

When to Use Hypothesis vs Example-Based Tests

Use Hypothesis (Property-Based) When:

Testing mathematical properties

@given(st.integers(), st.integers())
def test_addition_commutative(a, b):
    assert a + b == b + a

Testing invariants across many inputs

@given(st.lists(st.integers()))
def test_sort_idempotent(items):
    sorted_once = sorted(items)
    sorted_twice = sorted(sorted_once)
    assert sorted_once == sorted_twice

Finding edge cases

@given(st.text())
def test_parse_input(text):
    # Hypothesis will find weird edge cases like:
    # "", "\x00", "🔥", very long strings, etc.
    result = parse(text)
    assert result is not None

Testing serialization round-trips

@given(st.from_type(MyData))
def test_serialization(data):
    json_str = data.to_json()
    restored = MyData.from_json(json_str)
    assert restored == data

Testing APIs with many parameters

@given(
    st.text(),
    st.integers(min_value=1),
    st.booleans(),
    st.sampled_from(["option1", "option2"])
)
def test_api_call(name, count, flag, option):
    response = api_call(name, count, flag, option)
    assert response.status in [200, 400]

Use Example-Based Tests When:

Testing specific known edge cases

def test_empty_list():
    assert process([]) == []

def test_single_item():
    assert process([1]) == [1]

Testing exact business logic

def test_discount_calculation():
    # Exact prices from requirements
    assert calculate_discount(100, 0.1) == 10
    assert calculate_discount(50, 0.2) == 10

Testing error messages

def test_validation_error_message():
    with pytest.raises(ValueError, match="Email must contain @"):
        validate_email("invalid")

Testing integration with external systems

def test_api_integration():
    # Specific API behavior
    response = api.get("/users/123")
    assert response["name"] == "Test User"

Testing UI behavior

def test_button_click():
    button.click()
    assert button.text == "Clicked"

Hybrid Approach (Best Practice)

from hypothesis import given, example
import hypothesis.strategies as st

# Combine property-based + example-based
@given(st.integers())
@example(0)           # Explicit edge case
@example(-1)          # Explicit edge case
@example(2**31 - 1)   # Explicit edge case
def test_absolute_value(x):
    """Test with both generated and explicit examples."""
    result = abs(x)
    assert result >= 0
    assert result == abs(-x)

# Property test + regression test
@given(st.lists(st.integers()))
@example([1, 2, 3])  # Known good case
@example([])         # Edge case
@example([42] * 1000)  # Performance edge case
def test_list_processing(items):
    """Test general property + specific known cases."""
    result = process_list(items)
    assert len(result) == len(items)

Best Practices

1. Start with Simple Properties

# Start simple
@given(st.integers())
def test_increment(x):
    assert x + 1 > x

# Then add complexity
@given(st.integers(), st.integers())
def test_addition_properties(a, b):
    # Commutative
    assert a + b == b + a
    # Associative
    assert (a + 1) + b == a + (1 + b)

2. Use assume() Sparingly

# BAD: Too many assumes (slow)
@given(st.integers(), st.integers())
def test_slow(a, b):
    assume(a > 0)
    assume(b > 0)
    assume(a < 100)
    assume(b < 100)
    assert a + b < 200

# GOOD: Use constrained strategies
@given(st.integers(min_value=1, max_value=99),
       st.integers(min_value=1, max_value=99))
def test_fast(a, b):
    assert a + b < 200

3. Test Invariants, Not Implementation

# BAD: Tests implementation details
@given(st.lists(st.integers()))
def test_sort_implementation(items):
    result = my_sort(items)
    # Checks specific algorithm behavior
    assert result.pivot_index == len(items) // 2

# GOOD: Tests invariants
@given(st.lists(st.integers()))
def test_sort_properties(items):
    result = my_sort(items)
    # Checks output properties
    assert len(result) == len(items)
    assert sorted(result) == result
    assert set(result) == set(items)

4. Use @example() for Regression Tests

@given(st.lists(st.integers()))
@example([])              # Found bug with empty list
@example([1, 1, 1])       # Found bug with duplicates
@example([-2**31])        # Found bug with min int
def test_with_regressions(items):
    """Property test + regression tests."""
    result = process(items)
    assert result is not None

5. Configure for Different Environments

from hypothesis import given, settings, Verbosity

# Development: Fast feedback
@given(st.lists(st.integers()))
@settings(max_examples=50, deadline=500)
def test_dev(items):
    assert process(items) is not None

# CI: Thorough testing
@given(st.lists(st.integers()))
@settings(max_examples=500, deadline=5000, verbosity=Verbosity.verbose)
def test_ci(items):
    assert process(items) is not None

Common Patterns

Testing Encoding/Decoding

@given(st.from_type(MyData))
def test_json_roundtrip(data):
    """JSON encoding/decoding preserves data."""
    json_str = data.to_json()
    restored = MyData.from_json(json_str)
    assert restored == data

@given(st.binary())
def test_base64_roundtrip(data):
    """Base64 encoding/decoding preserves data."""
    encoded = base64.b64encode(data)
    decoded = base64.b64decode(encoded)
    assert decoded == data

Testing Parsers

@given(st.text())
def test_parser_does_not_crash(text):
    """Parser handles any input without crashing."""
    try:
        result = parse(text)
        # If parsing succeeds, verify result is valid
        assert isinstance(result, ParsedData)
    except ParseError:
        # Parse errors are expected for invalid input
        pass

Testing Database Operations

@given(st.lists(valid_users(), max_size=10))
def test_batch_insert(users):
    """Batch insert preserves all users."""
    db.batch_insert(users)

    for user in users:
        retrieved = db.get_user(user.id)
        assert retrieved == user

@given(valid_users())
def test_update_preserves_id(user):
    """Updating user preserves ID."""
    db.save(user)
    original_id = user.id

    user.name = "Updated Name"
    db.save(user)

    assert user.id == original_id

CI Integration

# .github/workflows/test.yml
name: Tests

on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v2

      - name: Set up Python
        run: uv python install 3.12

      - name: Install dependencies
        run: uv sync --all-extras --dev

      - name: Run hypothesis tests (CI profile)
        run: |
          uv run pytest \
            --hypothesis-show-statistics \
            --hypothesis-profile=ci \
            --hypothesis-seed=${{ github.run_number }}

      - name: Upload hypothesis database
        uses: actions/upload-artifact@v4
        if: failure()
        with:
          name: hypothesis-examples
          path: .hypothesis/

Debugging Failing Tests

from hypothesis import given, settings, Verbosity, Phase
import hypothesis.strategies as st

# Debug mode: Verbose output, no shrinking
@given(st.lists(st.integers()))
@settings(
    verbosity=Verbosity.debug,
    max_examples=10,
    phases=[Phase.generate],  # Skip shrinking
    print_blob=True           # Print input data
)
def test_debug(items):
    """Debug failing test with full output."""
    result = buggy_function(items)
    assert result is not None

# Reproduce specific failing example
@given(st.lists(st.integers()))
@example([1, 2, -2147483648])  # Specific failing case
def test_reproduce_failure(items):
    """Reproduce and fix specific failure."""
    result = process(items)
    assert result is not None

Resources

Hypothesis Documentation: https://hypothesis.readthedocs.io/
Hypothesis for Property-Based Testing: https://increment.com/testing/in-praise-of-property-based-testing/
PBT vs Example-Based: https://hypothesis.works/articles/what-is-property-based-testing/
Hypothesis Strategies: https://hypothesis.readthedocs.io/en/latest/data.html
Stateful Testing: https://hypothesis.readthedocs.io/en/latest/stateful.html

Summary

Hypothesis provides property-based testing for Python:

@given decorator: Generate test inputs automatically
Strategies: Built-in and custom data generators
Shrinking: Automatically minimize failing examples
Stateful testing: Test complex state machines
Example database: Store and replay failing cases
Use for: Properties, invariants, round-trips, edge case discovery
Combine with: Example-based tests for comprehensive coverage
CI integration: Run with more examples in CI environments