# Skillshub · coreweave-sdk-patterns

## Install

**Source** · Clone the upstream repo:

```sh
git clone https://github.com/ComeOnOliver/skillshub
```

**Claude Code** · Install into `~/.claude/skills/`:

```sh
T=$(mktemp -d) && git clone --depth=1 https://github.com/ComeOnOliver/skillshub "$T" && mkdir -p ~/.claude/skills && cp -r "$T/skills/jeremylongshore/claude-code-plugins-plus-skills/coreweave-sdk-patterns" ~/.claude/skills/comeonoliver-skillshub-coreweave-sdk-patterns && rm -rf "$T"
```

**Manifest** · `skills/jeremylongshore/claude-code-plugins-plus-skills/coreweave-sdk-patterns/SKILL.md`
## CoreWeave SDK Patterns

### Overview
CoreWeave is Kubernetes-native: use kubectl, the Kubernetes Python client, or Helm for programmatic management. These patterns cover GPU-aware deployment templates, an inference client wrapper, and node affinity configuration.
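Before deploying, it helps to confirm which GPU classes your cluster actually exposes. Below is a minimal sketch using the official `kubernetes` Python client; it assumes your kubeconfig points at a CoreWeave cluster and that GPU nodes carry the `gpu.nvidia.com/class` label used throughout this skill:

```python
# list_gpu_nodes.py -- sketch, assumes a kubeconfig for a CoreWeave cluster
from kubernetes import client, config

config.load_kube_config()  # reads the current kubeconfig context
v1 = client.CoreV1Api()

# Select only nodes that carry CoreWeave's GPU class label.
for node in v1.list_node(label_selector="gpu.nvidia.com/class").items:
    gpu_class = node.metadata.labels["gpu.nvidia.com/class"]
    gpu_count = node.status.allocatable.get("nvidia.com/gpu", "0")
    print(f"{node.metadata.name}: {gpu_class} x{gpu_count}")
```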
### Instructions

#### GPU Affinity Helper
```python
# coreweave_helpers.py
from dataclasses import dataclass


@dataclass
class GPUConfig:
    gpu_class: str  # A100_PCIE_80GB, H100_SXM5, L40, etc.
    gpu_count: int = 1
    memory_gb: int = 32
    cpu_cores: int = 4


# Shorthand names mapped to CoreWeave GPU classes with matching pod sizing.
GPU_CATALOG = {
    "a100-80gb": GPUConfig("A100_PCIE_80GB", memory_gb=48, cpu_cores=8),
    "h100-80gb": GPUConfig("H100_SXM5", memory_gb=64, cpu_cores=12),
    "l40": GPUConfig("L40", memory_gb=24, cpu_cores=4),
    "a100-8x": GPUConfig("A100_NVLINK_A100_SXM4_80GB", gpu_count=8,
                         memory_gb=256, cpu_cores=64),
}


def gpu_affinity_block(gpu_class: str) -> dict:
    """Node affinity pinning a pod to nodes with the given GPU class label."""
    return {
        "nodeAffinity": {
            "requiredDuringSchedulingIgnoredDuringExecution": {
                "nodeSelectorTerms": [{
                    "matchExpressions": [{
                        "key": "gpu.nvidia.com/class",
                        "operator": "In",
                        "values": [gpu_class],
                    }]
                }]
            }
        }
    }


def gpu_resources(config: GPUConfig) -> dict:
    """Resource block: full limits, half-size memory/CPU requests."""
    return {
        "limits": {
            "nvidia.com/gpu": str(config.gpu_count),
            "memory": f"{config.memory_gb}Gi",
            "cpu": str(config.cpu_cores),
        },
        "requests": {
            "nvidia.com/gpu": str(config.gpu_count),
            "memory": f"{config.memory_gb // 2}Gi",
            "cpu": str(config.cpu_cores // 2),
        },
    }
```
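As a quick sanity check, the helpers can be rendered to YAML before they go into a manifest. A usage sketch, assuming `coreweave_helpers.py` above is importable:

```python
# usage sketch -- renders the scheduling config for an H100 pod
import yaml
from coreweave_helpers import GPU_CATALOG, gpu_affinity_block, gpu_resources

cfg = GPU_CATALOG["h100-80gb"]
print(yaml.safe_dump(
    {"affinity": gpu_affinity_block(cfg.gpu_class), "resources": gpu_resources(cfg)},
    sort_keys=False,
))
```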
#### Inference Client Wrapper
```python
# inference_client.py
import requests


class CoreWeaveInferenceClient:
    """Thin wrapper around an OpenAI-compatible inference endpoint."""

    def __init__(self, endpoint: str, timeout: int = 30):
        self.endpoint = endpoint.rstrip("/")
        self.timeout = timeout
        self.session = requests.Session()

    def generate(self, prompt: str, max_tokens: int = 256, **kwargs) -> str:
        resp = self.session.post(
            f"{self.endpoint}/v1/completions",
            json={"prompt": prompt, "max_tokens": max_tokens, **kwargs},
            timeout=self.timeout,
        )
        resp.raise_for_status()
        return resp.json()["choices"][0]["text"]

    def chat(self, messages: list[dict], **kwargs) -> str:
        resp = self.session.post(
            f"{self.endpoint}/v1/chat/completions",
            json={"messages": messages, **kwargs},
            timeout=self.timeout,
        )
        resp.raise_for_status()
        return resp.json()["choices"][0]["message"]["content"]

    def health(self) -> bool:
        try:
            resp = self.session.get(f"{self.endpoint}/health", timeout=5)
            return resp.status_code == 200
        except requests.RequestException:
            return False
```
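Typical usage, assuming an OpenAI-compatible model server (e.g. vLLM) behind the endpoint; the URL below is a placeholder, not a real service:

```python
# usage sketch -- endpoint URL is a hypothetical placeholder
client = CoreWeaveInferenceClient("https://my-model.example.coreweave.app")

if client.health():
    print(client.generate("Summarize node affinity in one sentence.", max_tokens=64))
    print(client.chat([{"role": "user", "content": "Hello!"}], max_tokens=64))
else:
    print("Service not ready yet -- check the pod's readiness probe.")
```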
#### Deployment Template Generator
```python
import yaml

from coreweave_helpers import GPU_CATALOG, gpu_affinity_block, gpu_resources


def generate_inference_deployment(
    name: str,
    image: str,
    gpu_type: str = "a100-80gb",
    replicas: int = 1,
    port: int = 8000,
) -> str:
    """Render a Deployment manifest pinned to the requested GPU class."""
    config = GPU_CATALOG[gpu_type]
    return yaml.dump({
        "apiVersion": "apps/v1",
        "kind": "Deployment",
        "metadata": {"name": name},
        "spec": {
            "replicas": replicas,
            "selector": {"matchLabels": {"app": name}},
            "template": {
                "metadata": {"labels": {"app": name}},
                "spec": {
                    "containers": [{
                        "name": name,
                        "image": image,
                        "ports": [{"containerPort": port}],
                        "resources": gpu_resources(config),
                    }],
                    "affinity": gpu_affinity_block(config.gpu_class),
                },
            },
        },
    })
```
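A usage sketch that writes the rendered manifest to disk for `kubectl apply`; the deployment name and image are hypothetical placeholders:

```python
# usage sketch -- name and image are placeholders
manifest = generate_inference_deployment(
    name="llama-inference",
    image="ghcr.io/example/llama-server:latest",
    gpu_type="h100-80gb",
    replicas=2,
)
with open("deployment.yaml", "w") as f:
    f.write(manifest)
# Then: kubectl apply -f deployment.yaml
```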
### Error Handling
| Error | Cause | Solution |
|---|---|---|
| GPU class not found | Typo in node label selector | Use the exact class strings from `GPU_CATALOG` |
| OOM on inference | Model too large for GPU | Use larger GPU or quantized model |
| Connection refused | Service not ready | Check pod readiness probe |
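For the "connection refused" case, rather than failing immediately you can poll the client's `health()` method until the model server finishes loading. A minimal sketch, building on the wrapper above:

```python
import time

def wait_until_ready(client: CoreWeaveInferenceClient,
                     retries: int = 30, delay: float = 10.0) -> bool:
    """Poll the /health endpoint until it responds or retries run out."""
    for _ in range(retries):
        if client.health():
            return True
        time.sleep(delay)  # large models can take minutes to load weights
    return False
```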
### Next Steps

Apply these patterns in `coreweave-core-workflow-a` for KServe inference deployments.