Claude-code-plugins openrouter-sdk-patterns
install
source · Clone the upstream repo
git clone https://github.com/jeremylongshore/claude-code-plugins-plus-skills
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/jeremylongshore/claude-code-plugins-plus-skills "$T" && mkdir -p ~/.claude/skills && cp -r "$T/plugins/saas-packs/openrouter-pack/skills/openrouter-sdk-patterns" ~/.claude/skills/jeremylongshore-claude-code-plugins-openrouter-sdk-patterns && rm -rf "$T"
manifest:
plugins/saas-packs/openrouter-pack/skills/openrouter-sdk-patterns/SKILL.md
source content
OpenRouter SDK Patterns
Overview
Build production-grade OpenRouter client wrappers using the OpenAI SDK. The OpenAI Python/TypeScript SDKs work natively with OpenRouter by changing
base_url to https://openrouter.ai/api/v1. This skill covers typed wrappers, retry strategies, middleware, and reusable patterns.
Python: Production Client Wrapper
import hashlib
import json
import logging
import os
import time
from dataclasses import dataclass
from typing import Optional

from openai import OpenAI, APIError, RateLimitError, APITimeoutError

log = logging.getLogger("openrouter")


@dataclass
class CompletionResult:
    """Normalized result of a single chat completion."""

    content: str  # assistant message text ("" when the API returned none)
    model: str  # model ID reported in the response
    prompt_tokens: int  # 0 when the response carried no usage block
    completion_tokens: int  # 0 when the response carried no usage block
    generation_id: str  # the response's ``id``
    latency_ms: float  # time spent inside ``create()``, rounded to 0.1 ms


class OpenRouterClient:
    """Thin wrapper around the OpenAI SDK pointed at OpenRouter.

    Adds attribution headers, latency measurement, a typed result, and an
    optional in-memory cache for deterministic requests.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        app_name: str = "my-app",
        app_url: str = "https://my-app.com",
        max_retries: int = 3,
        timeout: float = 60.0,
    ):
        """Create the underlying SDK client.

        Args:
            api_key: OpenRouter key; falls back to ``OPENROUTER_API_KEY``.
            app_name: Sent as the ``X-Title`` header.
            app_url: Sent as the ``HTTP-Referer`` header.
            max_retries: Passed to the SDK's built-in retry logic.
            timeout: Per-request timeout in seconds.

        Raises:
            KeyError: If no key is given and ``OPENROUTER_API_KEY`` is unset.
        """
        self.client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=api_key or os.environ["OPENROUTER_API_KEY"],
            max_retries=max_retries,  # Built-in SDK retry with backoff
            timeout=timeout,
            default_headers={
                "HTTP-Referer": app_url,
                "X-Title": app_name,
            },
        )
        # Reused by check_credits() so the side-channel call cannot hang forever.
        self._timeout = timeout
        # NOTE: unbounded; entries live as long as this client does.
        self._cache: dict[str, CompletionResult] = {}

    @staticmethod
    def _cache_key(model: str, messages: list, max_tokens: int, extra_params: dict) -> str:
        """Hash every input that can change the completion into a stable key."""
        payload = {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens,
            # Without this, requests differing only in top_p/seed/tools/etc.
            # would share a key and return each other's cached answers.
            "extra": extra_params,
        }
        # sort_keys: keyword order must not matter. default=repr: values that
        # are not JSON-serializable still hash (worst case is a cache miss).
        encoded = json.dumps(payload, sort_keys=True, default=repr).encode()
        return hashlib.sha256(encoded).hexdigest()

    def complete(
        self,
        prompt: str,
        model: str = "anthropic/claude-3.5-sonnet",
        system: str = "",
        max_tokens: int = 1024,
        temperature: float = 0.7,
        cache: bool = False,
        **extra_params,
    ) -> CompletionResult:
        """Send one user prompt and return a normalized result.

        Args:
            prompt: User message content.
            model: OpenRouter model ID.
            system: Optional system message; omitted when empty.
            max_tokens: Completion token limit.
            temperature: Sampling temperature.
            cache: Reuse a previous result for an identical request. Only
                honoured when ``temperature == 0``.
            **extra_params: Forwarded verbatim to ``chat.completions.create``.

        Returns:
            A ``CompletionResult`` for the first choice in the response.
        """
        # NOTE(review): stream=True in extra_params presumably yields a stream
        # object that the unpacking below cannot handle — confirm before use.
        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})

        # Optional caching (deterministic requests only)
        cache_key = None
        if cache and temperature == 0:
            cache_key = self._cache_key(model, messages, max_tokens, extra_params)
            cached = self._cache.get(cache_key)
            if cached is not None:
                log.debug(f"Cache hit: {cache_key[:12]}")
                return cached

        start = time.monotonic()
        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            **extra_params,
        )
        latency = (time.monotonic() - start) * 1000

        # usage is optional on the response type; report zeros rather than crash.
        usage = response.usage
        result = CompletionResult(
            content=response.choices[0].message.content or "",
            model=response.model,
            prompt_tokens=usage.prompt_tokens if usage else 0,
            completion_tokens=usage.completion_tokens if usage else 0,
            generation_id=response.id,
            latency_ms=round(latency, 1),
        )

        log.info(f"[{result.model}] {result.prompt_tokens}+{result.completion_tokens} tokens, {result.latency_ms}ms")
        if cache_key:
            self._cache[cache_key] = result
        return result

    def check_credits(self) -> dict:
        """Return the ``data`` object from OpenRouter's ``/auth/key`` endpoint.

        Raises:
            requests.HTTPError: If the endpoint answers with an error status.
        """
        import requests

        resp = requests.get(
            "https://openrouter.ai/api/v1/auth/key",
            headers={"Authorization": f"Bearer {self.client.api_key}"},
            timeout=self._timeout,  # requests waits indefinitely by default
        )
        # Surface auth/server failures directly instead of a KeyError on "data".
        resp.raise_for_status()
        return resp.json()["data"]


# Usage
or_client = OpenRouterClient(app_name="my-saas")
result = or_client.complete("Explain recursion", model="openai/gpt-4o-mini", max_tokens=200)
print(f"{result.content}\n---\n{result.model} | {result.latency_ms}ms | {result.prompt_tokens}+{result.completion_tokens} tokens")
TypeScript: Production Client Wrapper
import OpenAI from "openai";

/** Normalized result of a single chat completion. */
interface CompletionResult {
  content: string;
  model: string;
  promptTokens: number;
  completionTokens: number;
  generationId: string;
  latencyMs: number;
}

/** Constructor options; every field is optional. */
interface OpenRouterClientOptions {
  /** OpenRouter key; falls back to the OPENROUTER_API_KEY env var. */
  apiKey?: string;
  /** Sent as the X-Title header. */
  appName?: string;
  /** Sent as the HTTP-Referer header. */
  appUrl?: string;
  /** SDK-level retry count. Defaults to 3. */
  maxRetries?: number;
  /** Per-request timeout in milliseconds. Defaults to 60_000. */
  timeoutMs?: number;
}

/** Thin wrapper around the OpenAI SDK pointed at OpenRouter. */
class OpenRouterClient {
  private client: OpenAI;

  /**
   * @throws Error when no key is passed and OPENROUTER_API_KEY is unset.
   */
  constructor(opts: OpenRouterClientOptions = {}) {
    const apiKey = opts.apiKey ?? process.env.OPENROUTER_API_KEY;
    // Fail here with a message naming the right variable. Handing `undefined`
    // to the SDK lets it fall back to its own default (OPENAI_API_KEY), which
    // could send an unrelated key to OpenRouter.
    if (!apiKey) {
      throw new Error("OpenRouter API key missing: pass `apiKey` or set OPENROUTER_API_KEY");
    }

    this.client = new OpenAI({
      baseURL: "https://openrouter.ai/api/v1",
      apiKey,
      maxRetries: opts.maxRetries ?? 3,
      timeout: opts.timeoutMs ?? 60_000,
      defaultHeaders: {
        "HTTP-Referer": opts.appUrl ?? "https://my-app.com",
        "X-Title": opts.appName ?? "My App",
      },
    });
  }

  /**
   * Send one user prompt (plus an optional system message) and return the
   * first choice in normalized form. Token counts are 0 when the response
   * carries no usage block.
   */
  async complete(
    prompt: string,
    opts: { model?: string; system?: string; maxTokens?: number; temperature?: number } = {}
  ): Promise<CompletionResult> {
    const messages: OpenAI.ChatCompletionMessageParam[] = [];
    if (opts.system) messages.push({ role: "system", content: opts.system });
    messages.push({ role: "user", content: prompt });

    const start = performance.now();
    const res = await this.client.chat.completions.create({
      model: opts.model ?? "anthropic/claude-3.5-sonnet",
      messages,
      max_tokens: opts.maxTokens ?? 1024,
      temperature: opts.temperature ?? 0.7,
    });
    const latency = Math.round(performance.now() - start);

    return {
      content: res.choices[0].message.content ?? "",
      model: res.model,
      promptTokens: res.usage?.prompt_tokens ?? 0,
      completionTokens: res.usage?.completion_tokens ?? 0,
      generationId: res.id,
      latencyMs: latency,
    };
  }
}

// Usage
const or = new OpenRouterClient({ appName: "my-saas" });
const result = await or.complete("Explain recursion", { model: "openai/gpt-4o-mini", maxTokens: 200 });
console.log(result.content, `\n${result.model} | ${result.latencyMs}ms`);
Retry Strategy
The OpenAI SDK has built-in retries with exponential backoff for:
- 429 (rate limit) -- respects the `Retry-After` header
- 5xx (server errors) -- retries with backoff
- Connection errors -- retries on network failures
import os

from openai import OpenAI

# Configure via constructor
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    # Read the key (format "sk-or-v1-...") from the environment rather than
    # hard-coding it, matching the production wrapper.
    api_key=os.environ["OPENROUTER_API_KEY"],
    max_retries=5,  # Default is 2
    timeout=120.0,  # Per-request timeout in seconds
)
For custom retry logic beyond the SDK:
import tenacity


def _log_retry(state):
    """Warn before each sleep with the attempt number and the triggering exception."""
    log.warning(f"Retry {state.attempt_number}: {state.outcome.exception()}")


# Retry policy: only rate-limit and timeout errors are retried, waiting
# exponentially (clamped to 1-60 s) and stopping after the 5th attempt.
# NOTE(review): without reraise=True, tenacity reports exhaustion as
# tenacity.RetryError rather than the original exception — confirm callers
# expect that. These attempts also stack on top of the SDK's own max_retries.
@tenacity.retry(
    retry=tenacity.retry_if_exception_type((RateLimitError, APITimeoutError)),
    wait=tenacity.wait_exponential(min=1, max=60),
    stop=tenacity.stop_after_attempt(5),
    before_sleep=_log_retry,
)
def robust_complete(client, **kwargs):
    """Call ``client.chat.completions.create(**kwargs)`` under the retry policy above."""
    return client.chat.completions.create(**kwargs)
Middleware Pattern
import logging
from functools import wraps
from typing import Callable, Optional

log = logging.getLogger("openrouter")


def _fetch_generation_cost(generation_id: str, api_key: str, timeout: float = 10.0) -> float:
    """Return the ``total_cost`` OpenRouter reports for one generation (0.0 if absent)."""
    import requests

    resp = requests.get(
        "https://openrouter.ai/api/v1/generation",
        params={"id": generation_id},  # let requests URL-encode the ID
        headers={"Authorization": f"Bearer {api_key}"},
        timeout=timeout,  # requests waits indefinitely by default
    )
    resp.raise_for_status()
    data = resp.json().get("data") or {}
    # `or 0` also covers an explicit null, which float() would reject.
    return float(data.get("total_cost") or 0)


def with_cost_tracking(
    fn: Callable,
    fetch_cost: Optional[Callable[[str, str], float]] = None,
) -> Callable:
    """Middleware that logs cost per request.

    The wrapped callable must take the API client as its first positional
    argument (its ``api_key`` authenticates the lookup) and return an object
    with an ``id`` attribute. The lookup is a blocking call made after ``fn``
    returns.

    Args:
        fn: The completion function to wrap.
        fetch_cost: Optional ``(generation_id, api_key) -> cost`` callable,
            e.g. a stub in tests. Defaults to a query against OpenRouter's
            ``/generation`` endpoint.

    Returns:
        A wrapper with the same signature as ``fn``. Its ``total_cost``
        attribute is a ``{"value": float}`` dict holding the running total.
    """
    total_cost = {"value": 0.0}
    lookup = _fetch_generation_cost if fetch_cost is None else fetch_cost

    @wraps(fn)
    def wrapper(*args, **kwargs):
        result = fn(*args, **kwargs)
        try:
            cost = lookup(result.id, args[0].api_key)
        except Exception as exc:
            # Instrumentation boundary: a failed lookup (network, bad payload,
            # client not passed positionally) must never break the request.
            log.warning("Cost lookup failed: %s", exc)
            return result
        total_cost["value"] += cost
        log.info(f"Request cost: ${cost:.6f} | Session total: ${total_cost['value']:.4f}")
        return result

    wrapper.total_cost = total_cost
    return wrapper
Error Handling
| Exception | HTTP | Cause | Fix |
|---|---|---|---|
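| `AuthenticationError` | 401 | Bad API key | Check `OPENROUTER_API_KEY` |
| `RateLimitError` | 429 | Too many requests | SDK auto-retries; increase `max_retries` |
| `APITimeoutError` | -- | Response too slow | Increase `timeout`; use streaming |
| `BadRequestError` | 400 | Invalid params | Check model ID, messages format |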
Enterprise Considerations
- Centralize all OpenRouter calls through a single client wrapper for consistent logging, retries, and cost tracking
- Type all response shapes with dataclasses/interfaces for compile-time safety
- Use dependency injection to swap between OpenRouter and direct provider clients in tests
- Set `max_retries` based on your SLA (2 for interactive, 5 for batch)
- Wrap middleware in try/catch so instrumentation never breaks the main request flow