Claude-code-plugins-plus-skills openrouter-routing-rules
install
source · Clone the upstream repo
git clone https://github.com/jeremylongshore/claude-code-plugins-plus-skills
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/jeremylongshore/claude-code-plugins-plus-skills "$T" && mkdir -p ~/.claude/skills && cp -r "$T/plugins/saas-packs/openrouter-pack/skills/openrouter-routing-rules" ~/.claude/skills/jeremylongshore-claude-code-plugins-plus-skills-openrouter-routing-rules && rm -rf "$T"
manifest:
plugins/saas-packs/openrouter-pack/skills/openrouter-routing-rules/SKILL.md — source content
OpenRouter Routing Rules
Overview
Beyond simple task-based model selection, production systems need configurable routing rules that consider user tier, cost budget, time of day, model availability, and feature requirements. This skill covers building a rules engine for OpenRouter model selection with config-driven rules, dynamic conditions, and override capabilities.
Rules Engine
import os
import json
import time
from dataclasses import dataclass
from typing import Optional, Callable

from openai import OpenAI

# OpenRouter is OpenAI-API-compatible: point the client at its base URL.
# The attribution headers (HTTP-Referer / X-Title) identify the app on the
# OpenRouter dashboard.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ["OPENROUTER_API_KEY"],
    default_headers={"HTTP-Referer": "https://my-app.com", "X-Title": "my-app"},
)


@dataclass
class RoutingContext:
    """Snapshot of request attributes that routing rules match against."""

    user_tier: str = "free"        # "free" | "basic" | "pro" | "enterprise"
    task_type: str = "general"     # "chat" | "code" | "analysis" | "classification"
    budget_remaining: float = 0.0  # Remaining daily budget in dollars
    prompt_tokens_est: int = 0     # Estimated prompt tokens
    needs_tools: bool = False      # Requires function calling
    needs_vision: bool = False     # Requires image input
    max_latency_ms: int = 30000    # Latency SLA


@dataclass
class RoutingRule:
    """A single routing rule: a condition plus the model choice it implies."""

    name: str
    priority: int  # Lower = higher priority
    condition: Callable[[RoutingContext], bool]
    model: str
    # FIX: was annotated `list[str] = None`, which mis-declares the None
    # default as a non-optional list. Optional[...] states the contract.
    fallbacks: Optional[list[str]] = None
    max_tokens: int = 1024

    def matches(self, ctx: RoutingContext) -> bool:
        """Evaluate this rule's condition against *ctx*.

        A raising condition is treated as a non-match so that one broken
        rule can never take down routing for every request.
        """
        try:
            return self.condition(ctx)
        except Exception:
            return False


# Define rules in priority order
RULES = [
    # Rule 1: Free users get free models only
    RoutingRule(
        name="free-tier",
        priority=1,
        condition=lambda ctx: ctx.user_tier == "free",
        model="google/gemma-2-9b-it:free",
        fallbacks=["meta-llama/llama-3.1-8b-instruct"],
        max_tokens=512,
    ),
    # Rule 2: Low budget → cheap models
    RoutingRule(
        name="low-budget",
        priority=2,
        condition=lambda ctx: ctx.budget_remaining < 1.0 and ctx.user_tier != "enterprise",
        model="openai/gpt-4o-mini",
        fallbacks=["meta-llama/llama-3.1-8b-instruct"],
        max_tokens=512,
    ),
    # Rule 3: Tool calling required → tool-capable models
    RoutingRule(
        name="tools-required",
        priority=3,
        condition=lambda ctx: ctx.needs_tools,
        model="openai/gpt-4o",
        fallbacks=["anthropic/claude-3.5-sonnet"],
    ),
    # Rule 4: Vision required
    RoutingRule(
        name="vision-required",
        priority=4,
        condition=lambda ctx: ctx.needs_vision,
        model="openai/gpt-4o",
        fallbacks=["anthropic/claude-3.5-sonnet", "google/gemini-2.0-flash-001"],
    ),
    # Rule 5: Code tasks → Claude
    RoutingRule(
        name="code-tasks",
        priority=5,
        condition=lambda ctx: ctx.task_type == "code",
        model="anthropic/claude-3.5-sonnet",
        fallbacks=["openai/gpt-4o"],
    ),
    # Rule 6: Latency-sensitive → fast models
    RoutingRule(
        name="low-latency",
        priority=6,
        condition=lambda ctx: ctx.max_latency_ms < 3000,
        model="openai/gpt-4o-mini",
        fallbacks=["anthropic/claude-3-haiku"],
    ),
    # Rule 7: Enterprise gets premium
    RoutingRule(
        name="enterprise-default",
        priority=7,
        condition=lambda ctx: ctx.user_tier == "enterprise",
        model="anthropic/claude-3.5-sonnet",
        fallbacks=["openai/gpt-4o", "openai/gpt-4o-mini"],
    ),
    # Rule 8: Default catch-all
    RoutingRule(
        name="default",
        priority=99,
        condition=lambda ctx: True,  # Always matches
        model="openai/gpt-4o-mini",
        fallbacks=["meta-llama/llama-3.1-8b-instruct"],
    ),
]


def evaluate_rules(ctx: RoutingContext) -> RoutingRule:
    """Find the first matching rule (sorted by priority)."""
    sorted_rules = sorted(RULES, key=lambda r: r.priority)
    for rule in sorted_rules:
        if rule.matches(ctx):
            return rule
    # Unreachable while the priority-99 catch-all exists, but kept as a
    # defensive fallback: the highest-priority-number (last) rule is default.
    return sorted_rules[-1]
Config-Driven Rules (JSON)
# Rules expressed as plain data, so they can live in a JSON/YAML file or a
# database row and be hot-reloaded without a code deploy.
RULES_CONFIG = {
    "rules": [
        {
            "name": "free-tier",
            "priority": 1,
            "conditions": {"user_tier": "free"},
            "model": "google/gemma-2-9b-it:free",
            "max_tokens": 512,
        },
        {
            "name": "code-pro",
            "priority": 5,
            "conditions": {"task_type": "code", "user_tier": ["pro", "enterprise"]},
            "model": "anthropic/claude-3.5-sonnet",
            "max_tokens": 2048,
        },
        {
            "name": "default",
            "priority": 99,
            "conditions": {},
            "model": "openai/gpt-4o-mini",
        },
    ]
}


def match_config_rule(ctx: RoutingContext, rule_config: dict) -> bool:
    """Return True when *ctx* satisfies every condition in *rule_config*.

    Each condition maps a RoutingContext attribute name to either a single
    expected value (exact match) or a list of acceptable values (membership).
    An empty/missing "conditions" dict matches everything.
    """

    def _holds(attr_name: str, wanted) -> bool:
        # Missing attributes read as None, which only matches an explicit None.
        value = getattr(ctx, attr_name, None)
        if isinstance(wanted, list):
            return value in wanted
        return value == wanted

    conditions = rule_config.get("conditions", {})
    return all(_holds(attr, wanted) for attr, wanted in conditions.items())
Routed Completion
def routed_completion(messages: list[dict], ctx: RoutingContext, **kwargs):
    """Execute a chat completion, choosing the model via the rules engine.

    Evaluates the routing rules against *ctx*, then calls OpenRouter with the
    winning rule's model (plus its fallback chain, when one is configured).
    Returns a dict with the response text, the model that actually served it,
    the matched rule name (for routing analytics), and total token usage.
    """
    rule = evaluate_rules(ctx)

    # OpenRouter-specific routing parameters go through extra_body; only send
    # them when the rule actually defines a fallback chain.
    extra = None
    if rule.fallbacks:
        extra = {
            "models": [rule.model, *rule.fallbacks],
            "route": "fallback",
        }

    response = client.chat.completions.create(
        model=rule.model,
        messages=messages,
        max_tokens=rule.max_tokens,
        extra_body=extra,
        **kwargs,
    )

    usage = response.usage
    return {
        "content": response.choices[0].message.content,
        "model": response.model,
        "rule": rule.name,
        "tokens": usage.prompt_tokens + usage.completion_tokens,
    }


# Usage
ctx = RoutingContext(user_tier="pro", task_type="code", budget_remaining=50.0)
result = routed_completion(
    [{"role": "user", "content": "Refactor this function..."}],
    ctx=ctx,
)
print(f"Rule: {result['rule']}, Model: {result['model']}")
A/B Testing Rules
import random


def ab_test_routing(ctx: RoutingContext, test_name: str, variant_b_pct: float = 0.10):
    """Route a percentage of traffic to variant B for comparison.

    Args:
        ctx: Request context fed to the rules engine.
        test_name: Experiment label, embedded in the returned rule name so
            logged routing decisions can be attributed to a specific test.
        variant_b_pct: Fraction of traffic (0.0-1.0) diverted to variant B.

    Returns:
        The normally-matched RoutingRule, or a variant-B copy of it with the
        model under test substituted in.
    """
    rule = evaluate_rules(ctx)
    if random.random() < variant_b_pct:
        # Variant B: same rule shape, but override the model under test.
        # FIX: test_name was accepted but never used; include it in the rule
        # name so analytics can distinguish concurrent experiments.
        return RoutingRule(
            name=f"{rule.name}:{test_name}:variant-b",
            priority=rule.priority,
            condition=rule.condition,
            model="openai/gpt-4o",  # Test against a different model
            fallbacks=rule.fallbacks,
            max_tokens=rule.max_tokens,
        )
    return rule
Error Handling
| Error | Cause | Fix |
|---|---|---|
| No rule matched | Missing default catch-all | Always include a default rule |
| Rule condition error | Dynamic check raised exception | Wrap condition in try/except; return False on error |
| Wrong model selected | Rule priority incorrect | Log matching rule name; review priority ordering |
| Config parse error | Invalid JSON rule definition | Validate config at startup; fail fast |
Enterprise Considerations
- Store rules in a config file or database for hot-reloading without redeployment
- Log every routing decision (rule name, model, context) for analytics and debugging
- Use A/B testing to validate rule changes before full rollout
- Always include a default catch-all rule with a reliable, affordable model
- Version your rule configurations and track changes alongside code deployments
- Combine routing rules with budget enforcement (see openrouter-cost-controls)