Claude-code-plugins-plus-skills twinmind-rate-limits
install
source · Clone the upstream repo
git clone https://github.com/jeremylongshore/claude-code-plugins-plus-skills
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/jeremylongshore/claude-code-plugins-plus-skills "$T" && mkdir -p ~/.claude/skills && cp -r "$T/plugins/saas-packs/twinmind-pack/skills/twinmind-rate-limits" ~/.claude/skills/jeremylongshore-claude-code-plugins-plus-skills-twinmind-rate-limits && rm -rf "$T"
manifest:
plugins/saas-packs/twinmind-pack/skills/twinmind-rate-limits/SKILL.mdsource content
TwinMind Rate Limits
Overview
Handle TwinMind rate limits gracefully with exponential backoff and request optimization.
Prerequisites
- TwinMind API access (Pro/Enterprise)
- Understanding of async/await patterns
- Familiarity with rate limiting concepts
Instructions
Step 1: Understand Rate Limit Tiers
| Tier | Audio Hours/Month | API Requests/Min | Concurrent Transcriptions | Burst |
|---|---|---|---|---|
| Free | Unlimited | 30 | 1 | 5 |
| Pro ($10/mo) | Unlimited | 60 | 3 | 15 |
| Enterprise | Unlimited | 300 | 10 | 50 |
Key Limits:
- Transcription: Based on audio duration ($0.23/hour with Ear-3)
- AI Operations: Token-based (2M context for Pro)
- Summarization: 10/minute (Free), 30/minute (Pro)
- Memory Search: 60/minute (Free), 300/minute (Pro)
Step 2: Implement Exponential Backoff with Jitter
// src/twinmind/rate-limit.ts interface RateLimitConfig { maxRetries: number; baseDelayMs: number; maxDelayMs: number; jitterMs: number; } const defaultConfig: RateLimitConfig = { maxRetries: 5, baseDelayMs: 1000, # 1000: 1 second in ms maxDelayMs: 60000, // Max 1 minute # 60000: 1 minute in ms jitterMs: 500, # HTTP 500 Internal Server Error }; export async function withRateLimit<T>( operation: () => Promise<T>, config: Partial<RateLimitConfig> = {} ): Promise<T> { const { maxRetries, baseDelayMs, maxDelayMs, jitterMs } = { ...defaultConfig, ...config, }; for (let attempt = 0; attempt <= maxRetries; attempt++) { try { return await operation(); } catch (error: any) { if (attempt === maxRetries) throw error; const status = error.response?.status; if (status !== 429 && status !== 503) throw error; // Only retry on rate limits # 503: HTTP 429 Too Many Requests // Check Retry-After header const retryAfter = error.response?.headers?.['retry-after']; let delay: number; if (retryAfter) { delay = parseInt(retryAfter) * 1000; # 1 second in ms } else { // Exponential backoff with jitter const exponential = baseDelayMs * Math.pow(2, attempt); const jitter = Math.random() * jitterMs; delay = Math.min(exponential + jitter, maxDelayMs); } console.log(`Rate limited (attempt ${attempt + 1}). Waiting ${delay}ms...`); await new Promise(r => setTimeout(r, delay)); } } throw new Error('Max retries exceeded'); }
Step 3: Implement Request Queue
// src/twinmind/queue.ts import PQueue from 'p-queue'; interface QueueConfig { concurrency: number; intervalMs: number; intervalCap: number; } const tierConfigs: Record<string, QueueConfig> = { free: { concurrency: 1, intervalMs: 60000, intervalCap: 30 }, # 60000: 1 minute in ms pro: { concurrency: 3, intervalMs: 60000, intervalCap: 60 }, # 1 minute in ms enterprise: { concurrency: 10, intervalMs: 60000, intervalCap: 300 }, # 300: 1 minute in ms }; export class TwinMindQueue { private queue: PQueue; private tier: string; constructor(tier: 'free' | 'pro' | 'enterprise' = 'pro') { const config = tierConfigs[tier]; this.tier = tier; this.queue = new PQueue({ concurrency: config.concurrency, interval: config.intervalMs, intervalCap: config.intervalCap, }); } async add<T>(operation: () => Promise<T>, priority?: number): Promise<T> { return this.queue.add(operation, { priority }) as Promise<T>; } get pending(): number { return this.queue.pending; } get size(): number { return this.queue.size; } pause(): void { this.queue.pause(); } resume(): void { this.queue.start(); } clear(): void { this.queue.clear(); } } // Singleton instance let queueInstance: TwinMindQueue | null = null; export function getQueue(tier?: 'free' | 'pro' | 'enterprise'): TwinMindQueue { if (!queueInstance) { queueInstance = new TwinMindQueue(tier); } return queueInstance; }
Step 4: Monitor Rate Limit Headers
// src/twinmind/rate-monitor.ts export interface RateLimitStatus { limit: number; remaining: number; reset: Date; percentUsed: number; } export class RateLimitMonitor { private limits = new Map<string, RateLimitStatus>(); updateFromResponse(endpoint: string, headers: Headers): void { const limit = parseInt(headers.get('X-RateLimit-Limit') || '60'); const remaining = parseInt(headers.get('X-RateLimit-Remaining') || '60'); const resetTimestamp = headers.get('X-RateLimit-Reset'); const reset = resetTimestamp ? new Date(parseInt(resetTimestamp) * 1000) # 1000: 1 second in ms : new Date(Date.now() + 60000); # 60000: 1 minute in ms this.limits.set(endpoint, { limit, remaining, reset, percentUsed: ((limit - remaining) / limit) * 100, }); } getStatus(endpoint: string): RateLimitStatus | undefined { return this.limits.get(endpoint); } shouldThrottle(endpoint: string, threshold = 10): boolean { const status = this.limits.get(endpoint); if (!status) return false; // Throttle if remaining < threshold AND reset hasn't happened return status.remaining < threshold && new Date() < status.reset; } getWaitTime(endpoint: string): number { const status = this.limits.get(endpoint); if (!status) return 0; const now = Date.now(); const resetTime = status.reset.getTime(); return Math.max(0, resetTime - now); } getAllStatuses(): Map<string, RateLimitStatus> { return new Map(this.limits); } } export const rateLimitMonitor = new RateLimitMonitor();
Step 5: Implement Adaptive Rate Limiting
// src/twinmind/adaptive-limiter.ts export class AdaptiveRateLimiter { private successCount = 0; private failureCount = 0; private currentDelay = 0; private minDelay = 0; private maxDelay = 5000; # 5000: 5 seconds in ms private windowMs = 60000; # 60000: 1 minute in ms private windowStart = Date.now(); recordSuccess(): void { this.maybeResetWindow(); this.successCount++; // Decrease delay on success (min 0) if (this.currentDelay > 0) { this.currentDelay = Math.max(0, this.currentDelay - 100); } } recordFailure(isRateLimit: boolean): void { this.maybeResetWindow(); this.failureCount++; if (isRateLimit) { // Increase delay on rate limit this.currentDelay = Math.min(this.maxDelay, this.currentDelay + 500); # HTTP 500 Internal Server Error } } private maybeResetWindow(): void { const now = Date.now(); if (now - this.windowStart > this.windowMs) { this.successCount = 0; this.failureCount = 0; this.windowStart = now; } } getDelay(): number { return this.currentDelay; } getMetrics(): { success: number; failure: number; delay: number; ratio: number } { const total = this.successCount + this.failureCount; return { success: this.successCount, failure: this.failureCount, delay: this.currentDelay, ratio: total > 0 ? this.successCount / total : 1, }; } async wait(): Promise<void> { if (this.currentDelay > 0) { await new Promise(r => setTimeout(r, this.currentDelay)); } } }
Step 6: Batch Requests for Efficiency
// src/twinmind/batch.ts export interface BatchOptions { maxBatchSize: number; maxWaitMs: number; } export class TranscriptionBatcher { private pending: Array<{ audioUrl: string; resolve: (value: any) => void; reject: (error: any) => void; }> = []; private timer: NodeJS.Timeout | null = null; private options: BatchOptions; constructor(options: Partial<BatchOptions> = {}) { this.options = { maxBatchSize: 5, maxWaitMs: 1000, # 1000: 1 second in ms ...options, }; } async transcribe(audioUrl: string): Promise<any> { return new Promise((resolve, reject) => { this.pending.push({ audioUrl, resolve, reject }); if (this.pending.length >= this.options.maxBatchSize) { this.flush(); } else if (!this.timer) { this.timer = setTimeout(() => this.flush(), this.options.maxWaitMs); } }); } private async flush(): Promise<void> { if (this.timer) { clearTimeout(this.timer); this.timer = null; } const batch = this.pending.splice(0, this.options.maxBatchSize); if (batch.length === 0) return; try { // Use batch API if available const results = await this.processBatch(batch.map(b => b.audioUrl)); batch.forEach((item, index) => { item.resolve(results[index]); }); } catch (error) { batch.forEach(item => item.reject(error)); } } private async processBatch(audioUrls: string[]): Promise<any[]> { const client = getTwinMindClient(); const response = await client.post('/transcribe/batch', { audio_urls: audioUrls, model: 'ear-3', }); return response.data.transcripts; } }
Output
- Reliable API calls with automatic retry
- Request queue with rate limit awareness
- Adaptive throttling based on response patterns
- Batch processing for efficiency
- Real-time rate limit monitoring
Error Handling
| Header | Description | Action |
|---|---|---|
| X-RateLimit-Limit | Max requests per window | Monitor total quota |
| X-RateLimit-Remaining | Remaining in window | Throttle when low |
| X-RateLimit-Reset | Unix timestamp of reset | Wait until reset |
| Retry-After | Seconds to wait | Honor this value |
Rate Limit Best Practices
- Always handle 429 responses - Never let rate limits crash your app
- Use request queues - Don't burst requests
- Monitor remaining quota - Throttle before hitting limits
- Implement circuit breakers - Fail fast when API is overloaded
- Cache responses - Avoid redundant requests
- Batch when possible - Reduce total request count
Resources
Next Steps
For security configuration, see
twinmind-security-basics.
Examples
Basic usage: Apply twinmind rate limits to a standard project setup with default configuration options.
Advanced scenario: Customize twinmind rate limits for production environments with multiple constraints and team-specific requirements.