Claude-skill-registry langfuse-rate-limits

install

source · Clone the upstream repo

git clone https://github.com/majiayu000/claude-skill-registry

Claude Code · Install into ~/.claude/skills/

T=$(mktemp -d) && git clone --depth=1 https://github.com/majiayu000/claude-skill-registry "$T" && mkdir -p ~/.claude/skills && cp -r "$T/skills/data/langfuse-rate-limits" ~/.claude/skills/majiayu000-claude-skill-registry-langfuse-rate-limits && rm -rf "$T"

manifest: skills/data/langfuse-rate-limits/SKILL.md

Langfuse Rate Limits

Overview

Handle Langfuse rate limits gracefully with batching and backoff strategies.

Prerequisites

Langfuse SDK installed
Understanding of async/await patterns
High-volume trace workload

Rate Limit Tiers

Tier	Events/min	Events/hour	Batch Size
Free	1,000	10,000	15
Pro	10,000	100,000	50
Enterprise	Custom	Custom	Custom

Instructions

Step 1: Configure Optimal Batching

import { Langfuse } from "langfuse";

// High-volume configuration: a single client whose events are buffered and
// sent in batches, using the settings below.
// NOTE: the `!` non-null assertions only satisfy the type checker; if either
// environment variable is unset, `undefined` is passed to the constructor at
// runtime.
const langfuse = new Langfuse({
  publicKey: process.env.LANGFUSE_PUBLIC_KEY!,
  secretKey: process.env.LANGFUSE_SECRET_KEY!,
  // Batching settings
  flushAt: 50,           // Batch 50 events before sending
  flushInterval: 5000,   // Or flush every 5 seconds (value is in milliseconds)
  // Timeout settings
  requestTimeout: 30000, // 30 second timeout for large batches (milliseconds)
});

Step 2: Implement Exponential Backoff

/** Tuning knobs for `withBackoff`. All durations are in milliseconds. */
interface RetryConfig {
  /** Retries allowed after the first attempt (total attempts = maxRetries + 1). */
  maxRetries: number;
  /** Delay before the first retry; doubled for every subsequent retry. */
  baseDelayMs: number;
  /** Upper bound for the computed exponential delay (jitter included). */
  maxDelayMs: number;
  /** Maximum random jitter added to each exponential delay. */
  jitterMs: number;
}

/** Reads an HTTP status from `error.status`, falling back to `error.response.status`. */
function readErrorStatus(error: unknown): number | undefined {
  const candidate = error as
    | { status?: unknown; response?: { status?: unknown } | null }
    | null
    | undefined;
  const raw = candidate?.status || candidate?.response?.status;
  const status = typeof raw === "string" ? Number(raw) : raw;
  return typeof status === "number" && Number.isFinite(status)
    ? status
    : undefined;
}

/**
 * Reads `Retry-After` from `error.headers` (or `error.response.headers`) and
 * converts it to a non-negative delay in milliseconds. Both header forms are
 * understood: delta-seconds ("120") and an HTTP-date. Returns `undefined`
 * when the header is absent or unparseable.
 */
function readRetryAfterMs(error: unknown): number | undefined {
  const candidate = error as
    | { headers?: unknown; response?: { headers?: unknown } | null }
    | null
    | undefined;
  const headers = candidate?.headers ?? candidate?.response?.headers;
  if (typeof headers !== "object" || headers === null) return undefined;

  // Support both `Headers`-like objects (with `.get`) and plain records.
  let raw: unknown;
  const getter = (headers as { get?: unknown }).get;
  if (typeof getter === "function") {
    raw = getter.call(headers, "Retry-After");
  } else {
    const record = headers as Record<string, unknown>;
    raw = record["Retry-After"] ?? record["retry-after"];
  }
  if (typeof raw !== "string" || raw.trim() === "") return undefined;

  const text = raw.trim();
  const seconds = Number(text);
  if (Number.isFinite(seconds)) return Math.max(0, seconds * 1000);

  // Not a number: try the HTTP-date form and wait until that instant.
  const when = Date.parse(text);
  return Number.isNaN(when) ? undefined : Math.max(0, when - Date.now());
}

/**
 * Runs `operation`, retrying with exponential backoff when it rejects.
 *
 * Retried: HTTP 429, HTTP 5xx, and errors that carry no HTTP status at all
 * (e.g. network failures). Any other status is rethrown immediately.
 * When the error exposes a parseable `Retry-After` header, that value is used
 * as the delay as-is (it is not capped by `maxDelayMs`); otherwise the delay is
 * `baseDelayMs * 2^attempt` plus random jitter, capped at `maxDelayMs`.
 *
 * @param operation - Async work to attempt; invoked once per attempt.
 * @param config - Retry limits and delays; see `RetryConfig`.
 * @returns The value `operation` resolves with.
 * @throws The last error from `operation` once retries are exhausted, or
 *   immediately for a non-retryable status.
 */
async function withBackoff<T>(
  operation: () => Promise<T>,
  config: RetryConfig = {
    maxRetries: 5,
    baseDelayMs: 1000,
    maxDelayMs: 30000,
    jitterMs: 500,
  }
): Promise<T> {
  for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
    try {
      return await operation();
    } catch (error: unknown) {
      if (attempt === config.maxRetries) throw error;

      // Only give up early when a status is present and is neither 429 nor 5xx.
      const status = readErrorStatus(error);
      const retryable =
        status === undefined ||
        status === 429 ||
        (status >= 500 && status < 600);
      if (!retryable) throw error;

      let delay: number;
      const retryAfterMs = readRetryAfterMs(error);
      if (retryAfterMs !== undefined) {
        // The server told us how long to wait; honor it.
        delay = retryAfterMs;
      } else {
        // Exponential backoff with jitter
        const exponentialDelay = config.baseDelayMs * Math.pow(2, attempt);
        const jitter = Math.random() * config.jitterMs;
        delay = Math.min(exponentialDelay + jitter, config.maxDelayMs);
      }

      console.warn(
        `Rate limited. Attempt ${attempt + 1}/${config.maxRetries}. ` +
        `Retrying in ${delay}ms...`
      );

      await new Promise<void>((resolve) => setTimeout(resolve, delay));
    }
  }

  // Only reachable when maxRetries is negative, so the loop never runs.
  throw new Error("Unreachable");
}

Step 3: Rate Limit-Aware Wrapper

/**
 * Wraps a Langfuse client and gates `trace()` calls behind a simple counting
 * limiter: at most `maxConcurrent` calls hold a slot at once, and further
 * callers wait in a FIFO queue until a slot is released.
 *
 * NOTE(review): the slot is released as soon as `langfuse.trace()` returns
 * (its result is not awaited), so this bounds bursts of trace creation rather
 * than in-flight network requests — confirm that is the intent.
 */
class RateLimitedLangfuse {
  private readonly langfuse: Langfuse;
  private pendingEvents: number = 0;
  private readonly maxConcurrent: number = 100;
  private readonly queue: Array<() => void> = [];

  /**
   * @param config - Options forwarded to the Langfuse constructor. `flushAt`
   *   and `flushInterval` default to 50 and 5000 when the caller does not set
   *   them; caller-supplied values are respected.
   */
  constructor(config?: ConstructorParameters<typeof Langfuse>[0]) {
    this.langfuse = new Langfuse({
      ...config,
      // Batching defaults; applied only when the caller left them unset.
      flushAt: config?.flushAt ?? 50,
      flushInterval: config?.flushInterval ?? 5000,
    });
  }

  /** Resolves once a slot is held by the caller; queues when none is free. */
  private async waitForCapacity(): Promise<void> {
    if (this.pendingEvents < this.maxConcurrent) {
      this.pendingEvents++;
      return;
    }

    return new Promise<void>((resolve) => {
      // The slot is claimed at hand-off time, when releaseCapacity() runs this.
      this.queue.push(() => {
        this.pendingEvents++;
        resolve();
      });
    });
  }

  /** Frees one slot and hands it to the oldest queued waiter, if any. */
  private releaseCapacity(): void {
    this.pendingEvents--;
    const next = this.queue.shift();
    if (next) next();
  }

  /**
   * Creates a trace once a slot is available.
   *
   * @param params - Passed unchanged to `Langfuse.trace`.
   * @returns Whatever `Langfuse.trace` returns, wrapped in a promise.
   */
  async trace(
    params: Parameters<Langfuse["trace"]>[0]
  ): Promise<ReturnType<Langfuse["trace"]>> {
    await this.waitForCapacity();
    try {
      return this.langfuse.trace(params);
    } finally {
      // Always release, even if trace() throws, so waiters are not stranded.
      this.releaseCapacity();
    }
  }

  /** Delegates to the wrapped client's `flushAsync()`. */
  async flush(): Promise<void> {
    return this.langfuse.flushAsync();
  }

  /** Delegates to the wrapped client's `shutdownAsync()`. */
  async shutdown(): Promise<void> {
    return this.langfuse.shutdownAsync();
  }
}

Step 4: Sampling for High Volume

/** Controls which traces `SampledLangfuse` records. */
interface SamplingConfig {
  /** Fraction of traces to keep, from 0.0 (none) to 1.0 (all). */
  rate: number; // 0.0 to 1.0
  /** Returns true for traces that must be recorded regardless of `rate`. */
  alwaysSample: (trace: TraceParams) => boolean;
}

/**
 * Langfuse wrapper that records only a random fraction of traces, while
 * letting `alwaysSample` force specific traces through.
 */
class SampledLangfuse {
  private readonly langfuse: Langfuse;
  private readonly config: SamplingConfig;

  /**
   * @param langfuseConfig - Options forwarded to the Langfuse constructor.
   * @param samplingConfig - Sampling policy; defaults to keeping every trace.
   */
  constructor(
    langfuseConfig: ConstructorParameters<typeof Langfuse>[0],
    samplingConfig: SamplingConfig = { rate: 1.0, alwaysSample: () => false }
  ) {
    this.langfuse = new Langfuse(langfuseConfig);
    this.config = samplingConfig;
  }

  /**
   * Creates a trace, or a no-op stand-in when the trace is sampled out.
   *
   * Traces forced by `alwaysSample` are forwarded unchanged; randomly sampled
   * traces get `sampled` and `sampleRate` added to their metadata.
   */
  trace(params: Parameters<Langfuse["trace"]>[0]) {
    // Always sample errors and specific conditions
    if (this.config.alwaysSample(params)) {
      return this.langfuse.trace(params);
    }

    // Random sampling. Math.random() is in [0, 1), so `>=` keeps exactly a
    // `rate` fraction: rate 0 drops everything, rate 1 keeps everything.
    if (Math.random() >= this.config.rate) {
      // Return no-op trace
      return createNoOpTrace();
    }

    return this.langfuse.trace({
      ...params,
      metadata: {
        ...params?.metadata,
        sampled: true,
        sampleRate: this.config.rate,
      },
    });
  }
}

// Usage: keep roughly one trace in ten, but never drop a trace that is
// marked as an error (by level or by an "error" tag).
const sampledLangfuse = new SampledLangfuse(
  { publicKey: "...", secretKey: "..." },
  {
    rate: 0.1,
    alwaysSample: (params) => {
      if (params.level === "ERROR") {
        return true;
      }
      return params.tags?.includes("error") ?? false;
    },
  }
);

Output

Optimized batching configuration
Exponential backoff for rate limits
Concurrent request limiting
Sampling for ultra-high volume

Error Handling

Header/Error	Description	Action
429 Too Many Requests	Rate limited	Use exponential backoff
Retry-After	Seconds to wait	Honor this value exactly
X-RateLimit-Remaining	Requests left	Pre-emptive throttling
503 Service Unavailable	Overloaded	Back off significantly

Examples

Monitor Rate Limit Usage

/**
 * Tracks the most recently seen `X-RateLimit-Remaining` / `X-RateLimit-Reset`
 * response headers so callers can throttle before hitting the limit.
 */
class RateLimitMonitor {
  private remaining: number = 1000;
  private resetAt: Date = new Date();

  /** Parses a base-10 integer header value; `undefined` if absent or not a number. */
  private static parseHeaderInt(value: string | null): number | undefined {
    if (!value) return undefined;
    const parsed = Number.parseInt(value, 10);
    return Number.isNaN(parsed) ? undefined : parsed;
  }

  /**
   * Updates the tracked state from a response's headers. Missing or
   * unparseable headers leave the previous value untouched, so a malformed
   * response can never store NaN or an invalid date.
   *
   * @param headers - Response headers; `X-RateLimit-Reset` is interpreted as
   *   Unix epoch seconds.
   */
  updateFromResponse(headers: Headers) {
    const remaining = RateLimitMonitor.parseHeaderInt(
      headers.get("X-RateLimit-Remaining")
    );
    if (remaining !== undefined) this.remaining = remaining;

    const resetSeconds = RateLimitMonitor.parseHeaderInt(
      headers.get("X-RateLimit-Reset")
    );
    if (resetSeconds !== undefined) {
      const resetAt = new Date(resetSeconds * 1000);
      // Out-of-range timestamps produce an Invalid Date; ignore those too.
      if (!Number.isNaN(resetAt.getTime())) this.resetAt = resetAt;
    }
  }

  /** True when fewer than 10 requests remain and the reset time is still ahead. */
  shouldThrottle(): boolean {
    return this.remaining < 10 && new Date() < this.resetAt;
  }

  /** Milliseconds until the reset time, or 0 if it has already passed. */
  getWaitTime(): number {
    return Math.max(0, this.resetAt.getTime() - Date.now());
  }

  /** Snapshot of the tracked state, with the reset time as an ISO 8601 string. */
  getStatus() {
    return {
      remaining: this.remaining,
      resetAt: this.resetAt.toISOString(),
      shouldThrottle: this.shouldThrottle(),
    };
  }
}

Batch Processing Pattern

/**
 * Creates one "batch-item" trace per item, working through `items` in
 * fixed-size batches: after each batch the client is flushed, and a pause is
 * inserted before the next batch (but not after the last one).
 *
 * @param items - Values to record; each becomes the `input` of its trace.
 * @param options - Optional overrides. `batchSize` defaults to 50 and
 *   `delayBetweenBatchesMs` to 1000.
 * @throws RangeError if `batchSize` is not a positive integer (a value of 0
 *   or less would otherwise loop forever).
 */
async function processBatchWithRateLimits(
  items: readonly unknown[],
  options: { batchSize?: number; delayBetweenBatchesMs?: number } = {}
): Promise<void> {
  const { batchSize = 50, delayBetweenBatchesMs = 1000 } = options;
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    throw new RangeError(
      `batchSize must be a positive integer, got ${batchSize}`
    );
  }

  for (let start = 0; start < items.length; start += batchSize) {
    // Process batch
    for (const item of items.slice(start, start + batchSize)) {
      langfuse.trace({
        name: "batch-item",
        input: item,
      });
    }

    // Flush after each batch
    await langfuse.flushAsync();

    // Delay before next batch
    if (start + batchSize < items.length) {
      await new Promise<void>((resolve) =>
        setTimeout(resolve, delayBetweenBatchesMs)
      );
    }
  }
}

Queue-Based Rate Limiting

import PQueue from "p-queue";

// Create rate-limited queue.
// Two limits are configured together: a cap on how many tasks run at the
// same time (`concurrency`) and a cap on how many tasks run per time window
// (`intervalCap` per `interval` milliseconds).
const queue = new PQueue({
  concurrency: 10,      // Max 10 concurrent requests
  interval: 1000,       // Per second
  intervalCap: 50,      // Max 50 per interval
});

/**
 * Schedules a trace on the shared rate-limited queue instead of creating it
 * immediately; resolves with whatever the queued task produces.
 */
async function queuedTrace(params: TraceParams) {
  const createTrace = () => langfuse.trace(params);
  return queue.add(createTrace);
}

Resources

Next Steps

For security configuration, see the langfuse-security-basics skill.