claude-skill-registry-data · mistral-observability

install
source · Clone the upstream repo
git clone https://github.com/majiayu000/claude-skill-registry-data
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/majiayu000/claude-skill-registry-data "$T" && mkdir -p ~/.claude/skills && cp -r "$T/data/mistral-observability" ~/.claude/skills/majiayu000-claude-skill-registry-data-mistral-observability && rm -rf "$T"
manifest: data/mistral-observability/SKILL.md
source content

Mistral AI Observability

Overview

Set up end-to-end observability for Mistral AI integrations: Prometheus metrics, OpenTelemetry distributed tracing, structured logging, and alerting.

Prerequisites

  • Prometheus or compatible metrics backend
  • OpenTelemetry SDK installed (optional)
  • Grafana or similar dashboarding tool
  • AlertManager or similar alerting system

Instructions

Step 1: Define Key Metrics

Metric | Type | Description
mistral_requests_total | Counter | Total API requests
mistral_request_duration_seconds | Histogram | Request latency in seconds
mistral_tokens_total | Counter | Tokens used (input/output)
mistral_errors_total | Counter | Error count by type
mistral_cost_usd_total | Counter | Estimated cost in USD
mistral_cache_hits_total | Counter | Cache hit count

Step 2: Implement Prometheus Metrics

import { Registry, Counter, Histogram } from 'prom-client';

const registry = new Registry();

// Request counter
const requestCounter = new Counter({
  name: 'mistral_requests_total',
  help: 'Total Mistral AI API requests',
  labelNames: ['model', 'status', 'endpoint'],
  registers: [registry],
});

// Latency histogram
const requestDuration = new Histogram({
  name: 'mistral_request_duration_seconds',
  help: 'Mistral AI request duration in seconds',
  labelNames: ['model', 'endpoint'],
  buckets: [0.1, 0.25, 0.5, 1, 2.5, 5, 10],
  registers: [registry],
});

// Token counter
const tokenCounter = new Counter({
  name: 'mistral_tokens_total',
  help: 'Total tokens used',
  labelNames: ['model', 'type'], // type: input, output
  registers: [registry],
});

// Error counter
const errorCounter = new Counter({
  name: 'mistral_errors_total',
  help: 'Mistral AI errors by type',
  labelNames: ['model', 'error_type', 'status_code'],
  registers: [registry],
});

// Cost counter (estimated)
const costCounter = new Counter({
  name: 'mistral_cost_usd_total',
  help: 'Estimated cost in USD',
  labelNames: ['model'],
  registers: [registry],
});

export { registry, requestCounter, requestDuration, tokenCounter, errorCounter, costCounter };
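
prom-client can also export the default Node.js process metrics (CPU, memory, event-loop lag) on the same registry, so they are scraped alongside the Mistral metrics; an optional addition:

import { collectDefaultMetrics } from 'prom-client';

// Optional: register default Node.js process metrics on the same registry.
collectDefaultMetrics({ register: registry });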

Step 3: Create Instrumented Client Wrapper

import { Mistral } from '@mistralai/mistralai';
import {
  requestCounter,
  requestDuration,
  tokenCounter,
  errorCounter,
  costCounter,
} from './metrics';

// Pricing per 1M tokens (update as needed)
const PRICING: Record<string, { input: number; output: number }> = {
  'mistral-small-latest': { input: 0.20, output: 0.60 },
  'mistral-large-latest': { input: 2.00, output: 6.00 },
  'mistral-embed': { input: 0.10, output: 0 },
};

export async function instrumentedChat(
  client: Mistral,
  model: string,
  messages: any[],
  options?: { temperature?: number; maxTokens?: number }
): Promise<any> {
  const timer = requestDuration.startTimer({ model, endpoint: 'chat.complete' });

  try {
    const response = await client.chat.complete({
      model,
      messages,
      ...options,
    });

    // Record success
    requestCounter.inc({ model, status: 'success', endpoint: 'chat.complete' });

    // Record tokens
    if (response.usage) {
      tokenCounter.inc({ model, type: 'input' }, response.usage.promptTokens || 0);
      tokenCounter.inc({ model, type: 'output' }, response.usage.completionTokens || 0);

      // Estimate cost
      const pricing = PRICING[model] || PRICING['mistral-small-latest'];
      const cost =
        ((response.usage.promptTokens || 0) / 1_000_000) * pricing.input +
        ((response.usage.completionTokens || 0) / 1_000_000) * pricing.output;
      costCounter.inc({ model }, cost);
    }

    return response;
  } catch (error: any) {
    // Record error
    requestCounter.inc({ model, status: 'error', endpoint: 'chat.complete' });
    errorCounter.inc({
      model,
      error_type: error.code || 'unknown',
      status_code: error.status?.toString() || 'unknown',
    });
    throw error;
  } finally {
    timer();
  }
}
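
A minimal usage sketch, assuming MISTRAL_API_KEY is set in the environment (the './instrumented-client' import path is illustrative):

import { Mistral } from '@mistralai/mistralai';
import { instrumentedChat } from './instrumented-client';

// Construct the client once and route all chat calls through the wrapper
// so every request is counted, timed, and priced.
const client = new Mistral({ apiKey: process.env.MISTRAL_API_KEY });

const response = await instrumentedChat(client, 'mistral-small-latest', [
  { role: 'user', content: 'Hello' },
]);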

Step 4: OpenTelemetry Distributed Tracing

import { trace, SpanStatusCode, Span } from '@opentelemetry/api';

const tracer = trace.getTracer('mistral-client');

export async function tracedChat<T>(
  operationName: string,
  operation: () => Promise<T>,
  attributes?: Record<string, string>
): Promise<T> {
  return tracer.startActiveSpan(`mistral.${operationName}`, async (span: Span) => {
    if (attributes) {
      Object.entries(attributes).forEach(([key, value]) => {
        span.setAttribute(key, value);
      });
    }

    try {
      const result = await operation();

      // Add result attributes
      if ((result as any).usage) {
        span.setAttribute('mistral.input_tokens', (result as any).usage.promptTokens);
        span.setAttribute('mistral.output_tokens', (result as any).usage.completionTokens);
      }

      span.setStatus({ code: SpanStatusCode.OK });
      return result;
    } catch (error: any) {
      span.setStatus({
        code: SpanStatusCode.ERROR,
        message: error.message,
      });
      span.recordException(error);
      throw error;
    } finally {
      span.end();
    }
  });
}

// Usage
const response = await tracedChat(
  'chat.complete',
  () => client.chat.complete({ model, messages }),
  { model, 'user.id': userId }
);
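
The tracing helper also composes with the metrics wrapper from Step 3, so a single call emits both a span and Prometheus metrics; a sketch using the functions defined above:

// Layer tracing over the instrumented call: the span wraps the wrapper,
// which in turn records metrics for the underlying API request.
const traced = await tracedChat(
  'chat.complete',
  () => instrumentedChat(client, model, messages),
  { model }
);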

Step 5: Structured Logging

import pino from 'pino';

const logger = pino({
  name: 'mistral',
  level: process.env.LOG_LEVEL || 'info',
  formatters: {
    level: (label) => ({ level: label }),
  },
});

interface MistralLogContext {
  requestId: string;
  model: string;
  operation: string;
  durationMs: number;
  inputTokens?: number;
  outputTokens?: number;
  cached?: boolean;
  error?: string;
}

export function logMistralOperation(context: MistralLogContext): void {
  const { error, ...rest } = context;

  if (error) {
    logger.error({ ...rest, error }, 'Mistral operation failed');
  } else {
    logger.info(rest, 'Mistral operation completed');
  }
}

// Usage
logMistralOperation({
  requestId: 'req-123',
  model: 'mistral-small-latest',
  operation: 'chat.complete',
  durationMs: 250,
  inputTokens: 100,
  outputTokens: 50,
});
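
In practice the log record is emitted at the call site around the instrumented wrapper; a sketch (the requestId generation and timing shown here are illustrative):

import { randomUUID } from 'node:crypto';

const requestId = randomUUID();
const start = Date.now();
try {
  // Time the wrapped call and emit one structured record per operation.
  const res = await instrumentedChat(client, model, messages);
  logMistralOperation({
    requestId,
    model,
    operation: 'chat.complete',
    durationMs: Date.now() - start,
    inputTokens: res.usage?.promptTokens,
    outputTokens: res.usage?.completionTokens,
  });
} catch (err: any) {
  logMistralOperation({
    requestId,
    model,
    operation: 'chat.complete',
    durationMs: Date.now() - start,
    error: err.message,
  });
  throw err;
}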

Step 6: Alert Configuration

# prometheus/mistral_alerts.yaml
groups:
  - name: mistral_alerts
    rules:
      # High error rate
      - alert: MistralHighErrorRate
        expr: |
          sum(rate(mistral_errors_total[5m])) /
          sum(rate(mistral_requests_total[5m])) > 0.05
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Mistral AI error rate > 5%"
          description: "Error rate is {{ $value | humanizePercentage }}"

      # High latency
      - alert: MistralHighLatency
        expr: |
          histogram_quantile(0.95,
            rate(mistral_request_duration_seconds_bucket[5m])
          ) > 5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Mistral AI P95 latency > 5s"

      # Rate limit approaching
      - alert: MistralRateLimitWarning
        expr: |
          rate(mistral_errors_total{error_type="rate_limit"}[5m]) > 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Mistral AI rate limiting detected"

      # High cost
      - alert: MistralHighCost
        expr: |
          increase(mistral_cost_usd_total[1h]) > 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Mistral AI cost > $10/hour"

      # API unavailable
      - alert: MistralUnavailable
        expr: |
          rate(mistral_errors_total{status_code="503"}[5m]) > 0.1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Mistral AI service unavailable"

Step 7: Grafana Dashboard

{
  "title": "Mistral AI Monitoring",
  "panels": [
    {
      "title": "Request Rate",
      "type": "timeseries",
      "targets": [{
        "expr": "rate(mistral_requests_total[5m])",
        "legendFormat": "{{model}} - {{status}}"
      }]
    },
    {
      "title": "Latency P50/P95/P99",
      "type": "timeseries",
      "targets": [
        {
          "expr": "histogram_quantile(0.5, rate(mistral_request_duration_seconds_bucket[5m]))",
          "legendFormat": "P50"
        },
        {
          "expr": "histogram_quantile(0.95, rate(mistral_request_duration_seconds_bucket[5m]))",
          "legendFormat": "P95"
        },
        {
          "expr": "histogram_quantile(0.99, rate(mistral_request_duration_seconds_bucket[5m]))",
          "legendFormat": "P99"
        }
      ]
    },
    {
      "title": "Token Usage",
      "type": "timeseries",
      "targets": [{
        "expr": "rate(mistral_tokens_total[5m])",
        "legendFormat": "{{model}} - {{type}}"
      }]
    },
    {
      "title": "Estimated Cost ($/hour)",
      "type": "stat",
      "targets": [{
        "expr": "increase(mistral_cost_usd_total[1h])"
      }]
    }
  ]
}

Output

  • Prometheus metrics collection
  • OpenTelemetry tracing
  • Structured logging
  • Alert rules configured

Error Handling

Issue | Cause | Solution
Missing metrics | No instrumentation | Wrap client calls
Trace gaps | Missing context propagation | Check context headers
Alert storms | Wrong thresholds | Tune alert rules
High cardinality | Too many label values | Reduce label values (see the sketch below)
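
For the high-cardinality case, normalize free-form values to a small fixed set before using them as labels; a minimal sketch with a hypothetical normalizeErrorType helper:

// Map arbitrary SDK error codes onto a small fixed set of values so
// mistral_errors_total stays low-cardinality.
const KNOWN_ERROR_TYPES = new Set(['rate_limit', 'timeout', 'auth', 'invalid_request']);

function normalizeErrorType(code: string | undefined): string {
  if (!code) return 'unknown';
  return KNOWN_ERROR_TYPES.has(code) ? code : 'other';
}

// In the catch block of instrumentedChat:
// errorCounter.inc({ model, error_type: normalizeErrorType(error.code), status_code: ... });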

Examples

Metrics Endpoint (Express)

import express from 'express';
import { registry } from './metrics';

const app = express();

app.get('/metrics', async (req, res) => {
  res.set('Content-Type', registry.contentType);
  res.send(await registry.metrics());
});

// Start the server; 9464 is just an example port, point your Prometheus
// scrape config at whatever port you choose.
app.listen(9464);

Next Steps

For incident response, see mistral-incident-runbook.