Claude-skill-registry deepgram-observability

install
source · Clone the upstream repo
git clone https://github.com/majiayu000/claude-skill-registry
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/majiayu000/claude-skill-registry "$T" && mkdir -p ~/.claude/skills && cp -r "$T/skills/data/deepgram-observability" ~/.claude/skills/majiayu000-claude-skill-registry-deepgram-observability && rm -rf "$T"
manifest: skills/data/deepgram-observability/SKILL.md
source content

Deepgram Observability

Overview

Implement comprehensive observability for Deepgram integrations including metrics, distributed tracing, logging, and alerting.

Prerequisites

  • Prometheus or compatible metrics backend
  • OpenTelemetry SDK installed
  • Grafana or similar dashboarding tool
  • AlertManager configured

Observability Pillars

| Pillar  | Tool            | Purpose                      |
|---------|-----------------|------------------------------|
| Metrics | Prometheus      | Performance & usage tracking |
| Traces  | OpenTelemetry   | Request flow visibility      |
| Logs    | Structured JSON | Debugging & audit            |
| Alerts  | AlertManager    | Incident notification        |

Instructions

Step 1: Set Up Metrics Collection

Implement Prometheus counters, histograms, and gauges for key operations.

Step 2: Add Distributed Tracing

Integrate OpenTelemetry for end-to-end request tracing.

Step 3: Configure Structured Logging

Set up JSON logging with consistent field names.

Step 4: Create Alert Rules

Define alerting rules for error rates and latency.

Examples

Prometheus Metrics

// lib/metrics.ts
import { Registry, Counter, Histogram, Gauge, collectDefaultMetrics } from 'prom-client';

// Dedicated registry for this service. Default Node.js process metrics
// (CPU, memory, event-loop lag, ...) are collected into it as well.
export const registry = new Registry();
collectDefaultMetrics({ register: registry });

// Every custom metric below registers into the same registry.
const registers = [registry];

// Counts each transcription call, labelled by outcome, model, and
// request type (e.g. prerecorded vs live).
export const transcriptionRequests = new Counter({
  name: 'deepgram_transcription_requests_total',
  help: 'Total number of transcription requests',
  labelNames: ['status', 'model', 'type'],
  registers,
});

// End-to-end request latency. Buckets span sub-second responses up to
// two-minute long-audio jobs.
export const transcriptionLatency = new Histogram({
  name: 'deepgram_transcription_latency_seconds',
  help: 'Transcription request latency in seconds',
  labelNames: ['model', 'type'],
  buckets: [0.1, 0.5, 1, 2, 5, 10, 30, 60, 120],
  registers,
});

// Cumulative seconds of audio sent through transcription, per model.
export const audioProcessed = new Counter({
  name: 'deepgram_audio_processed_seconds_total',
  help: 'Total audio duration processed in seconds',
  labelNames: ['model'],
  registers,
});

// Currently open Deepgram connections (e.g. live-streaming sessions).
export const activeConnections = new Gauge({
  name: 'deepgram_active_connections',
  help: 'Number of active Deepgram connections',
  labelNames: ['type'],
  registers,
});

// How often the API answered with a rate-limit response.
export const rateLimitHits = new Counter({
  name: 'deepgram_rate_limit_hits_total',
  help: 'Number of rate limit responses',
  registers,
});

// Running estimate of spend in USD, per model.
export const estimatedCost = new Counter({
  name: 'deepgram_estimated_cost_dollars',
  help: 'Estimated cost in dollars',
  labelNames: ['model'],
  registers,
});

/** Render all registered metrics in Prometheus exposition format. */
export async function getMetrics(): Promise<string> {
  return registry.metrics();
}

Instrumented Transcription Client

// lib/instrumented-client.ts
import { createClient, DeepgramClient } from '@deepgram/sdk';
import {
  transcriptionRequests,
  transcriptionLatency,
  audioProcessed,
  estimatedCost,
} from './metrics';
import { trace, context, SpanStatusCode } from '@opentelemetry/api';
import { logger } from './logger';

// Named tracer for all spans created by this client.
const tracer = trace.getTracer('deepgram-client');

// Approximate USD cost per minute of audio, per model; consumed by the
// cost estimate in transcribeUrl (audio seconds / 60 * rate). Unknown
// models fall back to the nova-2 rate. NOTE(review): verify these
// figures against Deepgram's current price list.
const modelCosts: Record<string, number> = {
  'nova-2': 0.0043,
  'nova': 0.0043,
  'base': 0.0048,
};

/**
 * Deepgram client wrapper that records Prometheus metrics, an
 * OpenTelemetry span, and structured logs around each transcription.
 */
export class InstrumentedDeepgramClient {
  private client: DeepgramClient;

  constructor(apiKey: string) {
    this.client = createClient(apiKey);
  }

  /**
   * Transcribe the audio at `url` with full instrumentation.
   *
   * @param url     Publicly reachable audio URL to transcribe.
   * @param options Optional settings; `model` defaults to 'nova-2'.
   * @returns The Deepgram result object on success.
   * @throws The SDK error on an API-level failure, or whatever the SDK
   *         call threw on an unexpected exception.
   */
  async transcribeUrl(url: string, options: { model?: string } = {}) {
    const model = options.model || 'nova-2';
    const startTime = Date.now();

    return tracer.startActiveSpan('deepgram.transcribe', async (span) => {
      span.setAttribute('deepgram.model', model);
      span.setAttribute('deepgram.audio_url', url);

      // BUG FIX: previously the API-error branch incremented the 'error'
      // counter, threw, and the catch block then incremented 'exception'
      // and re-set the span status — every API error was double-counted.
      // This flag marks that the failure was already recorded.
      let failureRecorded = false;

      try {
        const { result, error } = await this.client.listen.prerecorded.transcribeUrl(
          { url },
          { model, smart_format: true }
        );

        const duration = (Date.now() - startTime) / 1000;

        if (error) {
          failureRecorded = true;
          transcriptionRequests.labels('error', model, 'prerecorded').inc();
          // Record latency for API errors too (previously only the
          // exception path observed the histogram).
          transcriptionLatency.labels(model, 'prerecorded').observe(duration);
          span.setStatus({ code: SpanStatusCode.ERROR, message: error.message });

          logger.error('Transcription failed', {
            model,
            error: error.message,
            duration,
          });

          throw error;
        }

        // Success path: record request, latency, audio volume, and cost.
        transcriptionRequests.labels('success', model, 'prerecorded').inc();
        transcriptionLatency.labels(model, 'prerecorded').observe(duration);

        const audioDuration = result.metadata.duration;
        audioProcessed.labels(model).inc(audioDuration);

        // Cost estimate: audio minutes * per-minute rate (nova-2 fallback).
        const cost = (audioDuration / 60) * (modelCosts[model] || 0.0043);
        estimatedCost.labels(model).inc(cost);

        span.setAttribute('deepgram.request_id', result.metadata.request_id);
        span.setAttribute('deepgram.audio_duration', audioDuration);
        span.setAttribute('deepgram.processing_time', duration);
        span.setStatus({ code: SpanStatusCode.OK });

        logger.info('Transcription completed', {
          requestId: result.metadata.request_id,
          model,
          audioDuration,
          processingTime: duration,
          cost,
        });

        return result;
      } catch (err) {
        // Only record here for genuinely unexpected exceptions; API-level
        // errors were already counted above.
        if (!failureRecorded) {
          const duration = (Date.now() - startTime) / 1000;
          transcriptionRequests.labels('exception', model, 'prerecorded').inc();
          transcriptionLatency.labels(model, 'prerecorded').observe(duration);

          span.setStatus({
            code: SpanStatusCode.ERROR,
            message: err instanceof Error ? err.message : 'Unknown error',
          });

          logger.error('Transcription exception', {
            model,
            error: err instanceof Error ? err.message : 'Unknown',
            duration,
          });
        }

        throw err;
      } finally {
        // Always close the span, regardless of outcome.
        span.end();
      }
    });
  }
}

OpenTelemetry Configuration

// lib/tracing.ts
import { NodeSDK } from '@opentelemetry/sdk-node';
import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node';
import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-grpc';
import { Resource } from '@opentelemetry/resources';
import { SemanticResourceAttributes } from '@opentelemetry/semantic-conventions';

// SDK is configured at module load; spans only flow once initTracing()
// calls sdk.start().
const sdk = new NodeSDK({
  resource: new Resource({
    [SemanticResourceAttributes.SERVICE_NAME]: 'deepgram-service',
    [SemanticResourceAttributes.SERVICE_VERSION]: process.env.VERSION || '1.0.0',
    [SemanticResourceAttributes.DEPLOYMENT_ENVIRONMENT]: process.env.NODE_ENV || 'development',
  }),
  traceExporter: new OTLPTraceExporter({
    url: process.env.OTEL_EXPORTER_OTLP_ENDPOINT || 'http://localhost:4317',
  }),
  instrumentations: [
    getNodeAutoInstrumentations({
      // Keep health probes and Prometheus scrapes out of the trace stream.
      '@opentelemetry/instrumentation-http': {
        ignoreIncomingPaths: ['/health', '/metrics'],
      },
    }),
  ],
});

/**
 * Start the tracing SDK and register a SIGTERM hook that flushes any
 * buffered spans before the process exits.
 */
export function initTracing(): void {
  sdk.start();

  process.on('SIGTERM', async () => {
    try {
      await sdk.shutdown();
      console.log('Tracing terminated');
    } catch (error) {
      console.error('Error terminating tracing', error);
    } finally {
      process.exit(0);
    }
  });
}

Structured Logging

// lib/logger.ts
import pino from 'pino';

// Fields stamped onto every log line emitted by this service.
const serviceMetadata = {
  service: 'deepgram-service',
  version: process.env.VERSION || '1.0.0',
  environment: process.env.NODE_ENV || 'development',
};

// Root logger: JSON lines with ISO-8601 timestamps and human-readable
// level labels (pino emits numeric levels by default).
export const logger = pino({
  level: process.env.LOG_LEVEL || 'info',
  formatters: {
    level: (label) => ({ level: label }),
  },
  base: serviceMetadata,
  timestamp: pino.stdTimeFunctions.isoTime,
});

// Child loggers that tag each line with its originating component.
export const transcriptionLogger = logger.child({ component: 'transcription' });
export const metricsLogger = logger.child({ component: 'metrics' });
export const alertLogger = logger.child({ component: 'alerts' });

Grafana Dashboard Configuration

{
  "dashboard": {
    "title": "Deepgram Transcription Service",
    "panels": [
      {
        "title": "Request Rate",
        "type": "graph",
        "targets": [
          {
            "expr": "sum(rate(deepgram_transcription_requests_total[5m])) by (status)",
            "legendFormat": "{{status}}"
          }
        ]
      },
      {
        "title": "Latency (P95)",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.95, sum(rate(deepgram_transcription_latency_seconds_bucket[5m])) by (le, model))",
            "legendFormat": "{{model}}"
          }
        ]
      },
      {
        "title": "Audio Processed (per hour)",
        "type": "stat",
        "targets": [
          {
            "expr": "sum(increase(deepgram_audio_processed_seconds_total[1h]))/60",
            "legendFormat": "Minutes"
          }
        ]
      },
      {
        "title": "Error Rate",
        "type": "gauge",
        "targets": [
          {
            "expr": "sum(rate(deepgram_transcription_requests_total{status='error'}[5m])) / sum(rate(deepgram_transcription_requests_total[5m])) * 100"
          }
        ]
      },
      {
        "title": "Estimated Cost Today",
        "type": "stat",
        "targets": [
          {
            "expr": "sum(increase(deepgram_estimated_cost_dollars[24h]))"
          }
        ]
      },
      {
        "title": "Active Connections",
        "type": "graph",
        "targets": [
          {
            "expr": "deepgram_active_connections",
            "legendFormat": "{{type}}"
          }
        ]
      }
    ]
  }
}

AlertManager Rules

# prometheus/rules/deepgram.yml
# Alerting rules for the Deepgram transcription service. Thresholds
# (5% errors, 30s P95, 10 rate-limit hits/hour, 2x cost) are starting
# points — tune them against observed traffic.
groups:
  - name: deepgram-alerts
    rules:
      # Error responses exceed 5% of all requests, sustained for 5 minutes.
      - alert: DeepgramHighErrorRate
        expr: |
          sum(rate(deepgram_transcription_requests_total{status="error"}[5m])) /
          sum(rate(deepgram_transcription_requests_total[5m])) > 0.05
        for: 5m
        labels:
          severity: critical
          service: deepgram
        annotations:
          summary: "High Deepgram error rate (> 5%)"
          description: "Error rate is {{ $value | humanizePercentage }}"
          runbook: "https://wiki.example.com/runbooks/deepgram-errors"

      # 95th-percentile request latency above 30 seconds.
      - alert: DeepgramHighLatency
        expr: |
          histogram_quantile(0.95,
            sum(rate(deepgram_transcription_latency_seconds_bucket[5m])) by (le)
          ) > 30
        for: 5m
        labels:
          severity: warning
          service: deepgram
        annotations:
          summary: "High Deepgram latency (P95 > 30s)"
          description: "P95 latency is {{ $value | humanizeDuration }}"

      # More than 10 rate-limit responses in the last hour; fires immediately.
      - alert: DeepgramRateLimited
        expr: increase(deepgram_rate_limit_hits_total[1h]) > 10
        for: 0m
        labels:
          severity: warning
          service: deepgram
        annotations:
          summary: "Deepgram rate limiting detected"
          description: "{{ $value }} rate limit hits in the last hour"

      # Spend over the last hour is more than double the same hour yesterday.
      - alert: DeepgramCostSpike
        expr: |
          sum(increase(deepgram_estimated_cost_dollars[1h])) >
          sum(increase(deepgram_estimated_cost_dollars[1h] offset 1d)) * 2
        for: 30m
        labels:
          severity: warning
          service: deepgram
        annotations:
          summary: "Deepgram cost spike detected"
          description: "Cost over the last hour is more than 2x the same hour yesterday"

      # Traffic stopped: the counter has data (service has run before) but
      # no requests were observed for 15 minutes.
      - alert: DeepgramNoRequests
        expr: |
          sum(rate(deepgram_transcription_requests_total[15m])) == 0
          and sum(deepgram_transcription_requests_total) > 0
        for: 15m
        labels:
          severity: warning
          service: deepgram
        annotations:
          summary: "No Deepgram requests in 15 minutes"
          description: "Service may be down or disconnected"

Health Check Endpoint

// routes/health.ts
import express from 'express';
import { createClient } from '@deepgram/sdk';
import { getMetrics } from '../lib/metrics';

const router = express.Router();

/** Shape of the /health response body. */
interface HealthCheck {
  status: 'healthy' | 'degraded' | 'unhealthy';
  timestamp: string;
  checks: Record<string, {
    status: 'pass' | 'fail';
    latency?: number;
    message?: string;
  }>;
}

/**
 * Liveness/readiness probe. Verifies Deepgram API reachability by
 * listing projects; returns 200 when healthy, 503 otherwise.
 * Note: this performs a live API call on every probe — keep probe
 * intervals reasonable to avoid burning quota.
 */
router.get('/health', async (req, res) => {
  const health: HealthCheck = {
    status: 'healthy',
    timestamp: new Date().toISOString(),
    checks: {},
  };

  // Check Deepgram API connectivity.
  const startTime = Date.now();
  const apiKey = process.env.DEEPGRAM_API_KEY;
  if (!apiKey) {
    // Report a clear failure instead of relying on the old non-null
    // assertion, which let createClient fail with a confusing message.
    health.checks.deepgram = {
      status: 'fail',
      latency: 0,
      message: 'DEEPGRAM_API_KEY is not set',
    };
  } else {
    try {
      const client = createClient(apiKey);
      const { error } = await client.manage.getProjects();

      health.checks.deepgram = {
        status: error ? 'fail' : 'pass',
        latency: Date.now() - startTime,
        message: error?.message,
      };
    } catch (err) {
      health.checks.deepgram = {
        status: 'fail',
        latency: Date.now() - startTime,
        message: err instanceof Error ? err.message : 'Unknown error',
      };
    }
  }

  // Any failed check marks the whole service unhealthy.
  const failedChecks = Object.values(health.checks).filter(c => c.status === 'fail');
  if (failedChecks.length > 0) {
    health.status = 'unhealthy';
  }

  const statusCode = health.status === 'healthy' ? 200 : 503;
  res.status(statusCode).json(health);
});

/** Prometheus scrape endpoint: exposition-format dump of the registry. */
router.get('/metrics', async (req, res) => {
  res.set('Content-Type', 'text/plain');
  res.send(await getMetrics());
});

export default router;

Resources

Next Steps

Proceed to the deepgram-incident-runbook skill for incident response procedures.