Claude-code-plugins-plus-skills maintainx-observability
install
source · Clone the upstream repo
git clone https://github.com/jeremylongshore/claude-code-plugins-plus-skills
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && D=~/.claude/skills/jeremylongshore-claude-code-plugins-plus-skills-maintainx-observability && git clone --depth=1 https://github.com/jeremylongshore/claude-code-plugins-plus-skills "$T" && mkdir -p "$D" && cp -r "$T/plugins/saas-packs/maintainx-pack/skills/maintainx-observability/." "$D"; R=$?; rm -rf "$T"; [ "$R" -eq 0 ]
manifest:
plugins/saas-packs/maintainx-pack/skills/maintainx-observability/SKILL.md
source content
MaintainX Observability
Overview
Implement metrics, structured logging, and alerting for MaintainX integrations to ensure reliability and rapid issue detection.
Prerequisites
- MaintainX integration deployed
- Node.js 18+
- Monitoring platform (Prometheus/Grafana, Datadog, or CloudWatch)
Instructions
Step 1: Prometheus Metrics
// src/observability/metrics.ts
import { Counter, Histogram, Gauge, Registry } from 'prom-client';

/**
 * Registry that owns every MaintainX metric declared in this module.
 * Each metric below is attached to it explicitly through `registers`.
 */
const register = new Registry();

/** Counts MaintainX API requests, split by HTTP method, endpoint and status. */
const apiRequests = new Counter({
  name: 'maintainx_api_requests_total',
  help: 'Total MaintainX API requests',
  labelNames: ['method', 'endpoint', 'status'],
  registers: [register],
});

/** Distribution of MaintainX API request latency, split by method and endpoint. */
const apiLatency = new Histogram({
  name: 'maintainx_api_latency_seconds',
  help: 'MaintainX API request latency',
  labelNames: ['method', 'endpoint'],
  buckets: [0.1, 0.25, 0.5, 1, 2.5, 5, 10],
  registers: [register],
});

/** Counts how often the MaintainX API answered with a rate-limit response. */
const rateLimitHits = new Counter({
  name: 'maintainx_rate_limit_hits_total',
  help: 'Times rate limited by MaintainX API',
  registers: [register],
});

/** Counts processed work orders, split by action and outcome status. */
const workOrdersProcessed = new Counter({
  name: 'maintainx_work_orders_processed_total',
  help: 'Work orders processed',
  labelNames: ['action', 'status'],
  registers: [register],
});

/** Gauge holding the number of seconds since the last successful sync. */
const syncLag = new Gauge({
  name: 'maintainx_sync_lag_seconds',
  help: 'Seconds since last successful sync',
  registers: [register],
});

/** All MaintainX metrics, grouped for convenient import by instrumentation code. */
export const metrics = {
  apiRequests,
  apiLatency,
  rateLimitHits,
  workOrdersProcessed,
  syncLag,
};

export { register };
Step 2: Instrumented API Client
// src/observability/instrumented-client.ts
import axios, { AxiosInstance, AxiosRequestConfig } from 'axios';
import { metrics } from './metrics';

/** Extra field stamped on the request config so the response side can time the call. */
interface RequestTiming {
  __startTime?: bigint;
}

/**
 * Turns a request URL into a bounded label value.
 *
 * The query string is dropped and purely numeric path segments are replaced
 * with `:id`, so `/workorders/12345?expand=x` becomes `/workorders/:id`.
 * Without this, every resource id would create a new Prometheus time series.
 *
 * @param url Request URL as stored on the axios config; may be missing.
 * @returns The normalised path, or `'unknown'` when no URL is available.
 */
function toEndpointLabel(url: string | undefined): string {
  const path = url?.split('?')[0];
  if (!path) {
    return 'unknown';
  }
  return path.replace(/\/\d+(?=\/|$)/g, '/:id');
}

/**
 * Seconds elapsed since the request interceptor stamped the config.
 *
 * @returns `undefined` when the config carries no start time, e.g. the
 *          failure happened before the request interceptor ran.
 */
function elapsedSeconds(config: object | undefined): number | undefined {
  const startedAt = (config as RequestTiming | undefined)?.__startTime;
  if (startedAt === undefined) {
    return undefined;
  }
  return Number(process.hrtime.bigint() - startedAt) / 1e9;
}

/**
 * Records one finished request (successful or failed) in the request counter
 * and, when timing information is available, in the latency histogram.
 *
 * @param config Axios config of the request; may be missing on non-axios errors.
 * @param status HTTP status code, or 0 when no response was received.
 */
function recordRequest(config: AxiosRequestConfig | undefined, status: number): void {
  const method = config?.method?.toUpperCase() || 'GET';
  const endpoint = toEndpointLabel(config?.url);

  metrics.apiRequests.inc({ method, endpoint, status: String(status) });

  const seconds = elapsedSeconds(config);
  if (seconds !== undefined) {
    metrics.apiLatency.observe({ method, endpoint }, seconds);
  }
}

/**
 * Creates an axios client for the MaintainX API whose interceptors record
 * request counts, latency and rate-limit hits in `metrics`.
 *
 * Failed requests are timed as well as successful ones; otherwise timeouts
 * and slow 5xx responses would never show up in the latency histogram.
 *
 * @param apiKey MaintainX API key, sent as a Bearer token.
 * @returns The configured axios instance. Request errors are re-thrown
 *          unchanged after being recorded.
 */
export function createInstrumentedClient(apiKey: string): AxiosInstance {
  const client = axios.create({
    baseURL: 'https://api.getmaintainx.com/v1',
    headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
    timeout: 30_000,
  });

  client.interceptors.request.use((config) => {
    // Monotonic clock: unaffected by wall-clock adjustments during the request.
    (config as typeof config & RequestTiming).__startTime = process.hrtime.bigint();
    return config;
  });

  client.interceptors.response.use(
    (response) => {
      recordRequest(response.config, response.status);
      return response;
    },
    (error) => {
      // Status 0 marks failures without an HTTP response (timeout, DNS, reset).
      const status: number = error?.response?.status ?? 0;
      recordRequest(error?.config, status);
      if (status === 429) {
        metrics.rateLimitHits.inc();
      }
      throw error;
    },
  );

  return client;
}
Step 3: Structured Logging
// src/observability/logger.ts
type LogLevel = 'debug' | 'info' | 'warn' | 'error';

interface LogEntry {
  level: LogLevel;
  message: string;
  service: string;
  timestamp: string;
  [key: string]: unknown;
}

/** Relative severity of each level; higher means more severe. */
const LEVEL_RANK: Record<LogLevel, number> = { debug: 10, info: 20, warn: 30, error: 40 };

/** Type guard for the four supported level names. */
function isLogLevel(value: string): value is LogLevel {
  return value === 'debug' || value === 'info' || value === 'warn' || value === 'error';
}

/**
 * Parses a level name such as the `LOG_LEVEL` environment variable.
 * Missing or unrecognised values fall back to `'debug'`, i.e. log everything,
 * which is what the logger did before level filtering existed.
 */
function parseLogLevel(raw: string | undefined): LogLevel {
  const candidate = raw?.trim().toLowerCase();
  return candidate !== undefined && isLogLevel(candidate) ? candidate : 'debug';
}

/**
 * Serialises an entry to one JSON line without ever throwing.
 * If the caller-supplied data cannot be serialised (circular reference,
 * BigInt, ...), the core fields are still emitted together with a
 * `serializationError` field describing the failure.
 */
function serializeEntry(entry: LogEntry): string {
  try {
    return JSON.stringify(entry);
  } catch (err) {
    const { level, message, service, timestamp } = entry;
    const serializationError = err instanceof Error ? err.message : String(err);
    return JSON.stringify({ level, message, service, timestamp, serializationError });
  }
}

/**
 * Minimal JSON-lines logger: every call writes a single JSON object to stdout
 * via `console.log`.
 */
class StructuredLogger {
  private readonly service: string;
  private readonly minLevel: LogLevel;

  /**
   * @param service  Value of the `service` field on every entry.
   * @param minLevel Entries below this level are dropped. Defaults to the
   *                 `LOG_LEVEL` environment variable, or `'debug'` when unset.
   */
  constructor(service: string, minLevel: LogLevel = parseLogLevel(process.env.LOG_LEVEL)) {
    this.service = service;
    this.minLevel = minLevel;
  }

  private log(level: LogLevel, message: string, data?: Record<string, any>): void {
    if (LEVEL_RANK[level] < LEVEL_RANK[this.minLevel]) {
      return;
    }
    // Caller data is spread FIRST so it can never overwrite the core fields.
    const entry: LogEntry = {
      ...data,
      level,
      message,
      service: this.service,
      timestamp: new Date().toISOString(),
    };
    // JSON output for log aggregation (ELK, CloudWatch, Datadog)
    console.log(serializeEntry(entry));
  }

  info(message: string, data?: Record<string, any>): void {
    this.log('info', message, data);
  }

  warn(message: string, data?: Record<string, any>): void {
    this.log('warn', message, data);
  }

  error(message: string, data?: Record<string, any>): void {
    this.log('error', message, data);
  }

  debug(message: string, data?: Record<string, any>): void {
    this.log('debug', message, data);
  }
}

/**
 * Shared logger for the MaintainX integration.
 *
 * The usage sample is kept as documentation rather than executable statements,
 * so importing this module does not emit sample log lines.
 *
 * @example
 * logger.info('Work order created', { workOrderId: 12345, priority: 'HIGH' });
 * logger.error('API call failed', { endpoint: '/workorders', status: 500, retryCount: 2 });
 */
export const logger = new StructuredLogger('maintainx-integration');
Step 4: Health and Metrics Endpoints
// src/observability/server.ts
import express from 'express';
import { logger } from './logger';
import { register, metrics } from './metrics';

const app = express();

/**
 * Adds up every labelled series of a prom-client metric into one number,
 * so the health payload reports plain totals rather than raw metric objects.
 */
async function totalOf(metric: {
  get(): Promise<{ values: { value: number }[] }>;
}): Promise<number> {
  const { values } = await metric.get();
  return values.reduce((sum, sample) => sum + sample.value, 0);
}

/** Normalises an unknown thrown value into a loggable message. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

// Prometheus scrape endpoint
app.get('/metrics', async (_req, res) => {
  // Express 4 does not catch rejections from async handlers; without this
  // try/catch a failing registry would leave the scrape request hanging.
  try {
    const body = await register.metrics();
    res.set('Content-Type', register.contentType);
    res.end(body);
  } catch (err) {
    logger.error('Failed to render metrics', { error: describeError(err) });
    res.status(500).end();
  }
});

// Health check with metrics
app.get('/health', async (_req, res) => {
  try {
    const health = {
      status: 'healthy',
      uptime: process.uptime(),
      metrics: {
        totalRequests: await totalOf(metrics.apiRequests),
        rateLimitHits: await totalOf(metrics.rateLimitHits),
        // `??` rather than `||`: only a missing sample falls back to 0.
        syncLagSeconds: (await metrics.syncLag.get()).values[0]?.value ?? 0,
      },
    };
    res.json(health);
  } catch (err) {
    logger.error('Health check failed', { error: describeError(err) });
    res.status(500).json({ status: 'unhealthy' });
  }
});

app.listen(9090, () => logger.info('Metrics server on :9090'));
Step 5: Alerting Rules (Prometheus)
# prometheus/alerts.yml
groups:
  - name: maintainx
    rules:
      # Share of requests answered with a 5xx status over the last 5 minutes.
      # The ratio (5xx rate / total rate) is what makes "> 0.1" mean 10%;
      # comparing the bare 5xx rate to 0.1 would alert on 0.1 requests/second.
      - alert: MaintainXHighErrorRate
        expr: |
          sum(rate(maintainx_api_requests_total{status=~"5.."}[5m]))
            /
          sum(rate(maintainx_api_requests_total[5m])) > 0.1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "MaintainX API error rate > 10%"

      # p95 latency, evaluated separately for each method/endpoint series.
      - alert: MaintainXHighLatency
        expr: histogram_quantile(0.95, rate(maintainx_api_latency_seconds_bucket[5m])) > 5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "MaintainX API p95 latency > 5s"

      # Any rate-limit response within the last 5 minutes.
      - alert: MaintainXRateLimited
        expr: rate(maintainx_rate_limit_hits_total[5m]) > 0
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "MaintainX API rate limiting detected"

      # 900 seconds = 15 minutes without a successful sync.
      - alert: MaintainXSyncStale
        expr: maintainx_sync_lag_seconds > 900
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "MaintainX sync lag > 15 minutes"
Output
- Prometheus metrics (request count, latency histogram, rate limit counter, sync lag gauge)
- Instrumented axios client automatically recording metrics on every API call
- Structured JSON logging for all operations
- `/metrics` endpoint for Prometheus scraping
- Alerting rules for error rate, latency, rate limits, and sync staleness
Error Handling
| Issue | Cause | Solution |
|---|---|---|
| Metrics endpoint 500 | prom-client not initialized | Ensure `register` is created before the metrics that attach to it |
| Missing labels | Metric name mismatch | Check that `labelNames` match the labels passed in `inc()` / `observe()` calls |
| Log volume too high | Debug logging in production | Set the log level to `info` or higher in production |
| Stale sync alert | Sync job stopped | Check cron schedule, restart sync process |
Resources
- MaintainX API Reference
- prom-client -- Prometheus metrics for Node.js
- Prometheus Alerting Rules
Next Steps
For incident response, see `maintainx-incident-runbook`.
Examples
Datadog integration using DogStatsD:
import StatsD from 'hot-shots';

// Client whose metric names are all prefixed with `maintainx.`
const dogstatsd = new StatsD({ prefix: 'maintainx.' });

// Record API call
const workOrdersEndpoint = '/workorders';
const requestTags = { endpoint: workOrdersEndpoint, status: '200' };
const latencyTags = { endpoint: workOrdersEndpoint };

dogstatsd.increment('api.requests', 1, requestTags);
dogstatsd.histogram('api.latency', 0.45, latencyTags);