Claude-skill-registry-data mistral-cost-tuning
install
source · Clone the upstream repo
git clone https://github.com/majiayu000/claude-skill-registry-data
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/majiayu000/claude-skill-registry-data "$T" && mkdir -p ~/.claude/skills && cp -r "$T/data/mistral-cost-tuning" ~/.claude/skills/majiayu000-claude-skill-registry-data-mistral-cost-tuning && rm -rf "$T"
manifest:
data/mistral-cost-tuning/SKILL.mdsource content
Mistral AI Cost Tuning
Overview
Optimize Mistral AI costs through smart model selection, token management, and usage monitoring.
Prerequisites
- Access to Mistral AI console
- Understanding of current usage patterns
- Database for usage tracking (optional)
- Alerting system configured (optional)
Pricing Overview (as of 2024)
| Model | Input (per 1M tokens) | Output (per 1M tokens) | Best For |
|---|---|---|---|
| mistral-small-latest | $0.20 | $0.60 | Fast, simple tasks |
| mistral-large-latest | $2.00 | $6.00 | Complex reasoning |
| mistral-embed | $0.10 | - | Embeddings |
Note: Prices subject to change. Check console.mistral.ai for current pricing.
Instructions
Step 1: Cost Estimation Calculator
interface TokenUsage { inputTokens: number; outputTokens: number; } interface CostEstimate { model: string; inputCost: number; outputCost: number; totalCost: number; currency: string; } const PRICING = { 'mistral-small-latest': { input: 0.20, output: 0.60 }, 'mistral-large-latest': { input: 2.00, output: 6.00 }, 'mistral-embed': { input: 0.10, output: 0 }, } as const; function estimateCost(model: keyof typeof PRICING, usage: TokenUsage): CostEstimate { const prices = PRICING[model]; const inputCost = (usage.inputTokens / 1_000_000) * prices.input; const outputCost = (usage.outputTokens / 1_000_000) * prices.output; return { model, inputCost, outputCost, totalCost: inputCost + outputCost, currency: 'USD', }; } // Usage const cost = estimateCost('mistral-small-latest', { inputTokens: 500_000, outputTokens: 200_000, }); console.log(`Estimated cost: $${cost.totalCost.toFixed(4)}`); // Estimated cost: $0.2200
Step 2: Model Selection by Task
type TaskType = 'simple' | 'moderate' | 'complex' | 'embedding'; interface ModelRecommendation { model: string; reason: string; estimatedCostPer1000Requests: number; } function recommendModel( taskType: TaskType, avgInputTokens: number, avgOutputTokens: number ): ModelRecommendation { switch (taskType) { case 'simple': // Classification, extraction, simple Q&A return { model: 'mistral-small-latest', reason: 'Fast and cost-effective for simple tasks', estimatedCostPer1000Requests: (avgInputTokens * 0.20 + avgOutputTokens * 0.60) / 1000, }; case 'moderate': // Summarization, translation, basic coding return { model: 'mistral-small-latest', reason: 'Good balance of capability and cost', estimatedCostPer1000Requests: (avgInputTokens * 0.20 + avgOutputTokens * 0.60) / 1000, }; case 'complex': // Complex reasoning, code generation, analysis return { model: 'mistral-large-latest', reason: 'Required for complex tasks', estimatedCostPer1000Requests: (avgInputTokens * 2.00 + avgOutputTokens * 6.00) / 1000, }; case 'embedding': return { model: 'mistral-embed', reason: 'Specialized for embeddings', estimatedCostPer1000Requests: (avgInputTokens * 0.10) / 1000, }; } } // Usage const rec = recommendModel('simple', 500, 200); console.log(`Recommended: ${rec.model} - $${rec.estimatedCostPer1000Requests.toFixed(4)}/1000 req`);
Step 3: Token Budget Management
class TokenBudgetManager { private dailyBudget: number; private monthlyBudget: number; private dailyUsage: Map<string, number> = new Map(); private monthlyUsage = 0; constructor(dailyBudget: number, monthlyBudget: number) { this.dailyBudget = dailyBudget; this.monthlyBudget = monthlyBudget; } recordUsage(model: string, tokens: number): void { const today = new Date().toISOString().split('T')[0]; const key = `${today}:${model}`; const current = this.dailyUsage.get(key) || 0; this.dailyUsage.set(key, current + tokens); this.monthlyUsage += tokens; this.checkBudgetAlerts(); } canMakeRequest(model: string, estimatedTokens: number): boolean { const today = new Date().toISOString().split('T')[0]; const key = `${today}:${model}`; const todayUsage = this.dailyUsage.get(key) || 0; return ( todayUsage + estimatedTokens <= this.dailyBudget && this.monthlyUsage + estimatedTokens <= this.monthlyBudget ); } private checkBudgetAlerts(): void { if (this.monthlyUsage > this.monthlyBudget * 0.8) { console.warn(`Budget alert: ${((this.monthlyUsage / this.monthlyBudget) * 100).toFixed(1)}% of monthly budget used`); } } getUsageReport(): { daily: Record<string, number>; monthly: number } { return { daily: Object.fromEntries(this.dailyUsage), monthly: this.monthlyUsage, }; } }
Step 4: Prompt Optimization
// Optimize prompts to reduce token usage function optimizePrompt(prompt: string): string { return prompt .replace(/\s+/g, ' ') // Remove extra whitespace .replace(/\n\s*\n/g, '\n') // Remove blank lines .trim(); } // Use system prompts efficiently const EFFICIENT_SYSTEM_PROMPT = ` You are a helpful assistant. Be concise. Answer in 1-2 sentences when possible. `.trim(); // Compare token counts function countTokensEstimate(text: string): number { // Rough estimate: 1 token ≈ 4 characters return Math.ceil(text.length / 4); } // Example: Reduce prompt size const originalPrompt = ` I would like you to help me with the following task. Please provide a comprehensive and detailed explanation of how to implement a REST API in Node.js. `; const optimizedPrompt = `Explain implementing a REST API in Node.js. Be concise.`; console.log(`Original: ~${countTokensEstimate(originalPrompt)} tokens`); console.log(`Optimized: ~${countTokensEstimate(optimizedPrompt)} tokens`); // Original: ~47 tokens // Optimized: ~13 tokens (72% reduction)
Step 5: Caching for Cost Reduction
import crypto from 'crypto'; import { LRUCache } from 'lru-cache'; const responseCache = new LRUCache<string, { response: string; cost: number }>({ max: 10000, ttl: 24 * 60 * 60 * 1000, // 24 hours }); interface CachedResult { response: string; cached: boolean; cost: number; savedCost: number; } async function costAwareChat( client: Mistral, messages: any[], model: string ): Promise<CachedResult> { const cacheKey = crypto .createHash('sha256') .update(JSON.stringify({ messages, model })) .digest('hex'); const cached = responseCache.get(cacheKey); if (cached) { return { response: cached.response, cached: true, cost: 0, savedCost: cached.cost, }; } const response = await client.chat.complete({ model, messages }); const content = response.choices?.[0]?.message?.content ?? ''; const cost = estimateCost(model as any, { inputTokens: response.usage?.promptTokens || 0, outputTokens: response.usage?.completionTokens || 0, }).totalCost; responseCache.set(cacheKey, { response: content, cost }); return { response: content, cached: false, cost, savedCost: 0, }; }
Step 6: Usage Dashboard Query
-- Track usage in your database CREATE TABLE mistral_usage ( id SERIAL PRIMARY KEY, model VARCHAR(50), input_tokens INTEGER, output_tokens INTEGER, cost_usd DECIMAL(10, 6), user_id VARCHAR(50), created_at TIMESTAMP DEFAULT NOW() ); -- Daily cost report SELECT DATE(created_at) as date, model, SUM(input_tokens) as total_input, SUM(output_tokens) as total_output, SUM(cost_usd) as total_cost FROM mistral_usage WHERE created_at >= NOW() - INTERVAL '30 days' GROUP BY 1, 2 ORDER BY 1 DESC, 3 DESC; -- User cost breakdown SELECT user_id, SUM(cost_usd) as total_cost, COUNT(*) as request_count FROM mistral_usage WHERE created_at >= DATE_TRUNC('month', NOW()) GROUP BY 1 ORDER BY 2 DESC LIMIT 10;
Output
- Optimized model selection
- Token budget management
- Usage monitoring implemented
- Cost reduction strategies applied
Cost Reduction Strategies
| Strategy | Savings | Effort |
|---|---|---|
| Model selection | 50-90% | Low |
| Prompt optimization | 20-50% | Low |
| Response caching | 30-80% | Medium |
| Batch processing | 10-30% | Medium |
| Max tokens limit | 10-40% | Low |
Error Handling
| Issue | Cause | Solution |
|---|---|---|
| Unexpected costs | Untracked usage | Implement monitoring |
| Budget exceeded | No alerts | Set up budget alerts |
| Inefficient model | Wrong selection | Use task-based selection |
| Long responses | No limit | Set maxTokens |
Examples
Quick Cost Check
// Estimate monthly cost const monthlyRequests = 100_000; const avgInputTokens = 500; const avgOutputTokens = 200; const smallCost = estimateCost('mistral-small-latest', { inputTokens: avgInputTokens * monthlyRequests, outputTokens: avgOutputTokens * monthlyRequests, }); const largeCost = estimateCost('mistral-large-latest', { inputTokens: avgInputTokens * monthlyRequests, outputTokens: avgOutputTokens * monthlyRequests, }); console.log(`Small model: $${smallCost.totalCost.toFixed(2)}/month`); console.log(`Large model: $${largeCost.totalCost.toFixed(2)}/month`); // Small model: $22.00/month // Large model: $220.00/month
Resources
Next Steps
For architecture patterns, see
mistral-reference-architecture.