Babysitter benchmarking-analyst
Benchmarking analysis skill for performance comparison and best practice identification.
Install
Source: clone the upstream repo
git clone https://github.com/a5c-ai/babysitter
Claude Code: install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/a5c-ai/babysitter "$T" && mkdir -p ~/.claude/skills && cp -r "$T/library/specializations/domains/science/industrial-engineering/skills/benchmarking-analyst" ~/.claude/skills/a5c-ai-babysitter-benchmarking-analyst-d79a72 && rm -rf "$T"
Manifest:
library/specializations/domains/science/industrial-engineering/skills/benchmarking-analyst/SKILL.md
benchmarking-analyst
You are benchmarking-analyst - a specialized skill for benchmarking analysis including performance comparison and best practice identification.
Overview
This skill enables AI-powered benchmarking including:
- Internal benchmarking
- Competitive benchmarking
- Functional benchmarking
- Generic/best-in-class benchmarking
- Gap analysis
- Best practice identification
- Adaptation planning
- Performance tracking
Capabilities
1. Benchmarking Project Setup
from dataclasses import dataclass
from typing import List, Dict, Optional
from datetime import datetime
from enum import Enum

class BenchmarkType(Enum):
    INTERNAL = "internal"        # Compare within organization
    COMPETITIVE = "competitive"  # Compare with competitors
    FUNCTIONAL = "functional"    # Compare similar functions across industries
    GENERIC = "generic"          # Compare with best-in-class anywhere

@dataclass
class BenchmarkProject:
    title: str
    benchmark_type: BenchmarkType
    process_area: str
    metrics: List[str]
    partners: List[str]
    owner: str

def setup_benchmark_project(project: BenchmarkProject):
    """Set up benchmarking project structure"""
    phases = {
        "1_planning": {
            "status": "in_progress",
            "tasks": [
                {"task": "Identify what to benchmark", "status": "complete"},
                {"task": "Identify benchmark partners", "status": "complete"},
                {"task": "Determine data collection method", "status": "not_started"},
                {"task": "Define metrics and calculations", "status": "not_started"}
            ]
        },
        "2_analysis": {
            "status": "not_started",
            "tasks": [
                {"task": "Collect current performance data", "status": "not_started"},
                {"task": "Collect partner performance data", "status": "not_started"},
                {"task": "Determine performance gaps", "status": "not_started"},
                {"task": "Identify enablers of superior performance", "status": "not_started"}
            ]
        },
        "3_integration": {
            "status": "not_started",
            "tasks": [
                {"task": "Communicate findings", "status": "not_started"},
                {"task": "Establish improvement goals", "status": "not_started"},
                {"task": "Develop action plans", "status": "not_started"}
            ]
        },
        "4_action": {
            "status": "not_started",
            "tasks": [
                {"task": "Implement improvements", "status": "not_started"},
                {"task": "Monitor progress", "status": "not_started"},
                {"task": "Recalibrate benchmarks", "status": "not_started"}
            ]
        }
    }
    return {
        "project_id": f"BM-{datetime.now().strftime('%Y%m%d')}",
        "title": project.title,
        "type": project.benchmark_type.value,
        "process_area": project.process_area,
        "metrics": project.metrics,
        "partners": project.partners,
        "owner": project.owner,
        "created_date": datetime.now().strftime("%Y-%m-%d"),
        "phases": phases,
        "status": "planning"
    }
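A minimal usage sketch; the project details below are illustrative, not part of the skill:

# Hypothetical example project; substitute your own process area,
# metrics, and partners.
project = BenchmarkProject(
    title="Manufacturing Efficiency Study",
    benchmark_type=BenchmarkType.COMPETITIVE,
    process_area="Final assembly",
    metrics=["OEE", "First Pass Yield"],
    partners=["Company A", "Company B"],
    owner="IE Team",
)
plan = setup_benchmark_project(project)
print(plan["project_id"], plan["status"])  # e.g. BM-20250101 planning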
2. Data Collection Framework
from typing import Dict, List

def create_data_collection_template(metrics: List[str], partners: List[str]):
    """Create template for benchmark data collection"""
    template = {
        "data_points": {},
        "collection_guidance": {}
    }
    for metric in metrics:
        template["data_points"][metric] = {
            "our_performance": {
                "value": None,
                "unit": None,
                "period": None,
                "data_source": None,
                "confidence": None
            },
            "partners": {
                partner: {
                    "value": None,
                    "unit": None,
                    "period": None,
                    "data_source": None,
                    "is_estimate": False
                } for partner in partners
            }
        }
        template["collection_guidance"][metric] = {
            "definition": f"Standard definition for {metric}",
            "calculation": f"How to calculate {metric}",
            "data_sources": ["System A", "Reports", "Surveys"],
            "normalization_notes": "Ensure comparable basis"
        }
    return template

def validate_benchmark_data(data: Dict):
    """Validate collected benchmark data"""
    issues = []
    for metric, values in data['data_points'].items():
        # Check our data
        if values['our_performance']['value'] is None:
            issues.append(f"Missing our performance data for {metric}")
        # Check partner data
        partners_with_data = sum(
            1 for p in values['partners'].values() if p['value'] is not None
        )
        if partners_with_data == 0:
            issues.append(f"No partner data for {metric}")
        elif partners_with_data < len(values['partners']) / 2:
            issues.append(f"Limited partner data for {metric}")
        # Check comparability
        units = set()
        if values['our_performance']['unit']:
            units.add(values['our_performance']['unit'])
        for p in values['partners'].values():
            if p['unit']:
                units.add(p['unit'])
        if len(units) > 1:
            issues.append(f"Inconsistent units for {metric}: {units}")
    return {
        "is_valid": len(issues) == 0,
        "issues": issues,
        "completeness": calculate_completeness(data)
    }

def calculate_completeness(data):
    """Calculate data completeness percentage"""
    total_cells = 0
    filled_cells = 0
    for metric, values in data['data_points'].items():
        # Our data
        total_cells += 1
        if values['our_performance']['value'] is not None:
            filled_cells += 1
        # Partner data
        for partner_data in values['partners'].values():
            total_cells += 1
            if partner_data['value'] is not None:
                filled_cells += 1
    return round(filled_cells / total_cells * 100, 1) if total_cells > 0 else 0
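To see the template and validation together, a short sketch with invented values:

# Illustrative data; metric, partner names, and values are made up.
template = create_data_collection_template(["OEE"], ["Company A", "Company B"])
template["data_points"]["OEE"]["our_performance"].update({"value": 68.0, "unit": "%"})
template["data_points"]["OEE"]["partners"]["Company A"].update({"value": 80.0, "unit": "%"})
result = validate_benchmark_data(template)
print(result["is_valid"], result["issues"], result["completeness"])
# True [] 66.7  (Company B's missing value lowers completeness)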
3. Gap Analysis
from typing import Dict

import numpy as np

def perform_gap_analysis(data: Dict, higher_is_better: Dict = None):
    """
    Perform gap analysis between our performance and benchmarks

    higher_is_better: {metric: True/False}
    """
    higher_is_better = higher_is_better or {}
    gap_analysis = []
    for metric, values in data['data_points'].items():
        our_value = values['our_performance']['value']
        if our_value is None:
            continue
        partner_values = [
            p['value'] for p in values['partners'].values() if p['value'] is not None
        ]
        if not partner_values:
            continue

        # Calculate statistics
        best = max(partner_values) if higher_is_better.get(metric, True) else min(partner_values)
        worst = min(partner_values) if higher_is_better.get(metric, True) else max(partner_values)
        median = np.median(partner_values)
        mean = np.mean(partner_values)

        # Calculate gaps
        gap_to_best = best - our_value if higher_is_better.get(metric, True) else our_value - best
        gap_to_median = median - our_value if higher_is_better.get(metric, True) else our_value - median

        # Calculate percentile position
        all_values = partner_values + [our_value]
        all_values_sorted = sorted(all_values, reverse=higher_is_better.get(metric, True))
        our_rank = all_values_sorted.index(our_value) + 1
        percentile = round((1 - our_rank / len(all_values)) * 100, 1)

        gap_analysis.append({
            'metric': metric,
            'our_value': our_value,
            'best_in_class': best,
            'median': round(median, 2),
            'mean': round(mean, 2),
            'worst': worst,
            'gap_to_best': round(gap_to_best, 2),
            'gap_to_median': round(gap_to_median, 2),
            'gap_percent': round(gap_to_best / our_value * 100, 1) if our_value != 0 else 0,
            'our_percentile': percentile,
            'position': classify_position(percentile)
        })

    # Sort by gap size
    gap_analysis.sort(key=lambda x: abs(x['gap_percent']), reverse=True)
    return {
        'gaps': gap_analysis,
        'summary': {
            'metrics_analyzed': len(gap_analysis),
            'metrics_below_median': sum(1 for g in gap_analysis if g['gap_to_median'] > 0),
            'metrics_above_median': sum(1 for g in gap_analysis if g['gap_to_median'] < 0),
            'biggest_gap_metric': gap_analysis[0]['metric'] if gap_analysis else None
        }
    }

def classify_position(percentile):
    """Classify position based on percentile"""
    if percentile >= 90:
        return "Leader"
    elif percentile >= 75:
        return "Above Average"
    elif percentile >= 50:
        return "Average"
    elif percentile >= 25:
        return "Below Average"
    else:
        return "Laggard"
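A worked example on invented numbers, matching the template shape above:

# Minimal illustrative input; all values are invented.
data = {"data_points": {"OEE": {
    "our_performance": {"value": 68.0, "unit": "%"},
    "partners": {
        "Company A": {"value": 80.0, "unit": "%"},
        "Company B": {"value": 74.0, "unit": "%"},
    },
}}}
result = perform_gap_analysis(data, higher_is_better={"OEE": True})
print(result["gaps"][0]["gap_to_best"], result["gaps"][0]["position"])
# 12.0 Laggard  (68 ranks last among 68, 74, 80)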
4. Best Practice Identification
from typing import Dict

def identify_best_practices(gap_analysis: Dict, partner_insights: Dict):
    """
    Identify best practices from benchmark partners

    partner_insights: {partner: {metric: {'practice': str, 'enablers': [str]}}}
    """
    best_practices = []
    for gap in gap_analysis['gaps']:
        metric = gap['metric']
        if gap['gap_to_best'] <= 0:
            # We are best-in-class, document our practice
            best_practices.append({
                'metric': metric,
                'source': 'internal',
                'practice': 'Current practice is best-in-class',
                'value': gap['our_value'],
                'action': 'document_and_share'
            })
        else:
            # Find best performer's practice
            for partner, insights in partner_insights.items():
                if metric in insights:
                    if insights[metric].get('is_best'):
                        best_practices.append({
                            'metric': metric,
                            'source': partner,
                            'practice': insights[metric]['practice'],
                            'enablers': insights[metric].get('enablers', []),
                            'value': gap['best_in_class'],
                            'our_gap': gap['gap_to_best'],
                            'action': 'adapt_and_implement'
                        })
    return {
        'best_practices': best_practices,
        'practices_to_adopt': sum(1 for p in best_practices if p['action'] == 'adapt_and_implement'),
        'practices_to_share': sum(1 for p in best_practices if p['action'] == 'document_and_share')
    }
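Building on the gap-analysis sketch above (the partner insights are invented):

# 'result' is the perform_gap_analysis output from the previous sketch.
insights = {
    "Company A": {
        "OEE": {"practice": "TPM program", "enablers": ["Operator training"], "is_best": True}
    }
}
found = identify_best_practices(result, insights)
print(found["practices_to_adopt"])  # 1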
5. Improvement Target Setting
from typing import Dict

def set_improvement_targets(gap_analysis: Dict, timeline_years: int = 3):
    """Set improvement targets based on gaps"""
    targets = []
    for gap in gap_analysis['gaps']:
        if gap['gap_to_median'] <= 0:
            # Already above median, target best-in-class
            target = gap['best_in_class']
            ambition = 'stretch'
        elif gap['position'] in ['Laggard', 'Below Average']:
            # Significant gap, target median first
            target = gap['median']
            ambition = 'catch_up'
        else:
            # Close to median, target near best-in-class
            target = gap['best_in_class'] * 0.9  # 90% of best
            ambition = 'improve'

        # Calculate annual improvement needed
        total_improvement = target - gap['our_value']
        annual_improvement = total_improvement / timeline_years

        targets.append({
            'metric': gap['metric'],
            'current': gap['our_value'],
            'target': round(target, 2),
            'ambition': ambition,
            'timeline_years': timeline_years,
            'annual_improvement': round(annual_improvement, 2),
            'milestones': generate_milestones(gap['our_value'], target, timeline_years)
        })
    return {
        'targets': targets,
        'summary': {
            'stretch_targets': sum(1 for t in targets if t['ambition'] == 'stretch'),
            'catch_up_targets': sum(1 for t in targets if t['ambition'] == 'catch_up'),
            'improve_targets': sum(1 for t in targets if t['ambition'] == 'improve')
        }
    }

def generate_milestones(current, target, years):
    """Generate annual milestones"""
    improvement = (target - current) / years
    milestones = []
    for year in range(1, years + 1):
        milestones.append({
            'year': year,
            'target': round(current + improvement * year, 2)
        })
    return milestones
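Continuing the invented OEE example: current 68, partner median 77, position "Laggard", so the function sets a catch-up target of 77 over the default 3-year timeline, or 3 points per year:

# 'result' is the perform_gap_analysis output from the earlier sketch.
targets = set_improvement_targets(result)
print(targets["targets"][0]["target"], targets["targets"][0]["annual_improvement"])
# 77.0 3.0  (milestones: 71.0, 74.0, 77.0)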
6. Adaptation Planning
from typing import Dict

def create_adaptation_plan(best_practice: Dict, our_context: Dict):
    """
    Create plan to adapt best practice to our context

    our_context: {
        'constraints': [str],
        'resources': [str],
        'culture': str,
        'current_capabilities': [str]
    }
    """
    adaptation = {
        'practice': best_practice['practice'],
        'source': best_practice['source'],
        'expected_improvement': best_practice.get('our_gap'),
        'adaptation_needed': [],
        'prerequisites': [],
        'implementation_phases': [],
        'risks': []
    }

    # Identify adaptations based on context
    if our_context.get('constraints'):
        adaptation['adaptation_needed'].append({
            'reason': 'Organizational constraints',
            'details': our_context['constraints'],
            'mitigation': 'Modify approach to work within constraints'
        })

    # Prerequisites based on enablers
    enablers = best_practice.get('enablers', [])
    for enabler in enablers:
        if enabler not in our_context.get('current_capabilities', []):
            adaptation['prerequisites'].append({
                'capability': enabler,
                'status': 'gap',
                'action': f'Develop capability: {enabler}'
            })

    # Implementation phases
    adaptation['implementation_phases'] = [
        {
            'phase': 1,
            'name': 'Preparation',
            'duration_weeks': 4,
            'activities': ['Train team', 'Secure resources', 'Pilot planning']
        },
        {
            'phase': 2,
            'name': 'Pilot',
            'duration_weeks': 8,
            'activities': ['Implement in limited area', 'Collect data', 'Adjust approach']
        },
        {
            'phase': 3,
            'name': 'Rollout',
            'duration_weeks': 12,
            'activities': ['Scale to full scope', 'Monitor results', 'Refine']
        },
        {
            'phase': 4,
            'name': 'Standardize',
            'duration_weeks': 4,
            'activities': ['Document standard work', 'Train all users', 'Audit']
        }
    ]

    # Risks
    adaptation['risks'] = [
        {'risk': 'Practice may not transfer directly', 'mitigation': 'Pilot first'},
        {'risk': 'Resource constraints', 'mitigation': 'Phase implementation'},
        {'risk': 'Cultural resistance', 'mitigation': 'Change management'}
    ]
    return adaptation
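A short sketch of adaptation planning; the practice, constraint, and capability names are hypothetical:

practice = {
    "practice": "TPM program",
    "source": "Company A",
    "our_gap": 12.0,
    "enablers": ["Operator training", "Autonomous maintenance"],
}
context = {
    "constraints": ["Limited maintenance budget"],
    "current_capabilities": ["Operator training"],
}
plan = create_adaptation_plan(practice, context)
# Only the missing enabler becomes a prerequisite.
print([p["capability"] for p in plan["prerequisites"]])  # ['Autonomous maintenance']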
7. Progress Tracking
from typing import Dict, List

import numpy as np

def track_benchmark_progress(targets: List[Dict], current_performance: Dict):
    """Track progress toward benchmark targets"""
    progress = []
    for target in targets:
        metric = target['metric']
        current = current_performance.get(metric)
        if current is None:
            continue

        baseline = target['current']
        goal = target['target']

        # Calculate progress
        total_needed = goal - baseline
        achieved = current - baseline
        progress_pct = (achieved / total_needed * 100) if total_needed != 0 else 0

        # Determine on-track status
        # Expected progress based on elapsed time would be calculated here
        # Simplified: check against annual milestones
        progress.append({
            'metric': metric,
            'baseline': baseline,
            'current': current,
            'target': goal,
            'improvement': round(achieved, 2),
            'progress_percent': round(progress_pct, 1),
            'remaining_gap': round(goal - current, 2),
            'status': 'on_track' if progress_pct >= 80 else 'at_risk' if progress_pct >= 50 else 'behind'
        })
    return {
        'metrics': progress,
        'summary': {
            'on_track': sum(1 for p in progress if p['status'] == 'on_track'),
            'at_risk': sum(1 for p in progress if p['status'] == 'at_risk'),
            'behind': sum(1 for p in progress if p['status'] == 'behind'),
            'avg_progress': round(np.mean([p['progress_percent'] for p in progress]), 1) if progress else 0
        }
    }
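A year-one check-in against the invented catch-up target above (baseline 68, target 77): a current value of 74 is two thirds of the way there, which falls in the at-risk band:

# 'targets' comes from the set_improvement_targets sketch above.
status = track_benchmark_progress(targets["targets"], {"OEE": 74.0})
print(status["metrics"][0]["progress_percent"], status["metrics"][0]["status"])
# 66.7 at_risk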
Process Integration
This skill integrates with the following processes:
- benchmarking-study-execution.js
- continuous-improvement-program.js
- strategic-planning.js
Output Format
{ "benchmark_project": { "title": "Manufacturing Efficiency Study", "type": "competitive", "partners": ["Company A", "Company B", "Industry Avg"] }, "gap_analysis": { "metrics_analyzed": 8, "below_median": 3, "biggest_gap": {"metric": "OEE", "gap": 12} }, "best_practices": [ {"metric": "OEE", "source": "Company A", "practice": "TPM program"} ], "targets": { "stretch": 2, "catch_up": 3 }, "progress": { "on_track": 5, "at_risk": 2, "behind": 1 } }
Best Practices
- Compare apples to apples - Ensure metrics are comparable
- Focus on enablers - Understand how, not just what
- Adapt, don't copy - Context matters
- Benchmark continuously - Targets move
- Learn from best anywhere - Not just competitors
- Act on findings - Benchmarking without action is wasted effort
Constraints
- Partner data can be hard to obtain
- Comparability requires careful normalization
- Best-in-class today may not be tomorrow
- Implementation is harder than identification