Claude-code-plugins-plus-skills clickhouse-reference-architecture
install
source · Clone the upstream repo
git clone https://github.com/jeremylongshore/claude-code-plugins-plus-skills
Claude Code · Install into ~/.claude/skills/
# Install the skill into ~/.claude/skills/ via a shallow, throwaway clone.
# Fix: the destination directory name was truncated ("...-architectur"),
# which made the installed skill name not match the skill it contains.
T=$(mktemp -d) \
  && git clone --depth=1 https://github.com/jeremylongshore/claude-code-plugins-plus-skills "$T" \
  && mkdir -p ~/.claude/skills \
  && cp -r "$T/plugins/saas-packs/clickhouse-pack/skills/clickhouse-reference-architecture" \
       ~/.claude/skills/jeremylongshore-claude-code-plugins-plus-skills-clickhouse-reference-architecture \
  && rm -rf "$T"
manifest:
plugins/saas-packs/clickhouse-pack/skills/clickhouse-reference-architecture/SKILL.md — source content
ClickHouse Reference Architecture
Overview
Production-grade architecture for ClickHouse analytics platforms covering project layout, data flow, multi-tenancy, and operational patterns.
Prerequisites
- Understanding of ClickHouse fundamentals (engines, ORDER BY, partitioning)
- TypeScript/Node.js project
Instructions
Step 1: Project Structure
my-analytics-platform/
├── src/
│   ├── clickhouse/
│   │   ├── client.ts              # Singleton client with health checks
│   │   ├── schemas/               # SQL DDL files (source of truth)
│   │   │   ├── 001-events.sql
│   │   │   ├── 002-users.sql
│   │   │   └── 003-materialized-views.sql
│   │   ├── queries/               # Named query functions
│   │   │   ├── events.ts
│   │   │   ├── users.ts
│   │   │   └── dashboards.ts
│   │   └── migrations/            # Schema migrations
│   │       ├── runner.ts
│   │       └── 001-add-country.sql
│   ├── ingestion/
│   │   ├── webhook-receiver.ts    # HTTP webhook endpoint
│   │   ├── kafka-consumer.ts      # Kafka consumer (if applicable)
│   │   └── buffer.ts              # Insert batching buffer
│   ├── api/
│   │   ├── routes.ts              # API endpoints
│   │   └── middleware.ts          # Auth, rate limiting
│   └── jobs/
│       ├── daily-rollup.ts        # Scheduled aggregations
│       └── cleanup.ts             # TTL enforcement
├── tests/
│   ├── unit/
│   └── integration/
├── docker-compose.yml             # Local ClickHouse
├── init-db/                       # Docker init scripts
└── config/
    ├── development.env
    ├── staging.env
    └── production.env
Step 2: Data Flow Architecture
┌─────────────────┐
│  Data Sources   │
│ (Webhooks, API, │
│   Kafka, S3)    │
└────────┬────────┘
         │
┌────────▼────────┐
│ Ingestion Layer │
│ (Buffer + Batch │
│  10K+ rows/ins) │
└────────┬────────┘
         │
┌────────▼────────────────────┐
│      ClickHouse Server      │
│                             │
│ ┌─────────────────────────┐ │
│ │    Raw Event Tables     │ │
│ │   (MergeTree, append)   │ │
│ └───────────┬─────────────┘ │
│             │               │
│ ┌───────────▼─────────────┐ │
│ │   Materialized Views    │ │
│ │  (Auto-aggregate on     │ │
│ │   INSERT — hourly,      │ │
│ │   daily, tenant-level)  │ │
│ └───────────┬─────────────┘ │
│             │               │
│ ┌───────────▼─────────────┐ │
│ │    Aggregate Tables     │ │
│ │ (AggregatingMergeTree)  │ │
│ └─────────────────────────┘ │
└────────┬────────────────────┘
         │
┌────────▼────────┐
│    API Layer    │
│ (Query aggregate│
│  tables, not    │
│  raw events)    │
└────────┬────────┘
         │
┌────────▼────────┐
│  Dashboards /   │
│  Client Apps    │
└─────────────────┘
Step 3: Schema Design (3-Layer Pattern)
-- Layer 1: Raw events (append-only, full fidelity)
CREATE TABLE analytics.events_raw
(
    event_id   UUID DEFAULT generateUUIDv4(),
    tenant_id  UInt32,
    event_type LowCardinality(String),
    user_id    UInt64,
    properties String CODEC(ZSTD(3)),
    created_at DateTime64(3) DEFAULT now64(3)
)
ENGINE = MergeTree()
ORDER BY (tenant_id, event_type, toDate(created_at), user_id)
PARTITION BY toYYYYMM(created_at)
TTL created_at + INTERVAL 90 DAY;

-- Layer 2: Hourly aggregation (auto-populated via materialized view)
-- NOTE: counters must be SimpleAggregateFunction(sum, ...) — a plain UInt64
-- column in an AggregatingMergeTree is NOT summed when background merges
-- collapse rows sharing the same sort key; an arbitrary value survives and
-- counts from separate inserts are silently lost.
CREATE TABLE analytics.events_hourly
(
    hour       DateTime,
    tenant_id  UInt32,
    event_type LowCardinality(String),
    cnt        SimpleAggregateFunction(sum, UInt64),
    users      AggregateFunction(uniq, UInt64)
)
ENGINE = AggregatingMergeTree()
ORDER BY (tenant_id, event_type, hour);

CREATE MATERIALIZED VIEW analytics.events_hourly_mv
TO analytics.events_hourly
AS SELECT
    toStartOfHour(created_at) AS hour,
    tenant_id,
    event_type,
    count()           AS cnt,
    uniqState(user_id) AS users
FROM analytics.events_raw
GROUP BY hour, tenant_id, event_type;

-- Layer 3: Daily rollup for dashboards (same SimpleAggregateFunction fix)
CREATE TABLE analytics.events_daily
(
    date      Date,
    tenant_id UInt32,
    total     SimpleAggregateFunction(sum, UInt64),
    users     AggregateFunction(uniq, UInt64)
)
ENGINE = AggregatingMergeTree()
ORDER BY (tenant_id, date);

CREATE MATERIALIZED VIEW analytics.events_daily_mv
TO analytics.events_daily
AS SELECT
    toDate(created_at) AS date,
    tenant_id,
    count()            AS total,
    uniqState(user_id) AS users
FROM analytics.events_raw
GROUP BY date, tenant_id;
Step 4: Multi-Tenant Patterns
Approach A: Shared table with tenant_id in ORDER BY (recommended)
-- Tenant_id first in ORDER BY = queries filter on tenant efficiently
-- (the primary index lets ClickHouse skip granules of other tenants)
ORDER BY (tenant_id, event_type, created_at)

-- Query: only scans data for this tenant
SELECT count() FROM events_raw WHERE tenant_id = 42;
Approach B: Database per tenant (for strict isolation)
-- One database (and table set) per tenant; app routes queries by tenant id.
CREATE DATABASE tenant_42;
CREATE TABLE tenant_42.events (...) ENGINE = MergeTree() ...;
-- Pros: Full isolation, easy to drop tenant
-- Cons: Schema changes need per-tenant DDL, more operational overhead
Approach C: Row-level security (ClickHouse RBAC)
-- Server-enforced row filter: app_user only sees rows whose tenant_id equals
-- the connection's custom_tenant_id setting, which the application layer must
-- set per session (e.g. SET custom_tenant_id = 42) before querying.
CREATE ROW POLICY tenant_isolation ON analytics.events_raw
FOR SELECT
USING tenant_id = getSetting('custom_tenant_id')
TO app_user;
Step 5: Client Module
// src/clickhouse/client.ts import { createClient, ClickHouseClient } from '@clickhouse/client'; let instance: ClickHouseClient | null = null; export function getClient(): ClickHouseClient { if (!instance) { instance = createClient({ url: process.env.CLICKHOUSE_HOST!, username: process.env.CLICKHOUSE_USER!, password: process.env.CLICKHOUSE_PASSWORD!, database: process.env.CLICKHOUSE_DATABASE ?? 'analytics', max_open_connections: Number(process.env.CH_MAX_CONNECTIONS ?? 10), request_timeout: 30_000, compression: { request: true, response: true }, }); } return instance; } // src/clickhouse/queries/dashboards.ts export async function getTenantDashboard(tenantId: number, days = 30) { const client = getClient(); const rs = await client.query({ query: ` SELECT date, sum(total) AS events, uniqMerge(users) AS unique_users FROM analytics.events_daily WHERE tenant_id = {tid:UInt32} AND date >= today() - {days:UInt32} GROUP BY date ORDER BY date `, query_params: { tid: tenantId, days }, format: 'JSONEachRow', }); return rs.json<{ date: string; events: string; unique_users: string }>(); }
Architecture Decision Records
| Decision | Choice | Why |
|---|---|---|
| Engine | MergeTree (raw) + AggregatingMergeTree (rollups) | Best for append + pre-agg |
| Multi-tenant | Shared table + tenant_id in ORDER BY | Scales to 10K+ tenants |
| Ingestion | Buffer + batch INSERT | Avoids "too many parts" |
| Aggregation | Materialized views (not cron) | Real-time, zero-lag |
| Format | JSONEachRow | Client support, debugging |
| Compression | ZSTD(3) for strings, Delta for ints | 10-20x compression |
Error Handling
| Issue | Cause | Solution |
|---|---|---|
| Cross-tenant data leak | Missing WHERE tenant_id | Use row policies or middleware |
| Stale dashboard data | MV not created | Verify MV exists and is attached |
| Schema drift | Manual DDL changes | Use migration runner |
| Slow dashboard queries | Querying raw table | Query aggregate tables instead |
Resources
Next Steps
For multi-environment configuration, see
clickhouse-multi-env-setup.