Skillshub cohere-reference-architecture
install
source · Clone the upstream repo
git clone https://github.com/ComeOnOliver/skillshub
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/ComeOnOliver/skillshub "$T" && mkdir -p ~/.claude/skills && cp -r "$T/skills/jeremylongshore/claude-code-plugins-plus-skills/cohere-reference-architecture" ~/.claude/skills/comeonoliver-skillshub-cohere-reference-architecture && rm -rf "$T"
manifest:
skills/jeremylongshore/claude-code-plugins-plus-skills/cohere-reference-architecture/SKILL.md
source content
Cohere Reference Architecture
Overview
Production-ready architecture for Cohere API v2 applications covering RAG pipelines, tool-use agents, and multi-model orchestration.
Prerequisites
- Understanding of layered architecture
- `cohere-ai` SDK v7+
- TypeScript project with vitest
Project Structure
my-cohere-app/
├── src/
│   ├── cohere/
│   │   ├── client.ts        # CohereClientV2 singleton
│   │   ├── models.ts        # Model selection logic
│   │   ├── types.ts         # Cohere-specific types
│   │   └── errors.ts        # Error classification
│   ├── services/
│   │   ├── chat.ts          # Chat completions + streaming
│   │   ├── rag.ts           # RAG pipeline (embed → rerank → chat)
│   │   ├── agents.ts        # Tool-use agent loops
│   │   ├── embed.ts         # Batch embedding + caching
│   │   ├── rerank.ts        # Document reranking
│   │   └── classify.ts      # Few-shot classification
│   ├── tools/               # Tool definitions for agents
│   │   ├── registry.ts      # Tool name → executor mapping
│   │   ├── search.ts
│   │   └── calculator.ts
│   ├── api/
│   │   ├── chat.ts          # POST /api/chat (streaming)
│   │   ├── embed.ts         # POST /api/embed
│   │   └── health.ts        # GET /api/health
│   └── cache/
│       └── embeddings.ts    # LRU cache for embeddings
├── tests/
│   ├── unit/
│   │   ├── chat.test.ts
│   │   ├── rag.test.ts
│   │   └── fixtures/        # Mocked API responses
│   └── integration/
│       └── cohere.test.ts   # Real API tests (gated)
├── config/
│   ├── models.json          # Model selection per environment
│   └── tools.json           # Tool definitions
└── package.json
Layer Architecture
┌─────────────────────────────────────────┐
│                API Layer                │
│  (Express/Next.js routes, SSE stream)   │
├─────────────────────────────────────────┤
│              Service Layer              │
│  (RAG pipeline, agent loop, classify)   │
├─────────────────────────────────────────┤
│              Cohere Layer               │
│  (CohereClientV2, retry, model select)  │
├─────────────────────────────────────────┤
│          Infrastructure Layer           │
│   (Embed cache, tool registry, queue)   │
└─────────────────────────────────────────┘
Core Components
Client Layer
// src/cohere/client.ts
import { CohereClientV2, CohereError, CohereTimeoutError } from 'cohere-ai';

/** Client shared by every caller in this module; stays `null` until first requested. */
let sharedClient: CohereClientV2 | null = null;

/**
 * Returns the shared Cohere v2 client, constructing it on first use.
 *
 * The token is read from the `CO_API_KEY` environment variable when the
 * client is constructed; every later call returns that same instance.
 */
export function getCohere(): CohereClientV2 {
  if (sharedClient !== null) {
    return sharedClient;
  }

  const created = new CohereClientV2({ token: process.env.CO_API_KEY });
  sharedClient = created;
  return created;
}

// src/cohere/models.ts

/** Chat model identifiers, keyed by tier. */
const CHAT_MODELS = {
  premium: 'command-a-03-2025',
  standard: 'command-r-08-2024',
  fast: 'command-r7b-12-2024',
} as const;

/** Embedding model identifiers. */
const EMBED_MODELS = {
  latest: 'embed-v4.0',
  english: 'embed-english-v3.0',
  multilingual: 'embed-multilingual-v3.0',
} as const;

/** Rerank model identifiers. */
const RERANK_MODELS = {
  latest: 'rerank-v3.5',
} as const;

/** Model identifiers grouped by capability (`chat`, `embed`, `rerank`). */
export const MODELS = {
  chat: CHAT_MODELS,
  embed: EMBED_MODELS,
  rerank: RERANK_MODELS,
} as const;
RAG Service
// src/services/rag.ts
import { getCohere } from '../cohere/client';
import { MODELS } from '../cohere/models';

/** Result of a RAG query: the answer text, its citations, and the chat model used. */
interface RAGResult {
  answer: string;
  citations: Array<{ start: number; end: number; text: string; sources: string[] }>;
  model: string;
}

/**
 * Answers `query` from the supplied documents in two steps: rerank the
 * documents against the query, then run a chat completion with the top
 * reranked documents attached.
 *
 * @param query - The user question; used for both reranking and chat.
 * @param documents - Candidate documents. Only `text` is sent to rerank;
 *   `id` and `text` are both sent to chat for the selected documents.
 * @param options - Optional overrides. `model` defaults to
 *   `MODELS.chat.standard`; `topN` defaults to 5.
 * @returns The first content item's text (or `''` when absent), the
 *   citations from the response message, and the chat model used.
 */
export async function ragQuery(
  query: string,
  documents: Array<{ id: string; text: string }>,
  options?: { model?: string; topN?: number }
): Promise<RAGResult> {
  const client = getCohere();
  const chatModel = options?.model ?? MODELS.chat.standard;
  const limit = options?.topN ?? 5;

  // Step 1: rerank the raw document texts against the query.
  const texts = documents.map(doc => doc.text);
  const ranking = await client.rerank({
    model: MODELS.rerank.latest,
    query,
    documents: texts,
    topN: limit,
  });

  // Step 2: map each rerank result back to its source document via `index`
  // and pass those documents to the chat call.
  const grounding = ranking.results.map(({ index }) => {
    const doc = documents[index];
    return { id: doc.id, data: { text: doc.text } };
  });

  const reply = await client.chat({
    model: chatModel,
    messages: [{ role: 'user', content: query }],
    documents: grounding,
  });

  const message = reply.message;
  const answer = message?.content?.[0]?.text ?? '';
  const citations = (message?.citations ?? []).map(citation => {
    const sourceIds = citation.sources?.map((s: any) => s.id) ?? [];
    return {
      start: citation.start,
      end: citation.end,
      text: citation.text,
      sources: sourceIds,
    };
  });

  return { answer, citations, model: chatModel };
}
Agent Service
// src/services/agents.ts
import { getCohere } from '../cohere/client';
import { MODELS } from '../cohere/models';
import { toolRegistry } from '../tools/registry';

/**
 * Parses the JSON argument string attached to a tool call.
 *
 * Missing or blank arguments are treated as an empty object so that
 * zero-argument tools still run. Malformed JSON is reported as a failure
 * value instead of throwing, so the caller can hand the error back to the model.
 */
function parseToolArguments(
  raw: string | undefined
): { ok: true; args: unknown } | { ok: false; error: string } {
  if (raw === undefined || raw.trim() === '') {
    return { ok: true, args: {} };
  }
  try {
    return { ok: true, args: JSON.parse(raw) };
  } catch (err) {
    return { ok: false, error: `Invalid tool arguments: ${String(err)}` };
  }
}

/**
 * Runs a tool-use loop: call chat, execute any requested tools through the
 * tool registry, append the results to the conversation, and repeat.
 *
 * @param userMessage - The initial user turn.
 * @param maxSteps - Maximum number of chat calls before giving up (default 5).
 * @returns The first content item's text of the first response whose finish
 *   reason is not `'TOOL_CALL'` (or `''` when absent), or the literal
 *   `'Agent reached max steps.'` when the step budget is exhausted.
 */
export async function runAgent(
  userMessage: string,
  maxSteps = 5
): Promise<string> {
  const cohere = getCohere();
  const messages: any[] = [{ role: 'user', content: userMessage }];
  const tools = toolRegistry.getToolDefinitions();

  for (let step = 0; step < maxSteps; step++) {
    const response = await cohere.chat({
      model: MODELS.chat.premium,
      messages,
      tools,
    });

    if (response.finishReason !== 'TOOL_CALL') {
      return response.message?.content?.[0]?.text ?? '';
    }

    const toolCalls = response.message?.toolCalls ?? [];
    // Echo the assistant turn back with its tool plan as well as the calls,
    // so the next request carries the model's stated reasoning for them.
    messages.push({
      role: 'assistant',
      toolPlan: response.message?.toolPlan,
      toolCalls,
    });

    for (const tc of toolCalls) {
      // A missing function name falls through to the registry's
      // "Unknown tool" error rather than crashing on a property access.
      const toolName = tc.function?.name ?? '';
      const parsed = parseToolArguments(tc.function?.arguments);

      // Malformed arguments become a tool-result error (same JSON shape the
      // registry uses) so the model can correct itself on the next step.
      const result = parsed.ok
        ? await toolRegistry.execute(toolName, parsed.args)
        : JSON.stringify({ error: parsed.error });

      messages.push({ role: 'tool', toolCallId: tc.id, content: result });
    }
  }

  return 'Agent reached max steps.';
}
Tool Registry
// src/tools/registry.ts

/** Describes one function tool: its name, description, and parameters object. */
interface ToolDefinition {
  type: 'function';
  function: {
    name: string;
    description: string;
    parameters: Record<string, unknown>;
  };
}

/** Async callback that runs a tool and resolves to its string result. */
type ToolExecutor = (args: any) => Promise<string>;

/** A tool definition paired with the callback that executes it. */
interface RegisteredTool {
  definition: ToolDefinition;
  executor: ToolExecutor;
}

/**
 * Maps tool names to their definition and executor.
 *
 * Registering a name that already exists replaces the earlier entry.
 */
class ToolRegistry {
  private readonly registered = new Map<string, RegisteredTool>();

  /**
   * Stores a tool under `name`, building its definition from the
   * given description and parameters.
   */
  register(
    name: string,
    description: string,
    parameters: Record<string, unknown>,
    executor: (args: any) => Promise<string>
  ): void {
    const definition: ToolDefinition = {
      type: 'function',
      function: { name, description, parameters },
    };
    this.registered.set(name, { definition, executor });
  }

  /** Returns the definitions of all registered tools, in registration order. */
  getToolDefinitions(): ToolDefinition[] {
    const definitions: ToolDefinition[] = [];
    for (const entry of this.registered.values()) {
      definitions.push(entry.definition);
    }
    return definitions;
  }

  /**
   * Runs the tool registered under `name` with `args`.
   *
   * Never rejects: an unknown name or a failing executor resolves to a JSON
   * string of the form `{"error": "..."}` instead.
   */
  async execute(name: string, args: any): Promise<string> {
    const entry = this.registered.get(name);
    if (entry === undefined) {
      return JSON.stringify({ error: `Unknown tool: ${name}` });
    }

    try {
      return await entry.executor(args);
    } catch (failure) {
      return JSON.stringify({ error: String(failure) });
    }
  }
}

/** Module-level registry instance shared by importers. */
export const toolRegistry = new ToolRegistry();
Error Classification
// src/cohere/errors.ts
import { CohereError, CohereTimeoutError } from 'cohere-ai';

/** Buckets an error can be sorted into by `classifyError`. */
export type ErrorCategory = 'auth' | 'rate_limit' | 'bad_request' | 'server' | 'timeout' | 'unknown';

/**
 * Sorts an arbitrary thrown value into an error category and reports
 * whether it is marked retryable.
 *
 * - `CohereTimeoutError` → `timeout` (retryable)
 * - `CohereError` with status 401 → `auth`, 429 → `rate_limit` (retryable),
 *   400 → `bad_request`, 500 and above → `server` (retryable)
 * - anything else, including other `CohereError` statuses → `unknown`
 *
 * @param err - The caught value; may be of any type.
 * @returns The category, the retryable flag, and a message.
 */
export function classifyError(err: unknown): {
  category: ErrorCategory;
  retryable: boolean;
  message: string;
} {
  // Timeouts are tested first, before the general CohereError check.
  if (err instanceof CohereTimeoutError) {
    return { category: 'timeout', retryable: true, message: 'Request timed out' };
  }

  if (err instanceof CohereError) {
    const status = err.statusCode;

    if (status === 401) {
      return { category: 'auth', retryable: false, message: 'Invalid API key' };
    }
    if (status === 429) {
      return { category: 'rate_limit', retryable: true, message: 'Rate limited' };
    }
    if (status === 400) {
      return { category: 'bad_request', retryable: false, message: err.message };
    }
    if (typeof status === 'number' && status >= 500) {
      return { category: 'server', retryable: true, message: err.message };
    }
    // Any other status falls through to the generic result below.
  }

  return { category: 'unknown', retryable: false, message: String(err) };
}
Data Flow
User Query
     │
     ▼
┌─────────────┐
│  API Route  │  POST /api/chat
└──────┬──────┘
       │
       ▼
┌─────────────┐    ┌─────────────┐
│ RAG Service │───▶│   Rerank    │  rerank-v3.5
│  or Agent   │    │   Service   │
└──────┬──────┘    └─────────────┘
       │
       ▼
┌─────────────┐    ┌─────────────┐
│ Chat/Stream │───▶│    Embed    │  embed-v4.0
│   Service   │    │    Cache    │  (cached)
└──────┬──────┘    └─────────────┘
       │
       ▼
┌──────────────┐
│ CohereClient │  command-a-03-2025
│      V2      │
└──────────────┘
Output
- Layered architecture separating API, service, and client concerns
- RAG pipeline with rerank pre-filtering and grounded citations
- Agent loop with pluggable tool registry
- Error classification for retry/alert decisions
- Model selection per environment and use case
Error Handling
| Issue | Cause | Solution |
|---|---|---|
| Circular imports | Wrong layering | Services depend on client, not vice versa |
| Tool not found | Missing registration | Register tools at startup |
| Model mismatch | Env config wrong | Validate model IDs at startup |
| Cache miss storm | TTL expired | Stale-while-revalidate pattern |
Resources
Next Steps
For multi-environment setup, see `cohere-multi-env-setup`.