# Claude-code-plugins-plus · groq-deploy-integration

## Install

Clone the upstream repo:

```bash
git clone https://github.com/jeremylongshore/claude-code-plugins-plus-skills
```

Or, for Claude Code, install directly into `~/.claude/skills/`:

```bash
T=$(mktemp -d) \
  && git clone --depth=1 https://github.com/jeremylongshore/claude-code-plugins-plus-skills "$T" \
  && mkdir -p ~/.claude/skills \
  && cp -r "$T/plugins/saas-packs/groq-pack/skills/groq-deploy-integration" \
       ~/.claude/skills/jeremylongshore-claude-code-plugins-plus-groq-deploy-integration \
  && rm -rf "$T"
```

Manifest: `plugins/saas-packs/groq-pack/skills/groq-deploy-integration/SKILL.md`

---
# Groq Deploy Integration

## Overview
Deploy applications using Groq's inference API to Vercel Edge, Cloud Run, Docker, and other platforms. Groq's sub-200ms latency makes it ideal for edge deployments and real-time applications.
## Prerequisites

- Groq API key stored in `GROQ_API_KEY` (see the sanity check below)
- Application using the `groq-sdk` package
- Platform CLI installed (`vercel`, `docker`, or `gcloud`)
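To confirm the key and SDK are wired up before deploying, a minimal sanity check can round-trip the cheapest model. This is a hypothetical standalone script, not part of the skill; it assumes you run it with something like `npx tsx verify-groq.ts`.

```typescript
// verify-groq.ts -- hypothetical sanity check, not part of this skill.
// Requires GROQ_API_KEY in the environment.
import Groq from "groq-sdk";

const groq = new Groq(); // reads GROQ_API_KEY automatically

// Cheapest model, one token: the round trip costs almost nothing
const res = await groq.chat.completions.create({
  model: "llama-3.1-8b-instant",
  messages: [{ role: "user", content: "ping" }],
  max_tokens: 1,
});

console.log(`Groq reachable, model served: ${res.model}`);
```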
## Instructions

### Step 1: Vercel Edge Function
```typescript
// app/api/chat/route.ts (Next.js App Router)
import Groq from "groq-sdk";

export const runtime = "edge";

export async function POST(req: Request) {
  const groq = new Groq({ apiKey: process.env.GROQ_API_KEY! });
  const { messages, stream: useStream } = await req.json();

  if (useStream) {
    const stream = await groq.chat.completions.create({
      model: "llama-3.3-70b-versatile",
      messages,
      stream: true,
      max_tokens: 2048,
    });

    const encoder = new TextEncoder();
    const readable = new ReadableStream({
      async start(controller) {
        for await (const chunk of stream) {
          const content = chunk.choices[0]?.delta?.content;
          if (content) {
            controller.enqueue(
              encoder.encode(`data: ${JSON.stringify({ content })}\n\n`)
            );
          }
        }
        controller.enqueue(encoder.encode("data: [DONE]\n\n"));
        controller.close();
      },
    });

    return new Response(readable, {
      headers: {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache",
        Connection: "keep-alive",
      },
    });
  }

  const completion = await groq.chat.completions.create({
    model: "llama-3.3-70b-versatile",
    messages,
    max_tokens: 2048,
  });
  return Response.json(completion);
}
```
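A client can consume the SSE frames this route emits with a sketch like the following. `streamChat` and `APP_URL` are illustrative names, not part of the skill; the code runs under Node 18+ (native fetch), and in a browser you would swap `process.stdout.write` for a DOM update.

```typescript
// Hypothetical consumer for the route above.
const BASE = process.env.APP_URL ?? "http://localhost:3000";

async function streamChat(messages: { role: string; content: string }[]) {
  const res = await fetch(`${BASE}/api/chat`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ messages, stream: true }),
  });
  if (!res.ok || !res.body) throw new Error(`Request failed: ${res.status}`);

  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });

    // SSE frames end with a blank line; hold any partial frame in the buffer
    const frames = buffer.split("\n\n");
    buffer = frames.pop() ?? "";

    for (const frame of frames) {
      const data = frame.replace(/^data: /, "");
      if (data === "[DONE]") return;
      process.stdout.write(JSON.parse(data).content);
    }
  }
}

await streamChat([{ role: "user", content: "Explain Groq LPUs in one line." }]);
```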
### Step 2: Vercel Deployment

```bash
set -euo pipefail

# Set secret
vercel env add GROQ_API_KEY production

# Deploy
vercel --prod
```
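After the deploy completes, a quick smoke test confirms the key made it into the production environment. This is a hypothetical script; `DEPLOY_URL` stands in for your Vercel deployment URL.

```typescript
// smoke-test.ts -- hypothetical post-deploy check; DEPLOY_URL is an assumption.
const base = process.env.DEPLOY_URL ?? "https://your-app.vercel.app";

const res = await fetch(`${base}/api/chat`, {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ messages: [{ role: "user", content: "Say OK" }] }),
});
if (!res.ok) throw new Error(`Smoke test failed: HTTP ${res.status}`);

const completion = await res.json();
console.log(completion.choices[0].message.content);
```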
### Step 3: Docker Container

```dockerfile
FROM node:20-slim AS builder
WORKDIR /app
COPY package*.json ./
RUN npm ci
COPY . .
RUN npm run build

FROM node:20-slim
WORKDIR /app
# curl is not included in node:20-slim; install it for the HEALTHCHECK below
RUN apt-get update && apt-get install -y --no-install-recommends curl \
    && rm -rf /var/lib/apt/lists/*
COPY --from=builder /app/dist ./dist
COPY --from=builder /app/node_modules ./node_modules
COPY --from=builder /app/package.json .
EXPOSE 3000
HEALTHCHECK --interval=30s --timeout=5s \
  CMD curl -sf http://localhost:3000/health || exit 1
CMD ["node", "dist/index.js"]
```
### Step 4: Cloud Run Deployment

```bash
set -euo pipefail

# Store API key in Secret Manager
echo -n "$GROQ_API_KEY" | gcloud secrets create groq-api-key --data-file=-

# Deploy with streaming support
gcloud run deploy groq-api \
  --source . \
  --region us-central1 \
  --set-secrets=GROQ_API_KEY=groq-api-key:latest \
  --min-instances=1 \
  --max-instances=10 \
  --cpu=1 --memory=512Mi \
  --allow-unauthenticated \
  --timeout=60s
```
### Step 5: Express Server with Health Check

```typescript
import express from "express";
import Groq from "groq-sdk";

const app = express();
const groq = new Groq();

app.use(express.json());

// Health check -- uses cheapest model with minimal tokens
app.get("/health", async (_req, res) => {
  try {
    const start = performance.now();
    await groq.chat.completions.create({
      model: "llama-3.1-8b-instant",
      messages: [{ role: "user", content: "OK" }],
      max_tokens: 1,
    });
    res.json({
      status: "healthy",
      groq: { connected: true, latencyMs: Math.round(performance.now() - start) },
    });
  } catch (err: any) {
    res.status(503).json({
      status: "unhealthy",
      groq: { connected: false, error: err.message },
    });
  }
});

// Chat endpoint with streaming
app.post("/api/chat", async (req, res) => {
  const { messages, model = "llama-3.3-70b-versatile" } = req.body;

  if (req.headers.accept === "text/event-stream") {
    res.writeHead(200, {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      Connection: "keep-alive",
    });

    const stream = await groq.chat.completions.create({
      model,
      messages,
      stream: true,
      max_tokens: 2048,
    });

    for await (const chunk of stream) {
      const content = chunk.choices[0]?.delta?.content;
      if (content) {
        res.write(`data: ${JSON.stringify({ content })}\n\n`);
      }
    }
    res.write("data: [DONE]\n\n");
    res.end();
  } else {
    const completion = await groq.chat.completions.create({
      model,
      messages,
      max_tokens: 2048,
    });
    res.json(completion);
  }
});

app.listen(3000, () => console.log("Groq API server on :3000"));
```
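A couple of example calls against this server (Node 18+ fetch, same `localhost:3000` as above; a sketch, not part of the skill) show how the `Accept` header selects between the JSON and SSE paths:

```typescript
const body = JSON.stringify({ messages: [{ role: "user", content: "Hi" }] });

// Without an Accept header the endpoint returns a single JSON completion
const json = await fetch("http://localhost:3000/api/chat", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body,
}).then((r) => r.json());
console.log(json.choices[0].message.content);

// Accept: text/event-stream flips the same endpoint into SSE mode
const sse = await fetch("http://localhost:3000/api/chat", {
  method: "POST",
  headers: { "Content-Type": "application/json", Accept: "text/event-stream" },
  body,
});
console.log(sse.headers.get("content-type")); // text/event-stream
```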
### Step 6: Vercel AI SDK Integration

```typescript
// Using @ai-sdk/groq for Vercel AI SDK
import { createGroq } from "@ai-sdk/groq";
import { streamText } from "ai";

const groq = createGroq({ apiKey: process.env.GROQ_API_KEY });

export async function POST(req: Request) {
  const { messages } = await req.json();

  const result = streamText({
    model: groq("llama-3.3-70b-versatile"),
    messages,
  });

  return result.toDataStreamResponse();
}
```
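On the frontend, the data-stream response pairs with `useChat`. A hypothetical React client sketch, assuming AI SDK 4.x with `@ai-sdk/react` installed:

```tsx
// Comments before the directive are fine; "use client" must precede imports.
"use client";
import { useChat } from "@ai-sdk/react";

export default function Chat() {
  const { messages, input, handleInputChange, handleSubmit } = useChat({
    api: "/api/chat", // the route from Step 6
  });

  return (
    <form onSubmit={handleSubmit}>
      {messages.map((m) => (
        <p key={m.id}>
          {m.role}: {m.content}
        </p>
      ))}
      <input value={input} onChange={handleInputChange} placeholder="Ask Groq..." />
    </form>
  );
}
```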
## Environment Variable Config

| Platform | Command |
|---|---|
| Vercel | `vercel env add GROQ_API_KEY production` |
| Cloud Run | `--set-secrets=GROQ_API_KEY=groq-api-key:latest` |
| Fly.io | `fly secrets set GROQ_API_KEY=...` |
| Railway | `railway variables --set GROQ_API_KEY=...` |
| Docker | `docker run -e GROQ_API_KEY=...` or Docker secrets |
## Error Handling

| Issue | Cause | Solution |
|---|---|---|
| Rate limited (429) | Too many requests | Implement request queuing with backoff (see the sketch below) |
| Edge timeout | Response > 25s | Use streaming for long completions |
| Model unavailable | Capacity or deprecation | Fall back to a smaller model such as `llama-3.1-8b-instant` |
| Cold start latency | Serverless function init | Set `--min-instances=1` on Cloud Run |
| API key not found | Secret not configured | Check platform secret config |
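For the 429 row, a minimal backoff sketch. `withBackoff` is a hypothetical helper, not part of groq-sdk, and it assumes thrown SDK errors expose a numeric HTTP `status` field:

```typescript
// Hypothetical helper: retry a Groq call on 429 with exponential backoff + jitter.
async function withBackoff<T>(fn: () => Promise<T>, maxRetries = 5): Promise<T> {
  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (err: any) {
      if (err?.status !== 429 || attempt >= maxRetries) throw err;
      // 0.5s, 1s, 2s, ... with up to 50% jitter to spread out retries
      const delayMs = 500 * 2 ** attempt * (0.5 + Math.random() / 2);
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
}

// Usage with the client from the earlier steps:
// const completion = await withBackoff(() =>
//   groq.chat.completions.create({
//     model: "llama-3.3-70b-versatile",
//     messages,
//     max_tokens: 2048,
//   })
// );
```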
## Next Steps

For multi-environment setup, see `groq-multi-env-setup`.