Claude-skill-registry cloudflare-workers-ai
install
source · Clone the upstream repo
git clone https://github.com/majiayu000/claude-skill-registry
Claude Code · Install into ~/.claude/skills/
# Shallow-clone the registry into a temp dir, copy only this skill into
# ~/.claude/skills under a unique name, then remove the temp checkout.
T=$(mktemp -d) && git clone --depth=1 https://github.com/majiayu000/claude-skill-registry "$T" && mkdir -p ~/.claude/skills && cp -r "$T/skills/data/cloudflare-workers-ai" ~/.claude/skills/majiayu000-claude-skill-registry-cloudflare-workers-ai-1e4b48 && rm -rf "$T"
manifest:
skills/data/cloudflare-workers-ai/SKILL.md — source content
Critical Patterns
Setup Binding
# wrangler.toml — expose the Workers AI binding to the Worker as `env.AI`
[ai]
binding = "AI"
// Type for the Worker environment: `AI` matches the [ai] binding in wrangler.toml.
export interface Env {
  AI: Ai
}
Text Generation
import { createWorkersAI } from "workers-ai-provider" import { generateText } from "ai" type Env = { AI: Ai } export default { async fetch(req: Request, env: Env) { const workersai = createWorkersAI({ binding: env.AI }) const { text } = await generateText({ model: workersai("@cf/meta/llama-3.3-70b-instruct-fp8-fast"), prompt: "Write a short poem about clouds" }) return Response.json({ generatedText: text }) } }
Popular models:
- Llama 3.3 (large, fast): `@cf/meta/llama-3.3-70b-instruct-fp8-fast`
- Llama 2 (smaller): `@cf/meta/llama-2-7b-chat-int8`
- Mistral 7B: `@cf/mistral/mistral-7b-instruct-v0.1`
Streaming Text
import { streamText } from "ai" import { Hono } from "hono" const app = new Hono<{ Bindings: Env }>() app.post("/chat", async (c) => { const { messages } = await c.req.json() const workersai = createWorkersAI({ binding: c.env.AI }) const result = await streamText({ model: workersai("@cf/meta/llama-3.3-70b-instruct-fp8-fast"), messages }) return result.toTextStreamResponse({ headers: { "Content-Type": "text/x-unknown", "content-encoding": "identity", "transfer-encoding": "chunked" } }) }) export default { fetch: app.fetch }
Chat with System Prompt
const systemPrompt = `You are a helpful financial advisor. Provide accurate, concise advice about personal finance.` const result = await streamText({ model: workersai("@cf/meta/llama-3.3-70b-instruct-fp8-fast"), messages: [ { role: "system", content: systemPrompt }, { role: "user", content: userMessage } ], maxTokens: 1000, temperature: 0.7 })
Text Embeddings
import { embed } from "ai" const workersai = createWorkersAI({ binding: env.AI }) const { embedding } = await embed({ model: workersai.textEmbedding("@cf/baai/bge-base-en-v1.5"), value: "Search engines use embeddings for semantic understanding" }) // embedding is array of numbers [0.123, -0.456, ...] console.log(embedding.length) // 768 dimensions
Popular embedding models:
- 768 dimensions (recommended): `@cf/baai/bge-base-en-v1.5`
- 384 dimensions (faster): `@cf/baai/bge-small-en-v1.5`
- 1024 dimensions (more accurate): `@cf/baai/bge-large-en-v1.5`
Batch Embeddings
import { embedMany } from "ai" const { embeddings } = await embedMany({ model: workersai.textEmbedding("@cf/baai/bge-base-en-v1.5"), values: [ "First document", "Second document", "Third document" ] }) // embeddings is array of arrays console.log(embeddings.length) // 3 console.log(embeddings[0].length) // 768
Image Generation
import { generateImage } from "ai" const workersai = createWorkersAI({ binding: env.AI }) const { image } = await generateImage({ model: workersai.image("@cf/black-forest-labs/flux-1-schnell"), prompt: "A serene mountain landscape at sunset", size: "1024x1024" }) return new Response(image, { headers: { "Content-Type": "image/png" } })
Popular image models:
- FLUX.1 (fast, recommended): `@cf/black-forest-labs/flux-1-schnell`
- SDXL: `@cf/stabilityai/stable-diffusion-xl-base-1.0`
- SDXL Lightning: `@cf/bytedance/stable-diffusion-xl-lightning`
Sizes:
"1024x1024", "1024x768", "768x1024"
Common Patterns
Complete Chat API
import { Hono } from "hono" import { createWorkersAI } from "workers-ai-provider" import { streamText } from "ai" const app = new Hono<{ Bindings: Env }>() app.post("/api/chat", async (c) => { const { messages } = await c.req.json() if (!messages || !Array.isArray(messages)) { return c.json({ error: "Invalid messages" }, 400) } const workersai = createWorkersAI({ binding: c.env.AI }) try { const result = await streamText({ model: workersai("@cf/meta/llama-3.3-70b-instruct-fp8-fast"), messages, maxTokens: 1000, temperature: 0.7 }) return result.toTextStreamResponse({ headers: { "Content-Type": "text/x-unknown", "content-encoding": "identity", "transfer-encoding": "chunked" } }) } catch (error) { console.error("AI error:", error) return c.json({ error: "Failed" }, 500) } }) export default { fetch: app.fetch }
Semantic Search
app.post("/search", async (c) => { const { query } = await c.req.json() const workersai = createWorkersAI({ binding: c.env.AI }) // 1. Generate embedding for query const { embedding: queryEmbedding } = await embed({ model: workersai.textEmbedding("@cf/baai/bge-base-en-v1.5"), value: query }) // 2. Search in vector database (e.g., Vectorize) const results = await c.env.VECTORIZE.query(queryEmbedding, { topK: 10 }) return c.json({ results }) })
Image Generation with Upload to R2
app.post("/generate-image", async (c) => { const { prompt } = await c.req.json() if (!prompt || prompt.length < 3 || prompt.length > 1000) { return c.json({ error: "Invalid prompt" }, 400) } const workersai = createWorkersAI({ binding: c.env.AI }) try { const { image } = await generateImage({ model: workersai.image("@cf/black-forest-labs/flux-1-schnell"), prompt, size: "1024x1024" }) // Upload to R2 const key = `images/${crypto.randomUUID()}.png` await c.env.BUCKET.put(key, image, { httpMetadata: { contentType: "image/png" } }) return c.json({ success: true, key }) } catch (error) { return c.json({ error: "Generation failed" }, 500) } })
Performance Tips
// ✅ Stream for long responses const result = await streamText({...}) return result.toTextStreamResponse() // ❌ Wait for entire response (slow) const { text } = await generateText({...}) return Response.json({ text }) // ✅ Choose right model size // Small tasks -> smaller models workersai("@cf/meta/llama-2-7b-chat-int8") // Complex reasoning -> larger models workersai("@cf/meta/llama-3.3-70b-instruct-fp8-fast") // ✅ Cache embeddings const cached = await cache.get(text) if (cached) return JSON.parse(cached) const { embedding } = await embed({...}) await cache.put(text, JSON.stringify(embedding)) // ✅ Batch embeddings const { embeddings } = await embedMany({ model: workersai.textEmbedding("@cf/baai/bge-base-en-v1.5"), values: documents }) // ❌ One at a time (slow) for (const doc of documents) { await embed({ value: doc }) }
Model Configuration
const result = await generateText({ model: workersai("@cf/meta/llama-3.3-70b-instruct-fp8-fast"), prompt: "Your prompt", maxTokens: 500, // Max tokens to generate temperature: 0.7, // Randomness (0-1) topP: 0.9, // Nucleus sampling frequencyPenalty: 0.5, // Reduce repetition presencePenalty: 0.5, // Encourage diversity stopSequences: ["\n\n", "END"] })
Common Mistakes
❌ Not handling errors
// Anti-pattern: the rejected promise propagates uncaught.
const { text } = await generateText({...}) // May throw!
✅ Always use try-catch
// Wrap the model call so a provider failure becomes a clean 500 response.
try {
  const { text } = await generateText({...})
} catch (error) {
  return c.json({ error: "Failed" }, 500)
}
❌ Missing streaming headers
// Anti-pattern: no explicit streaming headers supplied here.
return result.toTextStreamResponse()
✅ Include required headers
// Supply the headers the Workers runtime needs to forward chunks unbuffered.
return result.toTextStreamResponse({
  headers: {
    "Content-Type": "text/x-unknown",
    "content-encoding": "identity",
    "transfer-encoding": "chunked",
  },
})
Commands
# List models
wrangler ai models list

# Test model
wrangler ai run @cf/meta/llama-3.3-70b-instruct-fp8-fast --prompt "Hello"

# Deploy
wrangler deploy