Claude-code-plugins-plus perplexity-data-handling
install
source · Clone the upstream repo
git clone https://github.com/jeremylongshore/claude-code-plugins-plus-skills
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/jeremylongshore/claude-code-plugins-plus-skills "$T" && mkdir -p ~/.claude/skills && cp -r "$T/plugins/saas-packs/perplexity-pack/skills/perplexity-data-handling" ~/.claude/skills/jeremylongshore-claude-code-plugins-plus-perplexity-data-handling && rm -rf "$T"
manifest:
plugins/saas-packs/perplexity-pack/skills/perplexity-data-handling/SKILL.md
source content
Perplexity Data Handling
Overview
Manage data flowing through the Perplexity Sonar API. Critical concern: queries are sent to Perplexity for web search, so any PII in a query is exposed to external infrastructure. Responses contain citations (third-party URLs) that must be validated before they are displayed to users.
Data Flow
User Input → Query Sanitization → Perplexity API → Response Parsing
                                                          │
                                            ┌─────────────┼──────────────┐
                                            │             │              │
                                       Answer Text    Citations    Search Results
                                            │             │              │
                                        Format &      Validate &     Store for
                                        Display       Deduplicate    Analytics
Prerequisites
- Perplexity API key configured
- Understanding of PII regulations (GDPR/CCPA)
- Cache storage (Redis or in-memory)
Instructions
Step 1: Query Sanitization
/**
 * Ordered redaction rules applied to every outgoing query.
 *
 * Each entry is `[pattern, placeholder]`. All patterns carry the `g` flag so
 * every occurrence is replaced. They are only ever used with
 * `String.prototype.replace`, which resets `lastIndex` before scanning, so
 * sharing them at module scope is safe (never call `.test()` on them).
 */
const PII_PATTERNS: ReadonlyArray<readonly [RegExp, string]> = [
  // Email addresses.
  [/\b[\w.+-]+@[\w-]+\.[\w.]+\b/g, "[email]"],
  // 10-digit phone numbers; groups may be separated by "-", "." or whitespace.
  [/\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b/g, "[phone]"],
  // US Social Security numbers in the canonical 3-2-4 form.
  [/\b\d{3}-\d{2}-\d{4}\b/g, "[ssn]"],
  // 16-digit card numbers, optionally grouped in fours.
  [/\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, "[card]"],
  // API keys with a known prefix; "-" is allowed in the body so that
  // segmented keys such as "sk-proj-..." are caught as a whole.
  [/\b(pplx-|sk-|pk_|sk_live_)[\w-]{20,}\b/g, "[token]"],
  // "user 42", "customer #42", "account#42", ...
  [/\b(user|customer|account)\s*#?\s*\d+\b/gi, "[id]"],
];

/**
 * Replaces recognisable PII and secrets in a query with neutral placeholders.
 *
 * @param query Raw user input.
 * @returns `clean` - the query with every match replaced by its placeholder;
 *          `redacted` - `true` when at least one replacement was made.
 */
function sanitizeQuery(query: string): { clean: string; redacted: boolean } {
  let clean = query;
  let redacted = false;

  for (const [pattern, replacement] of PII_PATTERNS) {
    // A single replace() pass both rewrites and tells us whether anything
    // matched: no placeholder can itself match its own pattern, so the
    // string changes if and only if there was a match.
    const next = clean.replace(pattern, replacement);
    if (next !== clean) {
      clean = next;
      redacted = true;
    }
  }

  return { clean, redacted };
}

/**
 * Sanitizes `rawQuery` and sends the cleaned text to the `sonar` model.
 *
 * When something was redacted, a fixed warning is logged; the query text
 * itself is never written to the log.
 *
 * `perplexity` is not defined in this snippet - presumably an
 * OpenAI-compatible client configured elsewhere (confirm against the caller).
 *
 * @param rawQuery Unsanitized user input.
 * @returns Whatever `perplexity.chat.completions.create` resolves to.
 */
async function safeSearch(rawQuery: string) {
  const { clean, redacted } = sanitizeQuery(rawQuery);

  if (redacted) {
    console.warn("[Data] PII redacted from Perplexity query");
  }

  return perplexity.chat.completions.create({
    model: "sonar",
    messages: [{ role: "user", content: clean }],
  });
}
Step 2: Citation Validation
/** A citation URL after validation, tagged with its 1-based position in the response. */
interface ValidatedCitation {
  /** Citation URL with trailing punctuation removed (raw input if unparseable). */
  url: string;
  /** Hostname of `url`, or `"unknown"` when the URL could not be parsed. */
  domain: string;
  /** `true` only for parseable `http:` / `https:` URLs. */
  valid: boolean;
  /** 1-based position in the original citation list; matches `[n]` markers in the answer. */
  index: number;
}

/**
 * Parses each citation URL and records whether it is safe to link to.
 *
 * Trailing punctuation (`. , ; :`) is stripped *before* parsing so that
 * `url` and `domain` always describe the same string.
 *
 * @param citations Raw citation URLs in the order returned by the API.
 * @returns One entry per input, in the same order, with `index = position + 1`.
 */
function validateCitations(citations: string[]): ValidatedCitation[] {
  return citations.map((url, i) => {
    const index = i + 1;
    const trimmed = url.replace(/[.,;:]+$/, "");
    try {
      const parsed = new URL(trimmed);
      return {
        url: trimmed,
        domain: parsed.hostname,
        // Anything other than http(s) (javascript:, data:, ...) is not linkable.
        valid: parsed.protocol === "http:" || parsed.protocol === "https:",
        index,
      };
    } catch {
      // Unparseable input is kept verbatim but flagged invalid.
      return { url, domain: "unknown", valid: false, index };
    }
  });
}

/** Query parameters that only carry tracking data and never select a different page. */
const TRACKING_PARAM = /^(utm_\w+|fbclid|gclid)$/i;

/** Builds the comparison key used to decide whether two citation URLs are the same source. */
function citationKey(rawUrl: string): string {
  try {
    const parsed = new URL(rawUrl);
    const query = new URLSearchParams();
    parsed.searchParams.forEach((value, name) => {
      if (!TRACKING_PARAM.test(name)) query.append(name, value);
    });
    const path = parsed.pathname.replace(/\/+$/, "");
    const qs = query.toString();
    // The fragment is deliberately left out: it addresses a position inside the same page.
    return `${parsed.protocol}//${parsed.host}${path}${qs ? `?${qs}` : ""}`;
  } catch {
    // Not a URL: fall back to plain string normalisation.
    return rawUrl.split("?")[0].replace(/\/$/, "");
  }
}

/**
 * Removes citations that point at the same source, keeping the first occurrence.
 *
 * Two URLs are considered equal when they match after ignoring the fragment,
 * trailing slashes and tracking parameters. Meaningful query parameters are
 * kept, so `watch?v=1` and `watch?v=2` remain distinct. Surviving entries keep
 * their original `index`.
 *
 * @param citations Validated citations, typically from `validateCitations`.
 * @returns A new array without duplicates; the input is not modified.
 */
function deduplicateCitations(citations: ValidatedCitation[]): ValidatedCitation[] {
  const seen = new Set<string>();
  return citations.filter((c) => {
    const key = citationKey(c.url);
    if (seen.has(key)) return false;
    seen.add(key);
    return true;
  });
}

/**
 * Replaces `[n]` markers in `answer` with Markdown links to the matching valid citation.
 *
 * The answer is rewritten in a single pass, so text inserted for one citation
 * is never rescanned for another. Markers that are already followed by `(`
 * (i.e. already links) and markers with no valid citation are left untouched.
 * Parentheses inside URLs are percent-encoded so they cannot terminate the
 * Markdown link target early.
 *
 * @param answer Answer text containing `[1]`, `[2]`, ... markers.
 * @param citations Validated citations; only entries with `valid === true` are linked.
 * @returns The answer with linkable markers rewritten as `[n](url)`.
 */
function renderCitations(answer: string, citations: ValidatedCitation[]): string {
  const targets = new Map<string, string>();
  for (const c of citations) {
    const key = String(c.index);
    if (c.valid && !targets.has(key)) {
      targets.set(key, c.url.replace(/\(/g, "%28").replace(/\)/g, "%29"));
    }
  }

  return answer.replace(/\[(\d+)\](?!\()/g, (marker: string, digits: string) => {
    const target = targets.get(digits);
    return target === undefined ? marker : `[${digits}](${target})`;
  });
}
Step 3: Result Caching with Freshness Policy
import { LRUCache } from "lru-cache";
import { createHash } from "crypto";

/** What is stored in the cache for one (model, query) pair. */
interface CachedResult {
  /** Answer text of the first choice, or `""` when the response had none. */
  answer: string;
  /** Validated, de-duplicated citations for the answer. */
  citations: ValidatedCitation[];
  /** `Date.now()` at the moment the entry was created. */
  cachedAt: number;
  /** Model name reported by the response. */
  model: string;
}

/** Freshness classes a query can fall into. */
type QueryType = "news" | "research" | "factual" | "default";

/** Cache lifetime in milliseconds for each freshness class. */
const CACHE_TTL: Record<QueryType, number> = {
  // 30 min for breaking/current events
  news: 30 * 60_000,
  // 4 hours for research topics
  research: 4 * 3600_000,
  // 24 hours for stable facts
  factual: 24 * 3600_000,
  // 1 hour default
  default: 1 * 3600_000,
};

/** Holds at most 500 results; each entry gets its own TTL when it is stored. */
const resultCache = new LRUCache<string, CachedResult>({ max: 500 });

/**
 * Classifies a query by keyword to pick a cache lifetime.
 *
 * Checks run in the order news -> research -> factual and the first match wins,
 * so "latest research" is treated as news.
 *
 * @param query Raw query text (matched case-insensitively).
 * @returns The key into `CACHE_TTL`; `"default"` when no keyword matches.
 */
function detectQueryType(query: string): keyof typeof CACHE_TTL {
  if (/\b(latest|today|breaking|recent|this week)\b/i.test(query)) return "news";
  if (/\b(research|study|paper|analysis|compare)\b/i.test(query)) return "research";
  if (/\b(what is|define|how does|who is)\b/i.test(query)) return "factual";
  return "default";
}

/**
 * Reads the `citations` field from a response without trusting its shape.
 * Returns only the string entries; anything else yields an empty list.
 */
function extractCitations(response: unknown): string[] {
  if (typeof response !== "object" || response === null) return [];
  const raw = (response as { citations?: unknown }).citations;
  return Array.isArray(raw) ? raw.filter((c): c is string => typeof c === "string") : [];
}

/**
 * Runs a sanitized search, serving repeated queries from an in-memory cache.
 *
 * The cache key is a SHA-256 of `model` plus the lower-cased, trimmed query,
 * so the raw query text is never used as a key directly. Each entry expires
 * according to the freshness class returned by `detectQueryType`.
 *
 * @param query Raw user query.
 * @param model Model name used in the cache key. Defaults to `"sonar"`.
 * @returns The cached or freshly fetched result plus a `fromCache` flag.
 */
async function cachedSearch(query: string, model = "sonar") {
  const cacheKey = createHash("sha256")
    .update(`${model}:${query.toLowerCase().trim()}`)
    .digest("hex");

  const cached = resultCache.get(cacheKey);
  if (cached) return { ...cached, fromCache: true };

  // NOTE(review): `model` only affects the cache key. `safeSearch` takes no
  // model argument, so the request itself always uses whatever model
  // `safeSearch` hard-codes. Thread `model` through if other models are needed.
  const response = await safeSearch(query);
  const citations = deduplicateCitations(validateCitations(extractCitations(response)));

  const entry: CachedResult = {
    // An empty `choices` array must not crash the caller.
    answer: response.choices[0]?.message?.content ?? "",
    citations,
    cachedAt: Date.now(),
    model: response.model,
  };

  resultCache.set(cacheKey, entry, { ttl: CACHE_TTL[detectQueryType(query)] });
  return { ...entry, fromCache: false };
}
Step 4: Conversation Context Management
import OpenAI from "openai";

type Message = OpenAI.ChatCompletionMessageParam;

/**
 * Bounded conversation history for multi-turn searches.
 *
 * An optional system prompt is pinned at position 0 and is never trimmed.
 * After every added message the history is cut back to at most
 * `maxMessages` entries and roughly `maxEstimatedTokens` tokens by dropping
 * the oldest non-system messages.
 */
class SearchContext {
  private messages: Message[] = [];
  private readonly maxMessages = 10;
  private readonly maxEstimatedTokens = 8000;

  /** @param systemPrompt Optional system message kept at the front of the history. */
  constructor(systemPrompt?: string) {
    if (systemPrompt) {
      this.messages.push({ role: "system", content: systemPrompt });
    }
  }

  /** Appends a user turn, then trims the history. */
  addUserMessage(content: string): void {
    this.messages.push({ role: "user", content });
    this.trim();
  }

  /** Appends an assistant turn, then trims the history. */
  addAssistantMessage(content: string): void {
    this.messages.push({ role: "assistant", content });
    this.trim();
  }

  /** Returns a shallow copy of the history, so callers cannot reorder the internal array. */
  getMessages(): Message[] {
    return [...this.messages];
  }

  /** Index of the first trimmable message: 1 when a system prompt is pinned, else 0. */
  private historyStart(): number {
    return this.messages[0]?.role === "system" ? 1 : 0;
  }

  /** Drops the oldest non-system messages until both limits are respected. */
  private trim(): void {
    let dropped = false;

    // Keep system prompt + last N messages
    while (this.messages.length > this.maxMessages) {
      this.messages.splice(this.historyStart(), 1);
      dropped = true;
    }

    // Trim if estimated tokens too high (but never below two messages)
    while (this.estimateTokens() > this.maxEstimatedTokens && this.messages.length > 2) {
      this.messages.splice(this.historyStart(), 1);
      dropped = true;
    }

    // Dropping a single message can leave an assistant reply whose question
    // is gone at the head of the history. Chat endpoints that require
    // user/assistant turns to alternate after the system prompt reject such
    // a history (Perplexity documents this requirement - confirm for the
    // model in use), so discard orphaned leading replies as well. This only
    // runs after a trim, so a caller that deliberately starts with an
    // assistant message keeps it.
    if (dropped) {
      const start = this.historyStart();
      while (this.messages.length > start + 1 && this.messages[start]?.role === "assistant") {
        this.messages.splice(start, 1);
      }
    }
  }

  /** Rough token estimate: about four characters per token, rounded up per message. */
  private estimateTokens(): number {
    let total = 0;
    for (const m of this.messages) {
      // Only string content is measured; this class never stores anything else.
      const text = typeof m.content === "string" ? m.content : "";
      total += Math.ceil(text.length / 4);
    }
    return total;
  }

  /** Empties the history but keeps the system prompt, if there is one. */
  clear(): void {
    const system = this.messages.find((m) => m.role === "system");
    this.messages = system ? [system] : [];
  }
}
Error Handling
| Issue | Cause | Solution |
|---|---|---|
| PII in search query | User entered personal data | Apply `sanitizeQuery()` before the API call |
| Broken citation URLs | Source page moved/deleted | Validate URLs, filter invalid ones |
| Stale cached results | TTL too long for news | Use query-type-aware TTL |
| Context overflow | Too many conversation turns | Automatic trimming in SearchContext |
| Duplicate citations | Same source cited multiple times | Deduplicate by normalized URL |
Output
- Query sanitization stripping PII before API calls
- Citation validation and deduplication
- Cache with query-type-aware TTL
- Conversation context with automatic trimming
Resources
Next Steps
For access control, see `perplexity-enterprise-rbac`.