
AI Personalization Skill

install

Clone the upstream repo:

  git clone https://github.com/vibeforge1111/vibeship-spawner-skills

manifest: ai/ai-personalization/skill.yaml

source content

AI Personalization Skill

Recommendation systems, user embeddings, and personalized experiences

id: ai-personalization
name: ai-personalization
category: ai
description: |
  Building AI-powered personalization systems: recommendation engines, collaborative
  filtering, content-based filtering, user preference learning, cold-start solutions,
  and LLM-enhanced personalized experiences.

version: 1.0.0

triggers:

  • "recommendation system"
  • "personalization"
  • "collaborative filtering"
  • "content-based filtering"
  • "user preferences"
  • "recommend"
  • "suggestions"
  • "for you"
  • "similar items"
  • "you might like"

technologies:
  primary:
    - OpenAI Embeddings (user/item embeddings)
    - Pinecone/Qdrant (vector similarity)
    - Redis (real-time features)
    - PostgreSQL (user interactions)
  supporting:
    - LanceDB (local vector storage)
    - BM25 (keyword matching)
    - React Query (caching)

concepts:
  recommendation_types:
    - name: Collaborative Filtering
      description: Recommend based on similar users' preferences
      when_to_use: "Have user interaction data, finding patterns across users"
    - name: Content-Based Filtering
      description: Recommend based on item attributes matching user preferences
      when_to_use: "Know item features, matching to explicit user preferences"
    - name: Hybrid
      description: Combine collaborative and content-based approaches
      when_to_use: "Need robustness, handling cold-start with fallback"
    - name: Knowledge-Based
      description: Use domain rules and constraints
      when_to_use: "Complex constraints (dietary, compatibility)"
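
A minimal sketch of how these recommendation types might be chosen at runtime. The thresholds, flags, and function name below are illustrative assumptions, not part of the skill itself:

  // Hypothetical strategy selector based on the when_to_use guidance above.
  type Strategy = "collaborative" | "content-based" | "hybrid" | "knowledge-based";

  function chooseStrategy(opts: {
    interactionCount: number;  // how much history this user has
    hasItemMetadata: boolean;  // do items carry usable features?
    hasDomainRules: boolean;   // hard constraints (dietary, compatibility)
  }): Strategy {
    if (opts.hasDomainRules) return "knowledge-based";
    if (opts.interactionCount === 0) {
      // Cold start: fall back to item features if available
      return opts.hasItemMetadata ? "content-based" : "knowledge-based";
    }
    // Enough signal on both sides: combine for robustness
    return opts.hasItemMetadata ? "hybrid" : "collaborative";
  }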

patterns:

collaborative_filtering_setup:
  description: Basic collaborative filtering with embeddings
  when: "Building recommendation system with user interactions"
  pattern: |
  // lib/recommendations/collaborative.ts
  import OpenAI from "openai";
  import { Pinecone } from "@pinecone-database/pinecone";

  const openai = new OpenAI();
  const pinecone = new Pinecone();
  const index = pinecone.index("user-embeddings");

  interface UserInteraction {
    userId: string;
    itemId: string;
    action: "view" | "like" | "purchase" | "save";
    timestamp: Date;
    metadata?: Record<string, unknown>;
  }

  interface UserProfile {
    userId: string;
    embedding: number[];
    interactionCount: number;
    lastUpdated: Date;
  }

  // Build user embedding from interactions
  async function buildUserEmbedding(
    userId: string,
    interactions: UserInteraction[]
  ): Promise<number[]> {
    // Weight interactions by recency and action type
    const weights: Record<string, number> = {
      purchase: 5,
      like: 3,
      save: 2,
      view: 1,
    };

    const now = Date.now();
    const halfLife = 7 * 24 * 60 * 60 * 1000; // 7 days

    // Create weighted text representation
    const weightedItems: string[] = [];

    for (const interaction of interactions) {
      const actionWeight = weights[interaction.action] ?? 1;
      const age = now - interaction.timestamp.getTime();
      const recencyWeight = Math.exp(-age / halfLife);
      const totalWeight = Math.round(actionWeight * recencyWeight * 10);

      // Repeat item ID based on weight
      for (let i = 0; i < totalWeight; i++) {
        weightedItems.push(interaction.itemId);
      }
    }

    // Generate embedding from weighted representation
    const response = await openai.embeddings.create({
      model: "text-embedding-3-small",
      input: weightedItems.join(" "),
    });

    return response.data[0].embedding;
  }

  // Find similar users
  async function findSimilarUsers(
    userEmbedding: number[],
    options?: { topK?: number; excludeUser?: string }
  ): Promise<Array<{ userId: string; score: number }>> {
    const { topK = 10, excludeUser } = options ?? {};

    const results = await index.namespace("users").query({
      vector: userEmbedding,
      topK: topK + 1, // Extra to filter self
      includeMetadata: true,
    });

    return results.matches
      .filter((m) => m.id !== excludeUser)
      .slice(0, topK)
      .map((m) => ({
        userId: m.id,
        score: m.score ?? 0,
      }));
  }

  // Get recommendations for user
  async function getRecommendations(
    userId: string,
    options?: { limit?: number; excludeItems?: string[] }
  ): Promise<Array<{ itemId: string; score: number; reason: string }>> {
    const { limit = 20, excludeItems = [] } = options ?? {};
    const excludeSet = new Set(excludeItems);

    // Get user's embedding
    const userResult = await index.namespace("users").fetch([userId]);
    const userVector = userResult.records[userId]?.values;

    if (!userVector) {
      return []; // New user - needs cold start handling
    }

    // Find similar users
    const similarUsers = await findSimilarUsers(userVector, {
      topK: 50,
      excludeUser: userId,
    });

    // Aggregate their items with weighted scores
    const itemScores = new Map<string, { score: number; sources: string[] }>();

    for (const simUser of similarUsers) {
      const userItems = await getUserRecentItems(simUser.userId);

      for (const item of userItems) {
        if (excludeSet.has(item.itemId)) continue;

        const existing = itemScores.get(item.itemId);
        const contribution = simUser.score * item.weight;

        if (existing) {
          existing.score += contribution;
          existing.sources.push(simUser.userId);
        } else {
          itemScores.set(item.itemId, {
            score: contribution,
            sources: [simUser.userId],
          });
        }
      }
    }

    // Sort and return top items
    return Array.from(itemScores.entries())
      .sort((a, b) => b[1].score - a[1].score)
      .slice(0, limit)
      .map(([itemId, data]) => ({
        itemId,
        score: data.score,
        reason: `Liked by ${data.sources.length} similar users`,
      }));
  }

  async function getUserRecentItems(
    userId: string
  ): Promise<Array<{ itemId: string; weight: number }>> {
    // Implementation: query from DB
    return [];
  }
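
  // --- Sketch (not part of the upstream skill): persisting the profile. ---
  // buildUserEmbedding() is only useful once the vector is written back to the
  // "users" namespace that findSimilarUsers() queries. A minimal refresh
  // routine, assuming the interactions come from your own DB query:
  async function refreshUserProfile(
    userId: string,
    interactions: UserInteraction[]
  ): Promise<void> {
    if (interactions.length === 0) return; // nothing to embed yet

    const embedding = await buildUserEmbedding(userId, interactions);

    await index.namespace("users").upsert([
      {
        id: userId,
        values: embedding,
        metadata: {
          interactionCount: interactions.length,
          lastUpdated: new Date().toISOString(),
        },
      },
    ]);
  }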

content_based_filtering:
  description: Content-based recommendations using item features
  when: "Have rich item metadata and explicit preferences"
  pattern: |
  // lib/recommendations/content-based.ts
  import OpenAI from "openai";
  import { Pinecone } from "@pinecone-database/pinecone";

  const openai = new OpenAI();
  const pinecone = new Pinecone();
  const index = pinecone.index("item-embeddings");

  interface Item {
    id: string;
    title: string;
    description: string;
    category: string;
    tags: string[];
    attributes: Record<string, string | number>;
  }

  interface UserPreferences {
    likedCategories: string[];
    likedTags: string[];
    attributePreferences: Record<string, string | number>;
  }

  // Build item embedding from features
  async function buildItemEmbedding(item: Item): Promise<number[]> {
    // Create rich text representation
    const features = [
      item.title,
      item.description,
      `Category: ${item.category}`,
      `Tags: ${item.tags.join(", ")}`,
      ...Object.entries(item.attributes).map(([k, v]) => `${k}: ${v}`),
    ].join(". ");

    const response = await openai.embeddings.create({
      model: "text-embedding-3-small",
      input: features,
    });

    return response.data[0].embedding;
  }

  // Build preference profile embedding
  async function buildPreferenceEmbedding(
    preferences: UserPreferences,
    likedItems: Item[]
  ): Promise<number[]> {
    // Combine explicit preferences with implicit from liked items
    const parts: string[] = [];

    if (preferences.likedCategories.length) {
      parts.push(`Preferred categories: ${preferences.likedCategories.join(", ")}`);
    }
    if (preferences.likedTags.length) {
      parts.push(`Preferred tags: ${preferences.likedTags.join(", ")}`);
    }

    // Add liked item descriptions (weighted recent)
    for (const item of likedItems.slice(-10)) {
      parts.push(item.description);
    }

    const response = await openai.embeddings.create({
      model: "text-embedding-3-small",
      input: parts.join(". "),
    });

    return response.data[0].embedding;
  }

  // Get content-based recommendations
  async function getContentRecommendations(
    preferenceEmbedding: number[],
    options?: {
      limit?: number;
      categoryFilter?: string[];
      excludeItems?: string[];
    }
  ): Promise<Array<{ item: Item; score: number; matchedFeatures: string[] }>> {
    const { limit = 20, categoryFilter, excludeItems = [] } = options ?? {};

    // Build filter
    const filter: Record<string, unknown> = {};
    if (categoryFilter?.length) {
      filter.category = { $in: categoryFilter };
    }
    if (excludeItems.length) {
      filter.id = { $nin: excludeItems };
    }

    const results = await index.namespace("items").query({
      vector: preferenceEmbedding,
      topK: limit,
      filter: Object.keys(filter).length ? filter : undefined,
      includeMetadata: true,
    });

    return results.matches.map((match) => ({
      item: match.metadata as unknown as Item,
      score: match.score ?? 0,
      matchedFeatures: extractMatchedFeatures(match.metadata),
    }));
  }

  function extractMatchedFeatures(metadata: unknown): string[] {
    // Analyze which features contributed to match
    return [];
  }
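
  // --- Sketch (illustrative only): wiring the pieces together. ---
  // getUserPreferences() and getLikedItems() are hypothetical data-access
  // helpers standing in for your own storage layer; the limit is an example.
  async function recommendForUser(userId: string) {
    const preferences = await getUserPreferences(userId);
    const likedItems = await getLikedItems(userId);

    const preferenceEmbedding = await buildPreferenceEmbedding(preferences, likedItems);

    return getContentRecommendations(preferenceEmbedding, {
      limit: 20,
      excludeItems: likedItems.map((i) => i.id), // don't re-recommend liked items
    });
  }

  // Placeholder stubs; replace with real queries.
  async function getUserPreferences(userId: string): Promise<UserPreferences> {
    return { likedCategories: [], likedTags: [], attributePreferences: {} };
  }

  async function getLikedItems(userId: string): Promise<Item[]> {
    return [];
  }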

hybrid_recommendation:
  description: Combine multiple recommendation strategies
  when: "Need robust recommendations with cold-start handling"
  pattern: |
  // lib/recommendations/hybrid.ts
  interface RecommendationStrategy {
    name: string;
    weight: number;
    getRecommendations(
      userId: string,
      options: RecommendationOptions
    ): Promise<ScoredItem[]>;
  }

  interface ScoredItem {
    itemId: string;
    score: number;
    source: string;
  }

  interface RecommendationOptions {
    limit: number;
    excludeItems: string[];
    context?: Record<string, unknown>;
  }

  interface HybridConfig {
    strategies: RecommendationStrategy[];
    diversityFactor: number; // 0-1, higher = more diverse
    coldStartThreshold: number; // min interactions for collaborative
  }

  async function getHybridRecommendations(
    userId: string,
    config: HybridConfig,
    options: RecommendationOptions
  ): Promise<Array<{ itemId: string; score: number; sources: string[] }>> {
    const userInteractionCount = await getUserInteractionCount(userId);

    // Adjust weights based on data availability
    const adjustedStrategies = config.strategies.map((strategy) => {
      let weight = strategy.weight;

      // Reduce collaborative weight for new users
      if (strategy.name === "collaborative" &&
          userInteractionCount < config.coldStartThreshold) {
        weight *= userInteractionCount / config.coldStartThreshold;
      }

      return { ...strategy, weight };
    });

    // Normalize weights
    const totalWeight = adjustedStrategies.reduce((sum, s) => sum + s.weight, 0);
    const normalizedStrategies = adjustedStrategies.map((s) => ({
      ...s,
      weight: s.weight / totalWeight,
    }));

    // Get recommendations from each strategy
    const allResults = await Promise.all(
      normalizedStrategies.map(async (strategy) => {
        const recs = await strategy.getRecommendations(userId, options);
        return recs.map((r) => ({
          ...r,
          weightedScore: r.score * strategy.weight,
          source: strategy.name,
        }));
      })
    );

    // Merge and deduplicate
    const itemMap = new Map<string, { score: number; sources: string[] }>();

    for (const results of allResults) {
      for (const rec of results) {
        const existing = itemMap.get(rec.itemId);
        if (existing) {
          existing.score += rec.weightedScore;
          existing.sources.push(rec.source);
        } else {
          itemMap.set(rec.itemId, {
            score: rec.weightedScore,
            sources: [rec.source],
          });
        }
      }
    }

    // Apply diversity re-ranking
    const sorted = Array.from(itemMap.entries())
      .sort((a, b) => b[1].score - a[1].score);

    if (config.diversityFactor > 0) {
      return applyDiversityReranking(sorted, config.diversityFactor, options.limit);
    }

    return sorted.slice(0, options.limit).map(([itemId, data]) => ({
      itemId,
      score: data.score,
      sources: data.sources,
    }));
  }

  function applyDiversityReranking(
    items: Array<[string, { score: number; sources: string[] }]>,
    factor: number,
    limit: number
  ): Array<{ itemId: string; score: number; sources: string[] }> {
    // MMR (Maximal Marginal Relevance) style reranking
    const selected: typeof items = [];
    const remaining = [...items];

    while (selected.length < limit && remaining.length > 0) {
      let bestIdx = 0;
      let bestScore = -Infinity;

      for (let i = 0; i < remaining.length; i++) {
        const relevance = remaining[i][1].score;
        const diversity = selected.length === 0 ? 1 :
          calculateDiversity(remaining[i], selected);

        const mmrScore = (1 - factor) * relevance + factor * diversity;

        if (mmrScore > bestScore) {
          bestScore = mmrScore;
          bestIdx = i;
        }
      }

      selected.push(remaining.splice(bestIdx, 1)[0]);
    }

    return selected.map(([itemId, data]) => ({
      itemId,
      score: data.score,
      sources: data.sources,
    }));
  }

  function calculateDiversity(
    item: [string, { score: number; sources: string[] }],
    selected: Array<[string, { score: number; sources: string[] }]>
  ): number {
    // Calculate dissimilarity to already selected items
    return 1; // Simplified - real implementation uses embeddings
  }

  async function getUserInteractionCount(userId: string): Promise<number> {
    return 0; // Implementation
  }
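
  // --- Sketch (illustrative only): composing a hybrid config. ---
  // collaborativeRecs / contentRecs / popularRecs stand in for strategy
  // implementations built from the other patterns in this skill; the weights
  // and thresholds are example values, not recommendations.
  declare const collaborativeRecs: RecommendationStrategy["getRecommendations"];
  declare const contentRecs: RecommendationStrategy["getRecommendations"];
  declare const popularRecs: RecommendationStrategy["getRecommendations"];

  const exampleConfig: HybridConfig = {
    strategies: [
      { name: "collaborative", weight: 0.5, getRecommendations: collaborativeRecs },
      { name: "content", weight: 0.3, getRecommendations: contentRecs },
      { name: "popular", weight: 0.2, getRecommendations: popularRecs },
    ],
    diversityFactor: 0.3,   // mild MMR reranking
    coldStartThreshold: 10, // below this, the collaborative weight is scaled down
  };

  // const recs = await getHybridRecommendations(userId, exampleConfig, {
  //   limit: 20,
  //   excludeItems: [],
  // });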

cold_start_handling:
  description: Handle new users and items with limited data
  when: "Users or items have insufficient interaction history"
  pattern: |
  // lib/recommendations/cold-start.ts
  import OpenAI from "openai";
  import { z } from "zod";

  const openai = new OpenAI();

  const OnboardingPreferencesSchema = z.object({
    selectedCategories: z.array(z.string()).min(1).max(5),
    selectedItems: z.array(z.string()).min(3).max(10),
    attributePreferences: z.record(z.string(), z.union([z.string(), z.number()])),
  });

  type OnboardingPreferences = z.infer<typeof OnboardingPreferencesSchema>;

  interface ColdStartStrategy {
    // Phase 1: Popular items (everyone)
    getPopularItems(options: {
      category?: string;
      limit: number;
    }): Promise<string[]>;

    // Phase 2: Onboarding preferences
    processOnboarding(
      userId: string,
      preferences: OnboardingPreferences
    ): Promise<void>;

    // Phase 3: Preference elicitation
    getElicitationItems(
      userId: string,
      phase: "burn-in" | "refinement"
    ): Promise<Array<{ itemId: string; reason: string }>>;
  }

  // Two-phase elicitation for new users
  class PreferenceElicitation {
    private burnInSize = 5;
    private refinementSize = 10;

    async runBurnIn(userId: string): Promise<string[]> {
      // Phase 1: Show diverse popular items
      const diversePopular = await this.getDiversePopularItems(this.burnInSize);
      return diversePopular;
    }

    async runRefinement(
      userId: string,
      burnInRatings: Map<string, number>
    ): Promise<string[]> {
      // Phase 2: Adaptive items based on burn-in
      const likedItems = Array.from(burnInRatings.entries())
        .filter(([_, rating]) => rating >= 4)
        .map(([id]) => id);

      if (likedItems.length === 0) {
        // No strong preferences - try different categories
        return this.getDiversePopularItems(this.refinementSize);
      }

      // Find items similar to liked ones, but in unexplored areas
      return this.getExplorativeItems(likedItems, this.refinementSize);
    }

    private async getDiversePopularItems(count: number): Promise<string[]> {
      // Get top items from each category
      return [];
    }

    private async getExplorativeItems(
      seedItems: string[],
      count: number
    ): Promise<string[]> {
      // Find similar items in diverse categories
      return [];
    }
  }

  // Meta-learning for cold items
  interface MetaWarmUp {
    // Transform cold item embedding into warm feature space
    transformColdEmbedding(
      itemId: string,
      itemFeatures: Record<string, unknown>
    ): Promise<number[]>;
  }

  class ItemColdStartHandler {
    // For new items with no interactions
    async getInitialItemEmbedding(
      item: { id: string; features: Record<string, unknown> }
    ): Promise<number[]> {
      // Use content features to bootstrap embedding
      const featureText = Object.entries(item.features)
        .map(([k, v]) => `${k}: ${v}`)
        .join(". ");

      const response = await openai.embeddings.create({
        model: "text-embedding-3-small",
        input: featureText,
      });

      return response.data[0].embedding;
    }

    // Update embedding as interactions come in
    async warmUpEmbedding(
      itemId: string,
      coldEmbedding: number[],
      interactions: Array<{ userId: string; action: string }>
    ): Promise<number[]> {
      if (interactions.length < 5) {
        return coldEmbedding; // Not enough data yet
      }

      // Blend content embedding with interaction-based embedding
      const interactionEmbedding = await this.buildInteractionEmbedding(
        itemId,
        interactions
      );

      const warmRatio = Math.min(interactions.length / 50, 0.8);

      return coldEmbedding.map((v, i) =>
        (1 - warmRatio) * v + warmRatio * interactionEmbedding[i]
      );
    }

    private async buildInteractionEmbedding(
      itemId: string,
      interactions: Array<{ userId: string; action: string }>
    ): Promise<number[]> {
      // Average of user embeddings who interacted
      return [];
    }
  }
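
  // --- Sketch (illustrative only): handling an onboarding submission. ---
  // Validates the payload with the schema above and bootstraps a profile from
  // explicit preferences. saveUserEmbedding() is a placeholder for your own
  // vector-store write.
  async function handleOnboarding(userId: string, payload: unknown): Promise<void> {
    const prefs = OnboardingPreferencesSchema.parse(payload);

    // In a real system, map prefs.selectedItems (IDs) to titles/descriptions
    // before embedding; raw IDs carry no semantic meaning.
    const text = [
      `Preferred categories: ${prefs.selectedCategories.join(", ")}`,
      ...Object.entries(prefs.attributePreferences).map(([k, v]) => `${k}: ${v}`),
    ].join(". ");

    const response = await openai.embeddings.create({
      model: "text-embedding-3-small",
      input: text,
    });

    await saveUserEmbedding(userId, response.data[0].embedding);
  }

  async function saveUserEmbedding(userId: string, embedding: number[]): Promise<void> {
    // Placeholder: upsert into your vector store's "users" namespace.
  }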

llm_personalized_responses:
  description: Personalize LLM responses based on user context
  when: "Need AI chat/content personalized to user preferences"
  pattern: |
  // lib/personalization/llm-personalized.ts
  import OpenAI from "openai";

  const openai = new OpenAI();

  interface UserContext {
    userId: string;
    preferences: {
      communicationStyle: "formal" | "casual" | "technical";
      verbosity: "brief" | "detailed";
      expertise: "beginner" | "intermediate" | "expert";
    };
    history: Array<{
      role: "user" | "assistant";
      content: string;
      timestamp: Date;
    }>;
    profileSummary: string;
  }

  interface PersonalizedRAGContext {
    relevantMemories: string[];
    userPreferences: Record<string, string>;
    recentTopics: string[];
  }

  // Build personalized system prompt
  function buildPersonalizedSystemPrompt(context: UserContext): string {
    const styleGuide = {
      formal: "Use professional, formal language",
      casual: "Use friendly, conversational tone",
      technical: "Use precise technical terminology",
    };

    const verbosityGuide = {
      brief: "Keep responses concise and to the point",
      detailed: "Provide comprehensive explanations with examples",
    };

    const expertiseGuide = {
      beginner: "Explain concepts simply, avoid jargon",
      intermediate: "Assume basic knowledge, introduce advanced concepts",
      expert: "Use technical depth, skip basic explanations",
    };

    return `You are a helpful assistant personalized for this user.

User Profile: ${context.profileSummary}

Communication Guidelines:

  • ${styleGuide[context.preferences.communicationStyle]}
  • ${verbosityGuide[context.preferences.verbosity]}
  • ${expertiseGuide[context.preferences.expertise]}

Personalization Notes:

  • Remember the user's previous interactions and preferences

  • Maintain consistency with past conversations

  • Adapt explanations to their expertise level`;
  }

    // Personalized chat with memory
    async function personalizedChat(
      userMessage: string,
      context: UserContext,
      ragContext?: PersonalizedRAGContext
    ): Promise<string> {
      const systemPrompt = buildPersonalizedSystemPrompt(context);
    
      const messages: OpenAI.ChatCompletionMessageParam[] = [
        { role: "system", content: systemPrompt },
      ];
    
      // Add relevant memories from RAG
      if (ragContext?.relevantMemories.length) {
        messages.push({
          role: "system",
          content: `Relevant context from user's history:\n${ragContext.relevantMemories.join("\n")}`,
        });
      }
    
      // Add recent conversation history
      const recentHistory = context.history.slice(-6);
      for (const msg of recentHistory) {
        messages.push({ role: msg.role, content: msg.content });
      }
    
      messages.push({ role: "user", content: userMessage });
    
      const response = await openai.chat.completions.create({
        model: "gpt-4o",
        messages,
        temperature: 0.7,
      });
    
      return response.choices[0].message.content ?? "";
    }
    
    // User embedding for personalized retrieval
    async function buildUserContextEmbedding(
      context: UserContext
    ): Promise<number[]> {
      const contextText = [
        context.profileSummary,
        `Style: ${context.preferences.communicationStyle}`,
        `Expertise: ${context.preferences.expertise}`,
        `Recent topics: ${getRecentTopics(context.history).join(", ")}`,
      ].join(". ");
    
      const response = await openai.embeddings.create({
        model: "text-embedding-3-small",
        input: contextText,
      });
    
      return response.data[0].embedding;
    }
    
    function getRecentTopics(
      history: UserContext["history"]
    ): string[] {
      // Extract topics from recent conversations
      return [];
    }
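
    // --- Sketch (illustrative only): calling the personalized chat. ---
    // The profile values below are example data, not a prescribed schema.
    async function examplePersonalizedChat(): Promise<void> {
      const context: UserContext = {
        userId: "user_123",
        preferences: {
          communicationStyle: "casual",
          verbosity: "brief",
          expertise: "intermediate",
        },
        history: [],
        profileSummary: "Indie developer exploring recommendation systems.",
      };

      const reply = await personalizedChat(
        "How should I handle cold start for new users?",
        context
      );
      console.log(reply);
    }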
    

real_time_recommendations:
  description: Update recommendations based on live behavior
  when: "Need recommendations that adapt to current session"
  pattern: |
    // lib/recommendations/realtime.ts
    import { Redis } from "@upstash/redis";

    const redis = new Redis({
      url: process.env.UPSTASH_REDIS_REST_URL!,
      token: process.env.UPSTASH_REDIS_REST_TOKEN!,
    });
    
    interface SessionContext {
      userId: string;
      sessionId: string;
      currentViews: string[];
      currentClicks: string[];
      currentCart: string[];
      timeOnPage: Map<string, number>;
    }
    
    interface RealTimeSignal {
      type: "view" | "click" | "add_to_cart" | "time_spent";
      itemId: string;
      value?: number;
      timestamp: number;
    }
    
    // Track real-time signals
    async function trackSignal(
      sessionId: string,
      signal: RealTimeSignal
    ): Promise<void> {
      const key = `session:${sessionId}:signals`;
    
      await redis.zadd(key, {
        score: signal.timestamp,
        member: JSON.stringify(signal),
      });
    
      // Keep last 1 hour of signals
      await redis.zremrangebyscore(key, 0, Date.now() - 3600000);
      await redis.expire(key, 7200); // 2 hour TTL
    }
    
    // Get session-boosted recommendations
    async function getSessionAwareRecommendations(
      userId: string,
      sessionId: string,
      baseRecommendations: Array<{ itemId: string; score: number }>,
      options?: { boostFactor?: number }
    ): Promise<Array<{ itemId: string; score: number; boosted: boolean }>> {
      const { boostFactor = 1.5 } = options ?? {};
    
      // Get session signals
      const signalsKey = `session:${sessionId}:signals`;
      const rawSignals = await redis.zrange(signalsKey, 0, -1);
    
      const signals: RealTimeSignal[] = rawSignals.map((s) =>
        JSON.parse(s as string)
      );
    
      // Build session interest profile
      const sessionInterests = new Map<string, number>();
    
      for (const signal of signals) {
        const weight = getSignalWeight(signal);
        const current = sessionInterests.get(signal.itemId) ?? 0;
        sessionInterests.set(signal.itemId, current + weight);
      }
    
      // Get items similar to session interests
      const similarToSession = await findSimilarItems(
        Array.from(sessionInterests.keys()),
        20
      );
    
      const boostSet = new Set(similarToSession.map((i) => i.itemId));
    
      // Apply session boost to base recommendations
      return baseRecommendations.map((rec) => ({
        itemId: rec.itemId,
        score: boostSet.has(rec.itemId) ? rec.score * boostFactor : rec.score,
        boosted: boostSet.has(rec.itemId),
      }));
    }
    
    function getSignalWeight(signal: RealTimeSignal): number {
      const weights: Record<string, number> = {
        add_to_cart: 5,
        click: 2,
        view: 1,
        time_spent: 0.1, // Per second
      };
    
      const baseWeight = weights[signal.type] ?? 1;
    
      if (signal.type === "time_spent" && signal.value) {
        return baseWeight * Math.min(signal.value, 60); // Cap at 60 seconds
      }
    
      return baseWeight;
    }
    
    async function findSimilarItems(
      itemIds: string[],
      limit: number
    ): Promise<Array<{ itemId: string; similarity: number }>> {
      // Find items similar to the given set
      return [];
    }
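
    // --- Sketch (illustrative only): end-to-end session boost. ---
    // baseRecs would normally come from the collaborative or hybrid patterns
    // above; the item IDs and scores here are example data.
    async function exampleSessionFlow(userId: string, sessionId: string) {
      await trackSignal(sessionId, {
        type: "view",
        itemId: "item_42",
        timestamp: Date.now(),
      });

      const baseRecs = [
        { itemId: "item_42", score: 0.8 },
        { itemId: "item_99", score: 0.7 },
      ];

      return getSessionAwareRecommendations(userId, sessionId, baseRecs, {
        boostFactor: 1.5,
      });
    }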
    

anti_patterns:

  • pattern: "Using only collaborative filtering for all users" why: "Fails completely for new users with no history" instead: "Hybrid approach with content-based fallback for cold start"

  • pattern: "Not capping recommendation list before heavy operations" why: "Personalization scoring on 10K items is slow" instead: "Pre-filter to candidate set (100-500) before scoring"

  • pattern: "Updating user embeddings synchronously on every interaction" why: "Adds latency to every action" instead: "Batch embedding updates async (every 5-10 interactions)"

  • pattern: "No diversity in recommendations" why: "Echo chamber / filter bubble effect" instead: "Apply MMR or diversity reranking"

  • pattern: "Storing full interaction history in user session" why: "Sessions get bloated, serialization slow" instead: "Store in Redis/DB, summarize in session"

  • pattern: "Building embeddings from raw item IDs" why: "IDs have no semantic meaning" instead: "Use item titles, descriptions, features for embedding text"

testing_requirements:

  • "Cold start: New user gets reasonable recommendations"
  • "Preference change: Recommendations adapt within session"
  • "Diversity: Top 10 contains items from multiple categories"
  • "Performance: Recommendations return in <200ms"
  • "Privacy: No PII in embeddings or logs"
  • "Feedback loop: Explicit feedback improves next batch"

handoffs:

  • skill: llm-integration when: "Need LLM for content generation or analysis"
  • skill: semantic-search when: "Building vector-based similarity"
  • skill: ai-observability when: "Tracking recommendation quality metrics"
  • skill: ai-safety-alignment when: "Need to filter inappropriate recommendations"