Vibecosystem elasticsearch-patterns

Mapping design, query optimization, aggregation patterns, index lifecycle management, and search relevance tuning.

install

source · Clone the upstream repo

git clone https://github.com/vibeeval/vibecosystem

manifest: skills/elasticsearch-patterns/skill.md

Elasticsearch Patterns

Search and analytics patterns for Elasticsearch deployments.

Mapping Design

{
  "mappings": {
    "dynamic": "strict",
    "properties": {
      "id": { "type": "keyword" },
      "title": {
        "type": "text",
        "analyzer": "standard",
        "fields": {
          "keyword": { "type": "keyword" },
          "autocomplete": {
            "type": "text",
            "analyzer": "autocomplete_analyzer"
          }
        }
      },
      "description": {
        "type": "text",
        "analyzer": "standard"
      },
      "price": { "type": "scaled_float", "scaling_factor": 100 },
      "category": { "type": "keyword" },
      "tags": { "type": "keyword" },
      "location": { "type": "geo_point" },
      "created_at": { "type": "date" },
      "metadata": {
        "type": "object",
        "enabled": false
      }
    }
  },
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1,
    "analysis": {
      "analyzer": {
        "autocomplete_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "autocomplete_filter"]
        }
      },
      "filter": {
        "autocomplete_filter": {
          "type": "edge_ngram",
          "min_gram": 2,
          "max_gram": 20
        }
      }
    }
  }
}

Query Patterns

import { Client } from '@elastic/elasticsearch'

const client = new Client({ node: process.env.ELASTICSEARCH_URL })

// Full-text search with boosting and highlighting
async function searchProducts(query: string, filters: ProductFilters) {
  const result = await client.search({
    index: 'products',
    body: {
      query: {
        bool: {
          must: [
            {
              multi_match: {
                query,
                fields: ['title^3', 'description', 'tags^2'],  // Title 3x boost
                type: 'best_fields',
                fuzziness: 'AUTO',         // Typo tolerance
                prefix_length: 2,          // First 2 chars must match exactly
              }
            }
          ],
          filter: [
            ...(filters.category ? [{ term: { category: filters.category } }] : []),
            ...(filters.minPrice || filters.maxPrice ? [{
              range: {
                price: {
                  ...(filters.minPrice && { gte: filters.minPrice }),
                  ...(filters.maxPrice && { lte: filters.maxPrice }),
                }
              }
            }] : []),
            ...(filters.tags?.length ? [{ terms: { tags: filters.tags } }] : []),
          ],
        }
      },
      highlight: {
        fields: {
          title: { number_of_fragments: 0 },       // Full field highlight
          description: { fragment_size: 150 },      // Snippet
        },
        pre_tags: ['<mark>'],
        post_tags: ['</mark>'],
      },
      sort: [
        { _score: 'desc' },
        { created_at: 'desc' },
      ],
      from: filters.offset ?? 0,
      size: filters.limit ?? 20,
    }
  })

  return {
    hits: result.hits.hits.map(hit => ({
      ...hit._source,
      score: hit._score,
      highlights: hit.highlight,
    })),
    total: (result.hits.total as { value: number }).value,
  }
}

// Autocomplete search (edge_ngram)
async function autocomplete(prefix: string) {
  const result = await client.search({
    index: 'products',
    body: {
      query: {
        match: {
          'title.autocomplete': {
            query: prefix,
            operator: 'and',
          }
        }
      },
      _source: ['title', 'category'],
      size: 10,
    }
  })
  return result.hits.hits.map(h => h._source)
}

Aggregation Patterns

// Faceted search: get filter counts alongside results
async function searchWithFacets(query: string) {
  const result = await client.search({
    index: 'products',
    body: {
      query: { match: { title: query } },
      size: 20,
      aggs: {
        // Category facets
        categories: {
          terms: { field: 'category', size: 20 }
        },
        // Price ranges
        price_ranges: {
          range: {
            field: 'price',
            ranges: [
              { key: 'budget', to: 50 },
              { key: 'mid', from: 50, to: 200 },
              { key: 'premium', from: 200 },
            ]
          }
        },
        // Price statistics
        price_stats: {
          stats: { field: 'price' }
        },
        // Date histogram
        created_over_time: {
          date_histogram: {
            field: 'created_at',
            calendar_interval: 'month',
          }
        },
      }
    }
  })

  return {
    hits: result.hits.hits,
    facets: {
      categories: result.aggregations?.categories,
      priceRanges: result.aggregations?.price_ranges,
      priceStats: result.aggregations?.price_stats,
      timeline: result.aggregations?.created_over_time,
    }
  }
}

Index Lifecycle Management (ILM)

{
  "policy": {
    "phases": {
      "hot": {
        "min_age": "0ms",
        "actions": {
          "rollover": {
            "max_primary_shard_size": "50gb",
            "max_age": "7d"
          },
          "set_priority": { "priority": 100 }
        }
      },
      "warm": {
        "min_age": "30d",
        "actions": {
          "shrink": { "number_of_shards": 1 },
          "forcemerge": { "max_num_segments": 1 },
          "set_priority": { "priority": 50 },
          "allocate": {
            "number_of_replicas": 0,
            "require": { "data": "warm" }
          }
        }
      },
      "cold": {
        "min_age": "90d",
        "actions": {
          "set_priority": { "priority": 0 },
          "freeze": {},
          "allocate": {
            "require": { "data": "cold" }
          }
        }
      },
      "delete": {
        "min_age": "365d",
        "actions": {
          "delete": {}
        }
      }
    }
  }
}

Bulk Indexing

async function bulkIndex(documents: Product[]): Promise<void> {
  const body = documents.flatMap(doc => [
    { index: { _index: 'products', _id: doc.id } },
    doc,
  ])

  const result = await client.bulk({ body, refresh: false })  // No refresh for throughput

  if (result.errors) {
    const erroredItems = result.items.filter((item: any) => item.index?.error)
    console.error(`Bulk indexing errors: ${erroredItems.length}/${documents.length}`)
    for (const item of erroredItems.slice(0, 5)) {
      console.error(item.index?.error)
    }
  }
}

// Reindex with zero downtime using aliases
async function reindexWithAlias(oldIndex: string, newIndex: string, alias: string) {
  // 1. Create new index with updated mappings
  await client.indices.create({ index: newIndex, body: newMappings })

  // 2. Reindex data
  await client.reindex({
    body: { source: { index: oldIndex }, dest: { index: newIndex } },
    wait_for_completion: true,
  })

  // 3. Atomic alias swap
  await client.indices.updateAliases({
    body: {
      actions: [
        { remove: { index: oldIndex, alias } },
        { add: { index: newIndex, alias } },
      ]
    }
  })
}

Checklist

Explicit mappings with
```
dynamic: strict
```
(no surprise field types)
Multi-field mappings: keyword for filtering, text for search
Edge n-gram analyzer for autocomplete fields
Boost important fields in multi_match (title > description)
Use filter context for non-scoring queries (cacheable, faster)
ILM policy for hot/warm/cold/delete lifecycle
Alias-based indexing for zero-downtime reindexing
Bulk API for batch writes (never single-document in loops)

Anti-Patterns

Dynamic mapping in production: unexpected field types cause search failures
Searching keyword fields with full-text queries (no tokenization)
Refreshing after every write: kills indexing throughput
Single huge index instead of time-based indices with ILM
Deep pagination with from/size beyond 10,000 (use search_after)
Storing data only in ES without a source-of-truth database