Vibecosystem vector-db-patterns
Embedding strategies, ANN algorithms, hybrid search, RAG chunking strategies, and reranking for semantic search and retrieval.
install
source · Clone the upstream repo
git clone https://github.com/vibeeval/vibecosystem
manifest:
skills/vector-db-patterns/skill.md
source content
Vector DB Patterns
Semantic search and retrieval-augmented generation (RAG) patterns with vector databases.
Embedding Strategies
import { OpenAI } from 'openai'

const openai = new OpenAI()

/**
 * Embeds every string in `texts` and returns one vector per input, in input order.
 *
 * Requests are issued one after another in batches of at most 2048 inputs
 * (max 2048 inputs per request for text-embedding-3-small). An empty `texts`
 * array results in no API call and an empty result.
 *
 * @param texts - Strings to embed.
 * @returns One embedding per entry of `texts`, requested with `dimensions: 512`.
 * @throws Error when a response does not contain exactly one embedding per input.
 */
async function embedTexts(texts: string[]): Promise<number[][]> {
  const BATCH_SIZE = 2048
  const allEmbeddings: number[][] = []

  for (let i = 0; i < texts.length; i += BATCH_SIZE) {
    const batch = texts.slice(i, i + BATCH_SIZE)
    const response = await openai.embeddings.create({
      model: 'text-embedding-3-small', // 1536 dimensions, good cost/quality
      input: batch,
      dimensions: 512, // Reduce dims for speed (Matryoshka)
    })

    // A short or long response would silently shift every later vector onto
    // the wrong text, so fail loudly instead.
    if (response.data.length !== batch.length) {
      throw new Error(
        `Embedding count mismatch: sent ${batch.length} inputs, received ${response.data.length}`
      )
    }

    // Each item carries the position of its input; order by it rather than
    // relying on the array order of the response.
    const ordered = [...response.data].sort((a, b) => a.index - b.index)
    for (const item of ordered) {
      allEmbeddings.push(item.embedding)
    }
  }

  return allEmbeddings
}

/**
 * Embeds a search query, prefixed with `search_query: `.
 *
 * NOTE(review): the `search_query:` / `search_document:` prefixes only help with
 * embedding models trained on such task prefixes — confirm that this applies to
 * the model configured in `embedTexts`. Queries and stored documents must keep
 * using matching prefixes either way.
 *
 * @param query - Raw user query.
 * @returns The embedding of the prefixed query.
 * @throws Error when no embedding is returned.
 */
async function embedForSearch(query: string): Promise<number[]> {
  const [embedding] = await embedTexts([`search_query: ${query}`])
  if (embedding === undefined) {
    throw new Error('No embedding returned for search query')
  }
  return embedding
}

/**
 * Embeds a document for storage, prefixed with `search_document: `.
 *
 * @param document - Raw document (or chunk) text.
 * @returns The embedding of the prefixed document.
 * @throws Error when no embedding is returned.
 */
async function embedForStorage(document: string): Promise<number[]> {
  const [embedding] = await embedTexts([`search_document: ${document}`])
  if (embedding === undefined) {
    throw new Error('No embedding returned for document')
  }
  return embedding
}
Chunking Strategies for RAG
interface Chunk {
  id: string
  text: string
  metadata: {
    sourceId: string
    chunkIndex: number
    startChar: number
    endChar: number
  }
}

/** Half-open `[start, end)` range of UTF-16 code units inside a source text. */
interface TextSpan {
  start: number
  end: number
}

/**
 * Builds a chunk from `text.slice(start, end)` with surrounding whitespace removed.
 * `startChar` / `endChar` point at the trimmed text inside `text`.
 * Returns `undefined` when the range contains only whitespace.
 */
function toChunk(text: string, start: number, end: number, chunkIndex: number): Chunk | undefined {
  const raw = text.slice(start, end)
  const withoutLeading = raw.trimStart()
  const body = withoutLeading.trimEnd()
  if (body === '') return undefined

  const startChar = start + (raw.length - withoutLeading.length)
  return {
    id: crypto.randomUUID(),
    text: body,
    // sourceId is unknown at this level; callers that know the source fill it in.
    metadata: { sourceId: '', chunkIndex, startChar, endChar: startChar + body.length },
  }
}

/** Cuts `[start, end)` into consecutive spans of at most `chunkSize` code units. */
function hardSplitSpan(text: string, start: number, end: number, chunkSize: number): TextSpan[] {
  const spans: TextSpan[] = []
  let cursor = start
  while (cursor < end) {
    let stop = Math.min(cursor + chunkSize, end)
    // Avoid cutting between the two halves of a surrogate pair when there is room.
    if (stop < end && stop - cursor > 1) {
      const before = text.charCodeAt(stop - 1)
      const after = text.charCodeAt(stop)
      if (before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff) {
        stop -= 1
      }
    }
    spans.push({ start: cursor, end: stop })
    cursor = stop
  }
  return spans
}

/**
 * Splits `[start, end)` of `text` into ordered spans that are each at most
 * `chunkSize` long, using the first separator present and recursing with the
 * remaining (finer) separators for pieces that are still too long.
 */
function collectSpans(
  text: string,
  start: number,
  end: number,
  separators: string[],
  chunkSize: number
): TextSpan[] {
  if (end <= start) return []
  if (end - start <= chunkSize) return [{ start, end }]

  const segment = text.slice(start, end)
  const separator = separators.find(s => s !== '' && segment.includes(s))
  if (separator === undefined) {
    return hardSplitSpan(text, start, end, chunkSize)
  }

  const finer = separators.slice(separators.indexOf(separator) + 1)
  // Keep the non-whitespace head of the separator (the "." of ". ") with the
  // preceding piece so sentences do not lose their punctuation.
  const kept = separator.trimEnd().length

  const spans: TextSpan[] = []
  let cursor = 0
  while (cursor <= segment.length) {
    const hit = segment.indexOf(separator, cursor)
    const pieceEnd = hit === -1 ? segment.length : hit + kept
    for (const span of collectSpans(text, start + cursor, start + pieceEnd, finer, chunkSize)) {
      spans.push(span)
    }
    if (hit === -1) break
    cursor = hit + separator.length
  }
  return spans
}

/**
 * Recursive character splitting with overlap.
 *
 * Sizes are measured in UTF-16 code units (`string.length`), not tokens.
 *
 * @param text - Text to split.
 * @param chunkSize - Maximum length of each chunk; must be at least 1.
 * @param overlap - Number of trailing characters of the previous chunk repeated
 *   at the start of the next one.
 * @returns Chunks in document order; empty when `text` is blank.
 * @throws RangeError when `chunkSize` is less than 1 or not a number.
 */
function chunkText(
  text: string,
  chunkSize: number = 512,
  overlap: number = 50
): Chunk[] {
  const separators = ['\n\n', '\n', '. ', ' ']
  return recursiveSplit(text, separators, chunkSize, overlap)
}

/**
 * Splits `text` on the coarsest separator available, falling back to finer
 * separators (and finally to fixed-width cuts) for pieces that still exceed
 * `chunkSize`, then greedily packs the pieces into chunks.
 *
 * Every returned chunk is an exact, trimmed substring of `text` no longer than
 * `chunkSize`, with `chunkIndex`, `startChar` and `endChar` filled in.
 *
 * @param text - Text to split.
 * @param separators - Separators ordered from coarsest to finest.
 * @param chunkSize - Maximum chunk length; fractional values are floored.
 * @param overlap - Desired overlap; clamped to `[0, chunkSize - 1]` and reduced
 *   further when needed to keep a chunk within `chunkSize`.
 * @throws RangeError when `chunkSize` is less than 1 or not a number.
 */
function recursiveSplit(
  text: string,
  separators: string[],
  chunkSize: number,
  overlap: number
): Chunk[] {
  const size = Math.floor(chunkSize)
  if (!(size >= 1)) {
    throw new RangeError(`chunkSize must be at least 1, got ${chunkSize}`)
  }
  const safeOverlap = Number.isFinite(overlap)
    ? Math.min(Math.max(Math.floor(overlap), 0), size - 1)
    : 0

  const spans = collectSpans(text, 0, text.length, separators, size)
  const [first, ...rest] = spans
  if (first === undefined) return []

  const chunks: Chunk[] = []
  const emit = (start: number, end: number): void => {
    const chunk = toChunk(text, start, end, chunks.length)
    if (chunk !== undefined) chunks.push(chunk)
  }

  let chunkStart = first.start
  let chunkEnd = first.end
  for (const span of rest) {
    if (span.end - chunkStart <= size) {
      chunkEnd = span.end
      continue
    }

    emit(chunkStart, chunkEnd)

    // Start the next chunk `safeOverlap` characters before the end of the
    // previous one, but never so early that the new chunk exceeds `size`.
    const desiredStart = safeOverlap > 0 ? chunkEnd - safeOverlap : span.start
    let nextStart = Math.min(span.start, Math.max(desiredStart, span.end - size))
    if (nextStart > 0 && nextStart < span.start) {
      const before = text.charCodeAt(nextStart - 1)
      const at = text.charCodeAt(nextStart)
      // Do not begin a chunk on the second half of a surrogate pair.
      if (before >= 0xd800 && before <= 0xdbff && at >= 0xdc00 && at <= 0xdfff) {
        nextStart += 1
      }
    }
    chunkStart = nextStart
    chunkEnd = span.end
  }
  emit(chunkStart, chunkEnd)

  return chunks
}

/**
 * Semantic chunking: split at topic boundaries using embeddings.
 *
 * The text is cut into sentences (including any trailing text without terminal
 * punctuation), each sentence is embedded, and a new chunk starts whenever the
 * similarity between neighbouring sentences drops below `threshold`.
 *
 * Relies on `embedTexts` and `cosineSimilarity`, which are not defined in this
 * snippet and must be provided by the surrounding module.
 *
 * @param text - Text to split.
 * @param threshold - Similarity below which a topic boundary is assumed.
 * @returns Chunks that are exact, trimmed substrings of `text`; empty when
 *   `text` is blank (no embedding request is made in that case).
 * @throws Error when the number of embeddings does not match the number of sentences.
 */
async function semanticChunk(text: string, threshold: number = 0.3): Promise<Chunk[]> {
  const sentences: TextSpan[] = []
  // Second alternative keeps a final sentence that lacks terminal punctuation.
  for (const match of text.matchAll(/[^.!?]*[.!?]+|[^.!?]+$/g)) {
    if (match[0].trim() === '') continue
    const start = match.index ?? 0
    sentences.push({ start, end: start + match[0].length })
  }

  const [firstSentence] = sentences
  if (firstSentence === undefined) return []

  const embeddings = await embedTexts(sentences.map(s => text.slice(s.start, s.end).trim()))
  if (embeddings.length !== sentences.length) {
    throw new Error(
      `Expected ${sentences.length} sentence embeddings, received ${embeddings.length}`
    )
  }

  const groups: TextSpan[] = [{ ...firstSentence }]
  for (let i = 1; i < sentences.length; i++) {
    const sentence = sentences[i]
    const current = groups[groups.length - 1]
    if (sentence === undefined || current === undefined) continue

    const similarity = cosineSimilarity(embeddings[i - 1], embeddings[i])
    if (similarity < threshold) {
      // Low similarity = topic boundary = new chunk
      groups.push({ ...sentence })
    } else {
      current.end = sentence.end
    }
  }

  const chunks: Chunk[] = []
  for (const group of groups) {
    const chunk = toChunk(text, group.start, group.end, chunks.length)
    if (chunk !== undefined) chunks.push(chunk)
  }
  return chunks
}
Vector Search with Metadata Filtering
// Using Pinecone
import { Pinecone } from '@pinecone-database/pinecone'

const pinecone = new Pinecone()
const index = pinecone.index('documents')

// NOTE(review): `Document`, `Chunk`, `SearchResult`, `embedTexts` and
// `embedForSearch` are not declared in this snippet; they are expected to come
// from the surrounding module. `Document` is presumably an application type with
// `id`, `title`, `category` and `createdAt: Date` — confirm against its declaration.

/**
 * Upsert with metadata: embeds every chunk of `doc` and stores the vectors
 * together with the chunk text and document attributes.
 *
 * `createdAt` is stored twice: as an ISO string for display and as
 * `createdAtMs` (epoch milliseconds) because range filters need a numeric value.
 *
 * @param doc - Source document the chunks belong to.
 * @param chunks - Chunks to index; nothing is sent when empty.
 * @throws Error when the number of embeddings does not match the number of chunks.
 */
async function indexDocument(doc: Document, chunks: Chunk[]): Promise<void> {
  if (chunks.length === 0) return

  const embeddings = await embedTexts(chunks.map(c => c.text))
  if (embeddings.length !== chunks.length) {
    throw new Error(
      `Expected ${chunks.length} embeddings for document ${doc.id}, received ${embeddings.length}`
    )
  }

  const createdAt = doc.createdAt.toISOString()
  const createdAtMs = doc.createdAt.getTime()

  const vectors = chunks.map((chunk, i) => {
    const values = embeddings[i]
    if (values === undefined) {
      throw new Error(`Missing embedding for chunk ${chunk.id}`)
    }
    return {
      id: chunk.id,
      values,
      metadata: {
        text: chunk.text,
        sourceId: doc.id,
        sourceTitle: doc.title,
        category: doc.category,
        createdAt,
        createdAtMs,
        chunkIndex: i,
      },
    }
  })

  // Upsert in batches of 100
  for (let i = 0; i < vectors.length; i += 100) {
    await index.upsert(vectors.slice(i, i + 100))
  }
}

/** Returns `value` when it is a string, otherwise an empty string. */
function metadataString(value: unknown): string {
  return typeof value === 'string' ? value : ''
}

/**
 * Query with metadata filter.
 *
 * @param query - Natural-language query; embedded with `embedForSearch`.
 * @param filters - Optional restrictions: exact `category` match and/or only
 *   documents created at or after `after`.
 * @param topK - Number of matches requested from the index.
 * @returns Matches with their score and stored text / source metadata. Missing
 *   string metadata is returned as an empty string.
 * @throws RangeError when `filters.after` is an invalid date.
 */
async function searchDocuments(
  query: string,
  filters?: { category?: string; after?: Date },
  topK: number = 10
): Promise<SearchResult[]> {
  const queryEmbedding = await embedForSearch(query)

  const filter: Record<string, unknown> = {}
  if (filters?.category) {
    filter.category = { $eq: filters.category }
  }
  if (filters?.after) {
    const afterMs = filters.after.getTime()
    if (Number.isNaN(afterMs)) {
      throw new RangeError('filters.after is not a valid date')
    }
    // Range operators compare numbers, so filter on the epoch-millisecond field
    // written by indexDocument instead of the ISO string. Records indexed
    // without `createdAtMs` need to be re-indexed to match this filter.
    filter.createdAtMs = { $gte: afterMs }
  }

  const results = await index.query({
    vector: queryEmbedding,
    topK,
    includeMetadata: true,
    filter: Object.keys(filter).length > 0 ? filter : undefined,
  })

  return results.matches.map(m => ({
    id: m.id,
    score: m.score ?? 0,
    text: metadataString(m.metadata?.text),
    sourceId: metadataString(m.metadata?.sourceId),
    sourceTitle: metadataString(m.metadata?.sourceTitle),
  }))
}
Hybrid Search (Vector + Keyword)
/**
 * Combine vector similarity with BM25 keyword matching.
 *
 * Both searches run in parallel, each asked for `2 * topK` candidates, and the
 * two rankings are merged with weighted Reciprocal Rank Fusion (RRF): a result
 * at zero-based rank `r` contributes `weight * 1 / (60 + r + 1)`, where the
 * weight is `alpha` for the vector ranking and `1 - alpha` for the keyword
 * ranking. The returned `score` is the fused score, not the original one.
 *
 * Relies on `vectorSearch`, `keywordSearch` and `SearchResult`, which are not
 * defined in this snippet and must be provided by the surrounding module.
 *
 * @param query - Search query passed to both searches.
 * @param topK - Number of fused results to return; values below 1 yield `[]`.
 * @param alpha - Weight of the semantic ranking (0.7 = 70% semantic, 30% keyword).
 * @returns Up to `topK` results ordered by descending fused score.
 * @throws RangeError when `alpha` is not within `[0, 1]`.
 */
async function hybridSearch(
  query: string,
  topK: number = 10,
  alpha: number = 0.7
): Promise<SearchResult[]> {
  // Outside [0, 1] one ranking would get a negative weight and push results down.
  if (!(alpha >= 0 && alpha <= 1)) {
    throw new RangeError(`alpha must be between 0 and 1, got ${alpha}`)
  }
  const limit = Math.floor(topK)
  if (!(limit >= 1)) return []

  // Run both searches in parallel
  const [vectorResults, keywordResults] = await Promise.all([
    vectorSearch(query, limit * 2),
    keywordSearch(query, limit * 2), // BM25 via Elasticsearch
  ])

  const k = 60 // RRF constant
  const scores = new Map<string, number>()
  const resultsById = new Map<string, SearchResult>()

  const accumulate = (ranked: SearchResult[], weight: number): void => {
    ranked.forEach((result, rank) => {
      const current = scores.get(result.id) ?? 0
      scores.set(result.id, current + weight * (1 / (k + rank + 1)))
      // Later rankings overwrite earlier ones, so for an id present in both
      // lists the keyword result's fields are returned.
      resultsById.set(result.id, result)
    })
  }
  accumulate(vectorResults, alpha)
  accumulate(keywordResults, 1 - alpha)

  // Sort by combined score, return top K
  const ranked = [...scores.entries()].sort((a, b) => b[1] - a[1]).slice(0, limit)

  const fused: SearchResult[] = []
  for (const [id, score] of ranked) {
    const result = resultsById.get(id)
    if (result !== undefined) {
      fused.push({ ...result, score })
    }
  }
  return fused
}
Reranking
/** Type guard for one entry of the rerank response's `results` array. */
function isRerankItem(value: unknown): value is { index: number; relevance_score: number } {
  if (typeof value !== 'object' || value === null) return false
  const item = value as Record<string, unknown>
  return typeof item.index === 'number' && typeof item.relevance_score === 'number'
}

/**
 * Cross-encoder reranking: slower but much more accurate than bi-encoder.
 *
 * Sends the texts of `results` to the Cohere rerank endpoint and returns the
 * top entries in the order given by the response, with `score` replaced by the
 * returned `relevance_score`.
 *
 * @param query - Query the documents are ranked against.
 * @param results - Candidates to rerank; an empty list returns `[]` without a request.
 * @param topK - Number of results requested (`top_n`); values below 1 return `[]`.
 * @returns The reranked subset of `results`.
 * @throws Error when `COHERE_API_KEY` is not set, the request fails, or the
 *   response does not have the expected shape.
 */
async function rerankResults(
  query: string,
  results: SearchResult[],
  topK: number = 5
): Promise<SearchResult[]> {
  if (results.length === 0 || !(topK >= 1)) return []

  const apiKey = process.env.COHERE_API_KEY
  if (!apiKey) {
    throw new Error('COHERE_API_KEY is not set')
  }

  // Use Cohere Rerank or cross-encoder model
  const response = await fetch('https://api.cohere.ai/v1/rerank', {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: 'rerank-english-v3.0',
      query,
      documents: results.map(r => r.text),
      top_n: topK,
      return_documents: false,
    }),
  })

  // Without this check an error payload surfaces later as an opaque TypeError.
  if (!response.ok) {
    throw new Error(
      `Cohere rerank request failed with status ${response.status} ${response.statusText}`
    )
  }

  const data: unknown = await response.json()
  const items =
    typeof data === 'object' && data !== null && 'results' in data
      ? (data as { results: unknown }).results
      : undefined
  if (!Array.isArray(items)) {
    throw new Error('Cohere rerank response did not contain a results array')
  }

  const reranked: SearchResult[] = []
  for (const item of items) {
    if (!isRerankItem(item)) {
      throw new Error('Cohere rerank response contained a malformed result entry')
    }
    // `index` refers to the position of the document in the request.
    const original = results[item.index]
    if (original === undefined) {
      throw new Error(`Cohere rerank response referenced unknown document index ${item.index}`)
    }
    reranked.push({ ...original, score: item.relevance_score })
  }
  return reranked
}

/**
 * RAG pipeline: retrieve → rerank → generate.
 *
 * Relies on `hybridSearch` and the `openai` client, which are not defined in
 * this snippet and must be provided by the surrounding module.
 *
 * @param query - User question.
 * @returns The model's answer, or an empty string when the completion has no text content.
 */
async function ragQuery(query: string): Promise<string> {
  // Step 1: Retrieve candidates (broad, fast)
  const candidates = await hybridSearch(query, 20)

  // Step 2: Rerank (narrow, accurate)
  const reranked = await rerankResults(query, candidates, 5)

  // Step 3: Generate answer with context
  const context = reranked.map(r => r.text).join('\n\n')
  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      { role: 'system', content: `Answer based on the context below.\n\nContext:\n${context}` },
      { role: 'user', content: query },
    ],
  })

  // `content` can be null and `choices` can be empty.
  return response.choices[0]?.message.content ?? ''
}
Checklist
- Chunk size 256-1024 tokens with 10-20% overlap
- Asymmetric embedding prefixes for query vs document (when the embedding model was trained with them, e.g. E5 or Nomic Embed)
- Metadata stored alongside vectors for pre-filtering
- Hybrid search (vector + BM25) for best recall
- Reranking top-N candidates with cross-encoder
- Batch embedding calls (never one-by-one)
- Dimension reduction (Matryoshka) for cost/speed optimization
- Evaluation: hit rate, MRR, NDCG on test queries
Anti-Patterns
- Embedding entire documents as single vectors (context lost, poor retrieval)
- Fixed-size chunking ignoring sentence/paragraph boundaries
- Only vector search without keyword fallback (misses exact matches)
- Embedding queries and documents identically when the model expects asymmetric prefixes (e.g. E5, Nomic Embed)
- Not evaluating retrieval quality (building blind)
- Storing embeddings without source text (can't debug or rerank)