Claude-skill-registry hashing-techniques
Hash-based data structures and techniques including frequency counting, duplicate detection, and LRU cache implementation.
install
source · Clone the upstream repo
git clone https://github.com/majiayu000/claude-skill-registry
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/majiayu000/claude-skill-registry "$T" && mkdir -p ~/.claude/skills && cp -r "$T/skills/data/hashing-techniques" ~/.claude/skills/majiayu000-claude-skill-registry-hashing-techniques && rm -rf "$T"
manifest:
skills/data/hashing-techniques/SKILL.mdsource content
Hashing Techniques Skill
Atomic Responsibility: Execute hash-based lookups and data organization.
Frequency Counting Pattern
from typing import List from collections import Counter import heapq def top_k_frequent(nums: List[int], k: int) -> List[int]: """ Find k most frequent elements. Time: O(n log k), Space: O(n) """ count = Counter(nums) return heapq.nlargest(k, count.keys(), key=count.get) def top_k_frequent_bucket(nums: List[int], k: int) -> List[int]: """ Bucket sort approach for O(n) time. Time: O(n), Space: O(n) """ count = Counter(nums) buckets = [[] for _ in range(len(nums) + 1)] for num, freq in count.items(): buckets[freq].append(num) result = [] for i in range(len(buckets) - 1, -1, -1): for num in buckets[i]: result.append(num) if len(result) == k: return result return result
Duplicate Detection
def contains_duplicate(nums: List[int]) -> bool: """O(n) time, O(n) space using set.""" return len(nums) != len(set(nums)) def find_duplicates(nums: List[int]) -> List[int]: """Find all duplicate elements.""" seen = set() duplicates = [] for num in nums: if num in seen: duplicates.append(num) else: seen.add(num) return duplicates def find_duplicates_inplace(nums: List[int]) -> List[int]: """ Find duplicates with O(1) space using index marking. Requires: values in range [1, n] """ result = [] for num in nums: index = abs(num) - 1 if nums[index] < 0: result.append(abs(num)) else: nums[index] = -nums[index] return result
LRU Cache Implementation
from collections import OrderedDict class LRUCache: """ Least Recently Used cache with O(1) operations. Time: O(1) for get and put Space: O(capacity) """ def __init__(self, capacity: int): if capacity <= 0: raise ValueError("Capacity must be positive") self.cache = OrderedDict() self.capacity = capacity def get(self, key: int) -> int: """Get value and mark as recently used.""" if key not in self.cache: return -1 self.cache.move_to_end(key) return self.cache[key] def put(self, key: int, value: int) -> None: """Add/update value and maintain capacity.""" if key in self.cache: self.cache.move_to_end(key) self.cache[key] = value if len(self.cache) > self.capacity: self.cache.popitem(last=False)
Group Anagrams
from collections import defaultdict def group_anagrams(strs: List[str]) -> List[List[str]]: """ Group strings that are anagrams of each other. Time: O(n * k log k) where k = max string length Space: O(n * k) """ groups = defaultdict(list) for s in strs: key = tuple(sorted(s)) groups[key].append(s) return list(groups.values()) def group_anagrams_count(strs: List[str]) -> List[List[str]]: """ Alternative using character count as key. Time: O(n * k), Space: O(n * k) """ groups = defaultdict(list) for s in strs: count = [0] * 26 for c in s: count[ord(c) - ord('a')] += 1 key = tuple(count) groups[key].append(s) return list(groups.values())
Unit Test Template
import pytest class TestHashingTechniques: def test_top_k_frequent(self): assert set(top_k_frequent([1, 1, 1, 2, 2, 3], 2)) == {1, 2} def test_contains_duplicate(self): assert contains_duplicate([1, 2, 3, 1]) == True assert contains_duplicate([1, 2, 3]) == False def test_lru_cache(self): cache = LRUCache(2) cache.put(1, 1) cache.put(2, 2) assert cache.get(1) == 1 cache.put(3, 3) assert cache.get(2) == -1 def test_group_anagrams(self): result = group_anagrams(["eat", "tea", "tan", "ate", "nat", "bat"]) assert len(result) == 3
Troubleshooting
| Issue | Cause | Solution |
|---|---|---|
| KeyError | Missing key access | Use with default |
| TypeError | Unhashable type | Convert list to tuple |
| Memory exceeded | Storing full objects | Store indices only |
Debug Checklist
□ Key type is hashable? □ Using get() with default? □ Handling empty input? □ Memory efficient storage?