Merge pull request #13 from Ja7ad/feat/consistent_hashing
feat: add consistent hashing algorithm
Ja7ad authored Jan 22, 2025
2 parents c074aab + 6704e70 commit 3793059
Showing 5 changed files with 359 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -24,6 +24,7 @@ been validated in terms of functionality and testing.
| [Reservoir Sampling Algorithm L](./rs/README.md) | Optimized reservoir sampling for large `N`, reduces unnecessary replacements using skipping. |
| [Weighted Reservoir Sampling](./rs/README.md) | Selects items with probability proportional to their weights using a heap-based approach. Used in recommendation systems and A/B testing. |
| [Random Sort Reservoir Sampling](./rs/README.md) | Uses a min-heap and random priorities to maintain the top `k` elements in a streaming dataset. |
| [Consistent Hashing](./ch/README.md) | Used by distributed systems (CDNs, databases) to evenly distribute requests across servers. |

## 🚀 Installation >= go 1.19

84 changes: 84 additions & 0 deletions ch/README.md
@@ -0,0 +1,84 @@
# Consistent Hashing

In computer science, consistent hashing is a hashing technique with the property that when a hash table is resized, only $n/m$ keys need to be remapped on average, where $n$ is the number of keys and $m$ is the number of slots. In contrast, in most traditional hash tables a change in the number of array slots causes nearly all keys to be remapped, because the mapping between keys and slots is defined by a modular operation.

## Projects using this algorithm

- Couchbase automated data partitioning
- OpenStack's Object Storage Service Swift
- Partitioning component of Amazon's storage system Dynamo
- Data partitioning in Apache Cassandra
- Data partitioning in ScyllaDB
- Data partitioning in Voldemort
- Akka's consistent hashing router
- Riak, a distributed key-value database
- Gluster, a network-attached storage file system
- Akamai content delivery network
- Discord chat application
- Load balancing gRPC requests to a distributed cache in SpiceDB
- Chord algorithm
- MinIO object storage system

## 📊 **Mathematical Formula for Consistent Hashing**

### **Problem Definition**
Given a set of `N` nodes and `K` keys, we need to distribute the keys among the nodes **such that minimal data movement is required** when nodes are added or removed.

### **Hash Ring Representation**
1. We define a **circular space** from `0` to `M-1`, where `M = 2^m` for an `m`-bit hash function.
2. Each **node** `n_i` is hashed using function `H(n_i)`, assigning it a position on the ring: $P(n_i) = H(n_i) \mod M$
3. Each **key** `k_j` is hashed to the ring using the same function: $P(k_j) = H(k_j) \mod M$
4. A **key is assigned to the first node encountered in the clockwise direction** from its position.
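Steps 2–4 can be sketched directly in Go, using illustrative ring positions rather than real hash outputs (the same positions appear in the worked example further down):

```go
package main

import (
	"fmt"
	"sort"
)

// lookup finds the first node clockwise from pos on a sorted ring,
// wrapping past the end back to the start.
func lookup(pos int, ring []int, owner map[int]string) string {
	i := sort.Search(len(ring), func(i int) bool { return ring[i] >= pos })
	if i == len(ring) {
		i = 0 // wrap around the ring
	}
	return owner[ring[i]]
}

func main() {
	// Illustrative node positions (not real hash outputs).
	ring := []int{15, 45, 90} // kept sorted so binary search works
	owner := map[int]string{15: "A", 45: "B", 90: "C"}

	fmt.Println(lookup(10, ring, owner)) // first node clockwise from 10 is at 15 -> A
	fmt.Println(lookup(55, ring, owner)) // first node clockwise from 55 is at 90 -> C
	fmt.Println(lookup(95, ring, owner)) // nothing >= 95, wraps to 15 -> A
}
```

The wrap-around on an empty search result is what makes the space circular rather than a plain sorted array.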

### **Load-Balancing Analysis**
Assuming the hash function spreads positions uniformly around the ring (helped in practice by virtual nodes), the expected number of keys per node is: $E[\text{keys per node}] = \frac{K}{N}$
where:
- `K` is the total number of keys.
- `N` is the total number of nodes.

If a node **joins**, it takes over the keys previously mapped to the **next node clockwise**, so only about $\frac{K}{N+1}$ keys are affected, a large reduction compared to the `O(K)` movement of traditional modular hashing.
If a node **leaves**, its keys are reassigned to the **next node clockwise**, again affecting only the roughly $\frac{K}{N}$ keys it held instead of `O(K)`.

### **Time Complexity**
| Operation | Complexity |
|-------------------|------------|
| **Node Addition** | `O(K/N + log N)` |
| **Node Removal** | `O(K/N + log N)` |
| **Key Lookup** | `O(log N)` (Binary Search) |
| **Add a key** | `O(log N)`|
| **Remove a key** | `O(log N)` |



## 🧪 **Mathematical Test Case for Consistent Hashing**
### **Test Case Design**
To validate **Consistent Hashing**, we check:
1. **Keys are spread evenly** across nodes (about `K/N` per node).
2. **Minimal keys move on node addition/removal** (about `K/(N+1)` on addition; the departing node's `K/N` keys on removal).
3. **Lookups are efficient (`O(log N)`)** using binary search over the sorted ring.

### **Example**
#### **Initial Nodes (`N = 3`)**
| Node | Hash Value (Position on Ring) |
|------|-----------------------------|
| `A` | `H(A) = 15` |
| `B` | `H(B) = 45` |
| `C` | `H(C) = 90` |

#### **Keys (`K = 6`)**
| Key | Hash Value | Assigned Node |
|------|-----------|--------------|
| `k1` | `H(k1) = 10` | `A` |
| `k2` | `H(k2) = 30` | `B` |
| `k3` | `H(k3) = 55` | `C` |
| `k4` | `H(k4) = 70` | `C` |
| `k5` | `H(k5) = 85` | `C` |
| `k6` | `H(k6) = 95` | `A` |

#### **After Adding `Node D (H(D) = 60)`**
Only **`k3`** (at position `55`) moves to `D`; every other key is unaffected, including `k4` at `70`, whose first clockwise node is still `C` at `90`.
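Replaying the tables above in Go confirms exactly which keys change owner when `D` joins at position `60` (only keys whose positions fall between `D`'s predecessor `B` at `45` and `D` itself move):

```go
package main

import (
	"fmt"
	"sort"
)

// ownerOf returns the node at the first ring position >= pos, wrapping around.
func ownerOf(pos int, ring []int, owner map[int]string) string {
	i := sort.Search(len(ring), func(i int) bool { return ring[i] >= pos })
	if i == len(ring) {
		i = 0
	}
	return owner[ring[i]]
}

func main() {
	// Key and node positions from the worked example above.
	keys := map[string]int{"k1": 10, "k2": 30, "k3": 55, "k4": 70, "k5": 85, "k6": 95}
	owner := map[int]string{15: "A", 45: "B", 60: "D", 90: "C"}

	before := []int{15, 45, 90}    // A, B, C
	after := []int{15, 45, 60, 90} // D joins at 60

	for name, pos := range keys {
		was, now := ownerOf(pos, before, owner), ownerOf(pos, after, owner)
		if was != now {
			fmt.Printf("%s moved from %s to %s\n", name, was, now)
		}
	}
}
```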
113 changes: 113 additions & 0 deletions ch/ch.go
@@ -0,0 +1,113 @@
package ch

import (
    "hash/crc32"
    "sort"
    "strconv"
    "sync"
)

// Hash function type
type Hash func(data []byte) uint32

// Map represents the consistent hash ring with generics
type Map[T any] struct {
    mu       sync.RWMutex
    hash     Hash
    replicas int
    keys     []int          // Sorted virtual node positions
    hashMap  map[int]string // Virtual node hash -> Real node
    data     map[string]T
}

// New creates a new Consistent Hashing instance
func New[T any](replicas int, fn Hash) *Map[T] {
    m := &Map[T]{
        replicas: replicas,
        hash:     fn,
        hashMap:  make(map[int]string),
        data:     make(map[string]T),
    }
    if m.hash == nil {
        m.hash = crc32.ChecksumIEEE
    }
    return m
}

// AddNode adds a node to the hash ring
func (m *Map[T]) AddNode(node string) {
    m.mu.Lock()
    defer m.mu.Unlock()

    for i := 0; i < m.replicas; i++ {
        hash := int(m.hash([]byte(strconv.Itoa(i) + node)))
        m.keys = append(m.keys, hash)
        m.hashMap[hash] = node
    }

    sort.Ints(m.keys)
}

// RemoveNode removes a node from the hash ring
func (m *Map[T]) RemoveNode(node string) {
    m.mu.Lock()
    defer m.mu.Unlock()

    var newKeys []int
    for _, hash := range m.keys {
        if m.hashMap[hash] != node {
            newKeys = append(newKeys, hash)
        } else {
            delete(m.hashMap, hash)
        }
    }
    m.keys = newKeys
}

// GetNode returns the closest node for the provided key
func (m *Map[T]) GetNode(key string) string {
    m.mu.RLock()
    defer m.mu.RUnlock()

    if len(m.keys) == 0 {
        return ""
    }

    hash := int(m.hash([]byte(key)))
    idx := sort.Search(len(m.keys), func(i int) bool {
        return m.keys[i] >= hash
    })
    if idx == len(m.keys) {
        idx = 0
    }
    return m.hashMap[m.keys[idx]]
}

// AddKey stores a key-value pair in the correct node
func (m *Map[T]) AddKey(key string, value T) {
    node := m.GetNode(key)

    // If no node found, no need to store the value
    if node == "" {
        return
    }

    m.mu.Lock()
    defer m.mu.Unlock()
    m.data[key] = value
}

// RemoveKey deletes a key from the system
func (m *Map[T]) RemoveKey(key string) {
    m.mu.Lock()
    defer m.mu.Unlock()
    delete(m.data, key)
}

// GetKey retrieves a value stored in the system
func (m *Map[T]) GetKey(key string) (T, bool) {
    m.mu.RLock()
    defer m.mu.RUnlock()
    value, exists := m.data[key]
    return value, exists
}
20 changes: 20 additions & 0 deletions ch/ch_example_test.go
@@ -0,0 +1,20 @@
package ch

import "fmt"

func ExampleNew() {
    type UserData struct {
        Name  string
        Email string
    }

    chStruct := New[UserData](3, nil)
    chStruct.AddNode("NodeA")
    chStruct.AddNode("NodeB")

    chStruct.AddKey("user123", UserData{Name: "Alice", Email: "[email protected]"})
    user, exists := chStruct.GetKey("user123")
    if exists {
        fmt.Println("User Data:", user.Name, user.Email)
    }
}
141 changes: 141 additions & 0 deletions ch/ch_test.go
@@ -0,0 +1,141 @@
package ch

import (
    "strconv"
    "testing"
)

func TestConsistentHashing_NodeAddition(t *testing.T) {
    ch := New[string](3, nil)
    ch.AddNode("NodeA")
    ch.AddNode("NodeB")
    ch.AddNode("NodeC")

    key := "my-key"
    node := ch.GetNode(key)

    if node == "" {
        t.Errorf("Expected a valid node, but got an empty string")
    }

    sameNode := ch.GetNode(key)
    if node != sameNode {
        t.Errorf("Expected consistent mapping, but got different results")
    }
}

func TestConsistentHashing_AddGetKey(t *testing.T) {
    ch := New[int](3, nil)
    ch.AddNode("NodeA")
    ch.AddNode("NodeB")

    ch.AddKey("user123", 99)
    ch.AddKey("user456", 42)

    value, exists := ch.GetKey("user123")
    if !exists || value != 99 {
        t.Errorf("Expected 99, but got %d", value)
    }

    value, exists = ch.GetKey("user456")
    if !exists || value != 42 {
        t.Errorf("Expected 42, but got %d", value)
    }
}

func TestConsistentHashing_RemoveKey(t *testing.T) {
    ch := New[string](3, nil)
    ch.AddNode("NodeA")
    ch.AddKey("user123", "Data1")

    ch.RemoveKey("user123")

    _, exists := ch.GetKey("user123")
    if exists {
        t.Errorf("Expected key to be removed, but it still exists")
    }
}

type TestStruct struct {
    Name  string
    Score int
}

func TestConsistentHashing_WithStruct(t *testing.T) {
    ch := New[TestStruct](3, nil)
    ch.AddNode("NodeA")
    ch.AddNode("NodeB")

    data := TestStruct{Name: "Alice", Score: 100}
    ch.AddKey("user123", data)

    retrieved, exists := ch.GetKey("user123")
    if !exists || retrieved.Name != "Alice" || retrieved.Score != 100 {
        t.Errorf("Expected Alice with score 100, but got %+v", retrieved)
    }
}

func BenchmarkConsistentHashing_AddNode(b *testing.B) {
    ch := New[string](100, nil)

    b.ReportAllocs()
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        ch.AddNode("Node" + strconv.Itoa(i))
    }
}

func BenchmarkConsistentHashing_RemoveNode(b *testing.B) {
    ch := New[string](100, nil)
    for i := 0; i < 1000; i++ {
        ch.AddNode("Node" + strconv.Itoa(i))
    }

    b.ReportAllocs()
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        ch.RemoveNode("Node" + strconv.Itoa(i%1000))
    }
}

func BenchmarkConsistentHashing_GetNode(b *testing.B) {
    ch := New[string](100, nil)
    for i := 0; i < 1000; i++ {
        ch.AddNode("Node" + strconv.Itoa(i))
    }

    b.ReportAllocs()
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        _ = ch.GetNode("key" + strconv.Itoa(i))
    }
}

func BenchmarkConsistentHashing_AddKey(b *testing.B) {
    ch := New[string](100, nil)
    for i := 0; i < 1000; i++ {
        ch.AddNode("Node" + strconv.Itoa(i))
    }

    b.ReportAllocs()
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        ch.AddKey("key"+strconv.Itoa(i), "value"+strconv.Itoa(i))
    }
}

func BenchmarkConsistentHashing_RemoveKey(b *testing.B) {
    ch := New[string](100, nil)
    for i := 0; i < 1000; i++ {
        ch.AddNode("Node" + strconv.Itoa(i))
    }
    for i := 0; i < 10000; i++ {
        ch.AddKey("key"+strconv.Itoa(i), "value"+strconv.Itoa(i))
    }

    b.ReportAllocs()
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        ch.RemoveKey("key" + strconv.Itoa(i%10000))
    }
}
