Skip to content

Commit f5a0eb8

Browse files
author
Peter Alfonsi
committed
Simplified RBM size estimator
Signed-off-by: Peter Alfonsi <[email protected]>
1 parent 35a22c2 commit f5a0eb8

File tree

5 files changed

+63
-185
lines changed

5 files changed

+63
-185
lines changed

server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyLookupStore.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ public interface KeyLookupStore<T> {
103103

104104
/**
105105
* Returns an estimate of the store's memory usage.
106-
* @return The memory usage, in MB
106+
* @return The memory usage
107107
*/
108108
long getMemorySizeInBytes();
109109

server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyStoreStats.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,15 @@ public class KeyStoreStats {
2222
protected CounterMetric numAddAttempts;
2323
protected CounterMetric numCollisions;
2424
protected boolean guaranteesNoFalseNegatives;
25-
protected final int maxNumEntries;
2625
protected AtomicBoolean atCapacity;
2726
protected CounterMetric numRemovalAttempts;
2827
protected CounterMetric numSuccessfulRemovals;
2928

30-
protected KeyStoreStats(long memSizeCapInBytes, int maxNumEntries) {
29+
protected KeyStoreStats(long memSizeCapInBytes) {
3130
this.size = new CounterMetric();
3231
this.numAddAttempts = new CounterMetric();
3332
this.numCollisions = new CounterMetric();
3433
this.memSizeCapInBytes = memSizeCapInBytes;
35-
this.maxNumEntries = maxNumEntries;
3634
this.atCapacity = new AtomicBoolean(false);
3735
this.numRemovalAttempts = new CounterMetric();
3836
this.numSuccessfulRemovals = new CounterMetric();

server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434

3535
import org.opensearch.common.metrics.CounterMetric;
3636

37-
import java.util.ArrayList;
3837
import java.util.HashMap;
3938
import java.util.HashSet;
4039
import java.util.concurrent.locks.Lock;
@@ -77,10 +76,14 @@ public int getValue() {
7776
protected RoaringBitmap rbm;
7877
private HashMap<Integer, CounterMetric> collidedIntCounters;
7978
private HashMap<Integer, HashSet<Integer>> removalSets;
80-
protected RBMSizeEstimator sizeEstimator;
8179
protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
8280
protected final Lock readLock = lock.readLock();
8381
protected final Lock writeLock = lock.writeLock();
82+
private long mostRecentByteEstimate;
83+
private final int REFRESH_SIZE_EST_INTERVAL = 10000;
84+
// Refresh size estimate every X new elements. Refreshes use the RBM's internal size estimator, which takes ~0.01 ms,
85+
// so we don't want to do it on every get(), and it doesn't matter much if there are +- 10000 keys in this store
86+
// in terms of storage impact
8487

8588
// Default constructor sets modulo = 2^28
8689
public RBMIntKeyLookupStore(long memSizeCapInBytes) {
@@ -89,18 +92,11 @@ public RBMIntKeyLookupStore(long memSizeCapInBytes) {
8992

9093
public RBMIntKeyLookupStore(KeystoreModuloValue moduloValue, long memSizeCapInBytes) {
9194
this.modulo = moduloValue.getValue();
92-
sizeEstimator = new RBMSizeEstimator(modulo);
93-
this.stats = new KeyStoreStats(memSizeCapInBytes, calculateMaxNumEntries(memSizeCapInBytes));
95+
this.stats = new KeyStoreStats(memSizeCapInBytes);
9496
this.rbm = new RoaringBitmap();
9597
this.collidedIntCounters = new HashMap<>();
9698
this.removalSets = new HashMap<>();
97-
}
98-
99-
protected int calculateMaxNumEntries(long memSizeCapInBytes) {
100-
if (memSizeCapInBytes == 0) {
101-
return Integer.MAX_VALUE;
102-
}
103-
return sizeEstimator.getNumEntriesFromSizeInBytes(memSizeCapInBytes);
99+
this.mostRecentByteEstimate = 0L;
104100
}
105101

106102
private final int transform(int value) {
@@ -125,7 +121,11 @@ public boolean add(Integer value) {
125121
return false;
126122
}
127123
stats.numAddAttempts.inc();
128-
if (stats.size.count() == stats.maxNumEntries) {
124+
125+
if (getSize() % REFRESH_SIZE_EST_INTERVAL == 0) {
126+
mostRecentByteEstimate = getMemorySizeInBytes();
127+
}
128+
if (getMemorySizeCapInBytes() > 0 && mostRecentByteEstimate > getMemorySizeCapInBytes()) {
129129
stats.atCapacity.set(true);
130130
return false;
131131
}
@@ -273,9 +273,24 @@ public boolean isCollision(Integer value1, Integer value2) {
273273
return transform(value1) == transform(value2);
274274
}
275275

276+
static double getRBMSizeMultiplier(int numEntries, int modulo) {
277+
double x = Math.log10((double) numEntries / modulo);
278+
if (x < -5) {
279+
return 7.0;
280+
}
281+
if (x < -2.75) {
282+
return -2.5 * x - 5.5;
283+
}
284+
if (x <= 0) {
285+
return -3.0 / 22.0 * x + 1;
286+
}
287+
return 1;
288+
}
289+
276290
@Override
277291
public long getMemorySizeInBytes() {
278-
return sizeEstimator.getSizeInBytes((int) stats.size.count()); // + RBMSizeEstimator.getHashsetMemSizeInBytes(collidedInts.size());
292+
double multiplier = getRBMSizeMultiplier((int) stats.size.count(), modulo);
293+
return (long) (rbm.getSizeInBytes() * multiplier);
279294
}
280295

281296
@Override

server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMSizeEstimator.java

Lines changed: 0 additions & 137 deletions
This file was deleted.

server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java

Lines changed: 33 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,9 @@
3232
package org.opensearch.common.cache.tier.keystore;
3333

3434
import org.opensearch.common.Randomness;
35-
import org.opensearch.common.cache.tier.keystore.RBMIntKeyLookupStore;
36-
import org.opensearch.common.cache.tier.keystore.RBMSizeEstimator;
3735
import org.opensearch.common.metrics.CounterMetric;
3836
import org.opensearch.test.OpenSearchTestCase;
37+
import org.roaringbitmap.RoaringBitmap;
3938

4039
import java.util.ArrayList;
4140
import java.util.HashSet;
@@ -45,8 +44,10 @@
4544
import java.util.concurrent.ThreadPoolExecutor;
4645

4746
public class RBMIntKeyLookupStoreTests extends OpenSearchTestCase {
47+
48+
final int BYTES_IN_MB = 1048576;
4849
public void testInit() {
49-
long memCap = 100 * RBMSizeEstimator.BYTES_IN_MB;
50+
long memCap = 100 * BYTES_IN_MB;
5051
RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(memCap);
5152
assertEquals(0, kls.getSize());
5253
assertEquals(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_EIGHT.getValue(), kls.modulo);
@@ -129,16 +130,35 @@ public void testAddingDuplicates() throws Exception {
129130
}
130131

131132
public void testMemoryCapBlocksAdd() throws Exception {
133+
// Now that we're using a modified version of rbm.getSizeInBytes(), which doesn't provide an inverse function,
134+
// we have to test filling just an RBM with random test values first so that we can get the resulting memory cap limit
135+
// to use with our modified size estimate.
136+
// This is much noisier so the precision is lower.
137+
138+
// It is necessary to use randomly distributed integers for both parts of this test, as we would do with hashes in the cache,
139+
// as that's what our size estimator is designed for.
140+
// If we add a run of integers, our size estimator is not valid, especially for small RBMs.
141+
142+
int[] maxEntriesArr = new int[] { 1342000, 100000, 3000000};
143+
long[] rbmReportedSizes = new long[4];
144+
Random rand = Randomness.get();
145+
for (int j = 0; j < maxEntriesArr.length; j++) {
146+
RoaringBitmap rbm = new RoaringBitmap();
147+
for (int i = 0; i < maxEntriesArr[j]; i++) {
148+
rbm.add(rand.nextInt());
149+
}
150+
rbmReportedSizes[j] = rbm.getSizeInBytes();
151+
}
132152
RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE;
133-
for (int maxEntries : new int[] { 2342000, 1000, 100000 }) {
134-
long memSizeCapInBytes = RBMSizeEstimator.getSizeInBytesWithModuloValue(maxEntries, moduloValue);
153+
for (int i = 0; i < maxEntriesArr.length; i++) {
154+
double multiplier = RBMIntKeyLookupStore.getRBMSizeMultiplier(maxEntriesArr[i], moduloValue.getValue());
155+
long memSizeCapInBytes = (long) (rbmReportedSizes[i] * multiplier);
156+
//long memSizeCapInBytes = RBMSizeEstimator.getSizeInBytesWithModuloValue(maxEntries, moduloValue);
135157
RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, memSizeCapInBytes);
136-
for (int j = 0; j < maxEntries + 1000; j++) {
137-
kls.add(j);
158+
for (int j = 0; j < maxEntriesArr[i] + 5000; j++) {
159+
kls.add(rand.nextInt());
138160
}
139-
assertTrue(Math.abs(maxEntries - kls.getSize()) < (double) maxEntries / 25);
140-
// exact cap varies a small amount bc of floating point, especially when we use bytes instead of MB for calculations
141-
// precision gets much worse when we compose the two functions, as we do here, but this wouldn't happen in an actual use case
161+
assertTrue(Math.abs(maxEntriesArr[i] - kls.getSize()) < (double) maxEntriesArr[i] / 10);
142162
}
143163
}
144164

@@ -205,7 +225,7 @@ public void testConcurrency() throws Exception {
205225
}
206226

207227
public void testRemoveNoCollisions() throws Exception {
208-
long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB;
228+
long memCap = 100L * BYTES_IN_MB;
209229
int numToAdd = 195000;
210230
RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.NONE, memCap);
211231
// there should be no collisions for sequential positive numbers up to modulo
@@ -222,7 +242,7 @@ public void testRemoveNoCollisions() throws Exception {
222242

223243
public void testRemoveWithCollisions() throws Exception {
224244
int modulo = (int) Math.pow(2, 26);
225-
long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB;
245+
long memCap = 100L * BYTES_IN_MB;
226246
RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX, memCap);
227247
for (int i = 0; i < 10; i++) {
228248
kls.add(i);
@@ -278,24 +298,6 @@ public void testNullInputs() throws Exception {
278298
assertEquals(4, kls.getSize());
279299
}
280300

281-
public void testMemoryCapValueInitialization() {
282-
RBMIntKeyLookupStore.KeystoreModuloValue[] mods = RBMIntKeyLookupStore.KeystoreModuloValue.values();
283-
double[] expectedMultipliers = new double[] { 1.2, 1.2, 1, 1, 1 };
284-
double[] expectedSlopes = new double[] { 0.637, 0.637, 0.619, 0.614, 0.629 };
285-
double[] expectedIntercepts = new double[] { 3.091, 3.091, 2.993, 2.905, 2.603 };
286-
long memSizeCapInBytes = (long) 100.0 * RBMSizeEstimator.BYTES_IN_MB;
287-
double delta = 0.01;
288-
for (int i = 0; i < mods.length; i++) {
289-
RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = mods[i];
290-
RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, memSizeCapInBytes);
291-
assertEquals(kls.stats.memSizeCapInBytes, kls.getMemorySizeCapInBytes(), 1.0);
292-
assertEquals(expectedMultipliers[i], kls.sizeEstimator.bufferMultiplier, delta);
293-
assertEquals(expectedSlopes[i], kls.sizeEstimator.slope, delta);
294-
assertEquals(expectedIntercepts[i], kls.sizeEstimator.intercept, delta);
295-
}
296-
297-
}
298-
299301
public void testRemovalLogic() throws Exception {
300302
RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX;
301303
int modulo = moduloValue.getValue();
@@ -395,6 +397,6 @@ public void testRemovalLogicWithHashCollision() throws Exception {
395397
assertTrue(removalSet.contains(k1));
396398
assertEquals(1, numCollisions.count());
397399
assertTrue(kls.contains(k1));
398-
assertTrue(kls.contains(k2));
400+
assertTrue(kls.contains(k2));
399401
}
400402
}

0 commit comments

Comments
 (0)