Skip to content
This repository was archived by the owner on Jul 7, 2020. It is now read-only.

Commit 94af55c

Browse files
author
abramsm
committed
fixes #7
There were two problems. 1) Math.pow(2, 32) == Math.pow(-2, 32) in Java (an even exponent discards the sign), so the "negative" constant was not actually negative; not good... 2) The long-range correction algorithm reduces precision in some cases, so I've given the caller the option of disabling it.
1 parent c49e035 commit 94af55c

File tree

2 files changed

+37
-3
lines changed

2 files changed

+37
-3
lines changed

src/main/java/com/clearspring/analytics/stream/cardinality/HyperLogLog.java

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,8 @@
5454
*/
5555
public class HyperLogLog implements ICardinality
5656
{
57-
private final static int POW_2_32 = (int) Math.pow(2, 32);
58-
private final static int NEGATIVE_POW_2_32 = (int) Math.pow(-2, 32);
57+
private final static double POW_2_32 = Math.pow(2, 32);
58+
private final static double NEGATIVE_POW_2_32 = -4294967296.0; // -(2^32): must be negative so the large-range correction -2^32 * ln(1 - E / 2^32) yields a positive estimate
5959

6060
private final RegisterSet registerSet;
6161
private final int log2m;
@@ -180,6 +180,11 @@ public boolean offer(Object o)
180180

181181
@Override
182182
public long cardinality()
183+
{
184+
return cardinality(true);
185+
}
186+
187+
public long cardinality(boolean enableLongRangeCorrection)
183188
{
184189
double registerSum = 0;
185190
int count = registerSet.count;
@@ -211,7 +216,14 @@ else if (estimate <= (1.0 / 30.0) * POW_2_32)
211216
else if (estimate > (1.0 / 30.0) * POW_2_32)
212217
{
213218
// Large Range Estimate
214-
return Math.round((NEGATIVE_POW_2_32 * Math.log(1 - (estimate / POW_2_32))));
219+
if (enableLongRangeCorrection)
220+
{
221+
return Math.round((NEGATIVE_POW_2_32 * Math.log(1.0 - (estimate / POW_2_32))));
222+
}
223+
else
224+
{
225+
return Math.round(estimate);
226+
}
215227
}
216228
return 0;
217229
}

src/test/java/com/clearspring/analytics/stream/cardinality/TestHyperLogLog.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,4 +122,26 @@ public void testMerge() throws CardinalityMergeException
122122
assertTrue(mergedEstimate >= expectedCardinality - (3 * se));
123123
assertTrue(mergedEstimate <= expectedCardinality + (3 * se));
124124
}
125+
126+
@Test
127+
public void testPrecise_disableLongRangeCorrection() throws CardinalityMergeException
128+
{
129+
int cardinality = 150000000;
130+
131+
HyperLogLog baseline = new HyperLogLog(20);
132+
for (int j = 0; j < cardinality; j++)
133+
{
134+
double val = Math.random();
135+
baseline.offer(val);
136+
}
137+
138+
139+
long mergedEstimate = baseline.cardinality(false);
140+
double se = cardinality * (1.04 / Math.sqrt(Math.pow(2, 20)));
141+
142+
System.out.println("Expect estimate: " + mergedEstimate + " is between " + (cardinality - (3 * se)) + " and " + (cardinality + (3 * se)));
143+
144+
assertTrue(mergedEstimate >= cardinality - (3 * se));
145+
assertTrue(mergedEstimate <= cardinality + (3 * se));
146+
}
125147
}

0 commit comments

Comments
 (0)