Skip to content

Commit f3a4025

Browse files
committed
Add Hll Sketch Merge Order Test.
1 parent 0d09395 commit f3a4025

1 file changed

Lines changed: 136 additions & 0 deletions

File tree

Lines changed: 136 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,136 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.datasketches.hll;
21+
22+
import static org.apache.datasketches.common.Util.pwr2SeriesNext;
23+
import static org.testng.Assert.assertTrue;
24+
25+
import org.testng.annotations.Test;
26+
27+
/**
28+
* This test demonstrates that DataSketch HLL merging is not order-dependent if you use the Composite estimator.
29+
*/
30+
public class HllSketchMergeOrderTest {
31+
32+
private static final int LgK = 11;
33+
34+
@Test
35+
public void testDataSketchHLLMergeOrderDependency() {
36+
final int ppo = 1 << 17; //= 131,072 unique Points Per Octave.
37+
38+
// Create 3 sketches with fractional powers of 2 series
39+
final HllSketch sketchA = createUniquePowerSeriesSketch(1L << 59, ppo, 1L << 60);
40+
final HllSketch sketchB = createUniquePowerSeriesSketch(1L << 60, ppo, 1L << 61);
41+
final HllSketch sketchC = createUniquePowerSeriesSketch(1L << 61, ppo, 1L << 62);
42+
43+
final double skAEst = sketchA.getCompositeEstimate();
44+
final double skBEst = sketchB.getCompositeEstimate();
45+
final double skCEst = sketchC.getCompositeEstimate();
46+
47+
final double skA_RE = (skAEst/ppo) - 1.0;
48+
final double skB_RE = (skBEst/ppo) - 1.0;
49+
final double skC_RE = (skCEst/ppo) - 1.0;
50+
51+
//Print individual composite estimates and Relative Error:
52+
println("");
53+
println("SketchA estimate: " + skAEst + ", RE%: " + (skA_RE * 100));
54+
println("SketchB estimate: " + skBEst + ", RE%: " + (skB_RE * 100));
55+
println("SketchC estimate: " + skCEst + ", RE%: " + (skC_RE * 100));
56+
57+
println("\nNOTE: Sketch Relative Error for Composite Estimator for LgK = 11 is +/- 4.6% at 95% confidence.\n");
58+
59+
// Test six different merge orders:
60+
final double estABC = mergeThreeSketches(sketchA, sketchB, sketchC);
61+
final double estACB = mergeThreeSketches(sketchA, sketchC, sketchB);
62+
final double estBAC = mergeThreeSketches(sketchB, sketchA, sketchC);
63+
final double estBCA = mergeThreeSketches(sketchB, sketchC, sketchA);
64+
final double estCAB = mergeThreeSketches(sketchC, sketchA, sketchB);
65+
final double estCBA = mergeThreeSketches(sketchC, sketchB, sketchA);
66+
67+
println("Merge order ABC: " + estABC);
68+
println("Merge order ACB: " + estACB);
69+
println("Merge order BAC: " + estBAC);
70+
println("Merge order BCA: " + estBCA);
71+
println("Merge order CAB: " + estCAB);
72+
println("Merge order CBA: " + estCBA);
73+
74+
assertTrue((estABC == estACB) && (estABC == estBAC) && (estABC == estBCA) && (estABC == estCAB) && (estABC == estCBA));
75+
}
76+
77+
/**
78+
* Generates a power series based on fractional powers of 2 where the separation between successive values is 2^(1/ppo).
79+
* @param baseValue starting value, inclusive
80+
* @param ppo number of unique points per octave
81+
* @param limit the upper limit, exclusive
82+
* @return the loaded sketch
83+
*/
84+
private HllSketch createUniquePowerSeriesSketch(final long baseValue, final int ppo, final long limit) {
85+
final HllSketch sketch = new HllSketch(LgK);
86+
int count = 0;
87+
long lastp = 0;
88+
for (long p = baseValue; p < limit; p = pwr2SeriesNext(ppo, p)) {
89+
sketch.update(p);
90+
count++;
91+
lastp = p;
92+
}
93+
println("BaseValue: " + baseValue + ", limit: " + limit + ", Count: " + count + ", lastPt: " + lastp);
94+
return sketch;
95+
}
96+
97+
/**
98+
* Merges three sketches in the specified order and returns the composite estimate
99+
*/
100+
private double mergeThreeSketches(final HllSketch s1, final HllSketch s2, final HllSketch s3) {
101+
final Union union = new Union(LgK);
102+
103+
union.update(s1);
104+
union.update(s2);
105+
union.update(s3);
106+
107+
return union.getCompositeEstimate();
108+
}
109+
110+
//@Test
111+
/**
112+
* Used for tweaking the generator algorithm
113+
*/
114+
public void checkNewGenerator() {
115+
final long baseValue = 1L << 59;
116+
final int ppo = 1 << 10;
117+
final long limit = 1L << 60;
118+
int count = 0;
119+
long lastp = 0;
120+
for (long p = baseValue; p < limit; p = pwr2SeriesNext(ppo, p)) {
121+
count++;
122+
println(count + ", " + p);
123+
lastp = p;
124+
}
125+
println("\nPPO: " + ppo);
126+
println("Count: " + count);
127+
println("baseValue: " + baseValue);
128+
println("last p: " + lastp);
129+
println("limit: " + limit);
130+
}
131+
132+
private static void println(final Object o) {
133+
System.out.println(o.toString());
134+
}
135+
136+
}

0 commit comments

Comments
 (0)