|
| 1 | +/* |
| 2 | + * Licensed to the Apache Software Foundation (ASF) under one |
| 3 | + * or more contributor license agreements. See the NOTICE file |
| 4 | + * distributed with this work for additional information |
| 5 | + * regarding copyright ownership. The ASF licenses this file |
| 6 | + * to you under the Apache License, Version 2.0 (the |
| 7 | + * "License"); you may not use this file except in compliance |
| 8 | + * with the License. You may obtain a copy of the License at |
| 9 | + * |
| 10 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | + * |
| 12 | + * Unless required by applicable law or agreed to in writing, |
| 13 | + * software distributed under the License is distributed on an |
| 14 | + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 15 | + * KIND, either express or implied. See the License for the |
| 16 | + * specific language governing permissions and limitations |
| 17 | + * under the License. |
| 18 | + */ |
| 19 | + |
| 20 | +package org.apache.datasketches.hll; |
| 21 | + |
| 22 | +import static org.apache.datasketches.common.Util.pwr2SeriesNext; |
| 23 | +import static org.testng.Assert.assertTrue; |
| 24 | + |
| 25 | +import org.testng.annotations.Test; |
| 26 | + |
| 27 | +/** |
| 28 | + * This test demonstrates that DataSketch HLL merging is not order-dependent if you use the Composite estimator. |
| 29 | + */ |
| 30 | +public class HllSketchMergeOrderTest { |
| 31 | + |
| 32 | + private static final int LgK = 11; |
| 33 | + |
| 34 | + @Test |
| 35 | + public void testDataSketchHLLMergeOrderDependency() { |
| 36 | + final int ppo = 1 << 17; //= 131,072 unique Points Per Octave. |
| 37 | + |
| 38 | + // Create 3 sketches with fractional powers of 2 series |
| 39 | + final HllSketch sketchA = createUniquePowerSeriesSketch(1L << 59, ppo, 1L << 60); |
| 40 | + final HllSketch sketchB = createUniquePowerSeriesSketch(1L << 60, ppo, 1L << 61); |
| 41 | + final HllSketch sketchC = createUniquePowerSeriesSketch(1L << 61, ppo, 1L << 62); |
| 42 | + |
| 43 | + final double skAEst = sketchA.getCompositeEstimate(); |
| 44 | + final double skBEst = sketchB.getCompositeEstimate(); |
| 45 | + final double skCEst = sketchC.getCompositeEstimate(); |
| 46 | + |
| 47 | + final double skA_RE = (skAEst/ppo) - 1.0; |
| 48 | + final double skB_RE = (skBEst/ppo) - 1.0; |
| 49 | + final double skC_RE = (skCEst/ppo) - 1.0; |
| 50 | + |
| 51 | + //Print individual composite estimates and Relative Error: |
| 52 | + println(""); |
| 53 | + println("SketchA estimate: " + skAEst + ", RE%: " + (skA_RE * 100)); |
| 54 | + println("SketchB estimate: " + skBEst + ", RE%: " + (skB_RE * 100)); |
| 55 | + println("SketchC estimate: " + skCEst + ", RE%: " + (skC_RE * 100)); |
| 56 | + |
| 57 | + println("\nNOTE: Sketch Relative Error for Composite Estimator for LgK = 11 is +/- 4.6% at 95% confidence.\n"); |
| 58 | + |
| 59 | + // Test six different merge orders: |
| 60 | + final double estABC = mergeThreeSketches(sketchA, sketchB, sketchC); |
| 61 | + final double estACB = mergeThreeSketches(sketchA, sketchC, sketchB); |
| 62 | + final double estBAC = mergeThreeSketches(sketchB, sketchA, sketchC); |
| 63 | + final double estBCA = mergeThreeSketches(sketchB, sketchC, sketchA); |
| 64 | + final double estCAB = mergeThreeSketches(sketchC, sketchA, sketchB); |
| 65 | + final double estCBA = mergeThreeSketches(sketchC, sketchB, sketchA); |
| 66 | + |
| 67 | + println("Merge order ABC: " + estABC); |
| 68 | + println("Merge order ACB: " + estACB); |
| 69 | + println("Merge order BAC: " + estBAC); |
| 70 | + println("Merge order BCA: " + estBCA); |
| 71 | + println("Merge order CAB: " + estCAB); |
| 72 | + println("Merge order CBA: " + estCBA); |
| 73 | + |
| 74 | + assertTrue((estABC == estACB) && (estABC == estBAC) && (estABC == estBCA) && (estABC == estCAB) && (estABC == estCBA)); |
| 75 | + } |
| 76 | + |
| 77 | + /** |
| 78 | + * Generates a power series based on fractional powers of 2 where the separation between successive values is 2^(1/ppo). |
| 79 | + * @param baseValue starting value, inclusive |
| 80 | + * @param ppo number of unique points per octave |
| 81 | + * @param limit the upper limit, exclusive |
| 82 | + * @return the loaded sketch |
| 83 | + */ |
| 84 | + private HllSketch createUniquePowerSeriesSketch(final long baseValue, final int ppo, final long limit) { |
| 85 | + final HllSketch sketch = new HllSketch(LgK); |
| 86 | + int count = 0; |
| 87 | + long lastp = 0; |
| 88 | + for (long p = baseValue; p < limit; p = pwr2SeriesNext(ppo, p)) { |
| 89 | + sketch.update(p); |
| 90 | + count++; |
| 91 | + lastp = p; |
| 92 | + } |
| 93 | + println("BaseValue: " + baseValue + ", limit: " + limit + ", Count: " + count + ", lastPt: " + lastp); |
| 94 | + return sketch; |
| 95 | + } |
| 96 | + |
| 97 | + /** |
| 98 | + * Merges three sketches in the specified order and returns the composite estimate |
| 99 | + */ |
| 100 | + private double mergeThreeSketches(final HllSketch s1, final HllSketch s2, final HllSketch s3) { |
| 101 | + final Union union = new Union(LgK); |
| 102 | + |
| 103 | + union.update(s1); |
| 104 | + union.update(s2); |
| 105 | + union.update(s3); |
| 106 | + |
| 107 | + return union.getCompositeEstimate(); |
| 108 | + } |
| 109 | + |
| 110 | + //@Test |
| 111 | + /** |
| 112 | + * Used for tweaking the generator algorithm |
| 113 | + */ |
| 114 | + public void checkNewGenerator() { |
| 115 | + final long baseValue = 1L << 59; |
| 116 | + final int ppo = 1 << 10; |
| 117 | + final long limit = 1L << 60; |
| 118 | + int count = 0; |
| 119 | + long lastp = 0; |
| 120 | + for (long p = baseValue; p < limit; p = pwr2SeriesNext(ppo, p)) { |
| 121 | + count++; |
| 122 | + println(count + ", " + p); |
| 123 | + lastp = p; |
| 124 | + } |
| 125 | + println("\nPPO: " + ppo); |
| 126 | + println("Count: " + count); |
| 127 | + println("baseValue: " + baseValue); |
| 128 | + println("last p: " + lastp); |
| 129 | + println("limit: " + limit); |
| 130 | + } |
| 131 | + |
| 132 | + private static void println(final Object o) { |
| 133 | + System.out.println(o.toString()); |
| 134 | + } |
| 135 | + |
| 136 | +} |
0 commit comments