Skip to content

Commit 2139854

Browse files
committed
Improve probabilistic test to make it extremely unlikely that it will
fail just due to random variation in sequence.
1 parent 54132a4 commit 2139854

1 file changed

Lines changed: 61 additions & 55 deletions

File tree

src/test/java/org/apache/datasketches/sampling/ReservoirItemsSketchTest.java

Lines changed: 61 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,6 @@
4545
import org.apache.datasketches.common.SketchesArgumentException;
4646
import org.apache.datasketches.common.SketchesException;
4747
import org.apache.datasketches.common.SketchesStateException;
48-
import org.apache.datasketches.sampling.PreambleUtil;
49-
import org.apache.datasketches.sampling.ReservoirItemsSketch;
50-
import org.apache.datasketches.sampling.ReservoirSize;
51-
import org.apache.datasketches.sampling.SampleSubsetSummary;
5248
import org.testng.annotations.Test;
5349

5450
public class ReservoirItemsSketchTest {
@@ -534,60 +530,70 @@ public void checkForceIncrement() {
534530
@Test
535531
public void checkEstimateSubsetSum() {
536532
final int k = 10;
537-
final ReservoirItemsSketch<Long> sketch = ReservoirItemsSketch.newInstance(k);
538-
539-
// empty sketch -- all zeros
540-
SampleSubsetSummary ss = sketch.estimateSubsetSum(item -> true);
541-
assertEquals(ss.getEstimate(), 0.0);
542-
assertEquals(ss.getTotalSketchWeight(), 0.0);
543-
544-
// add items, keeping in exact mode
533+
SampleSubsetSummary ss = null;
545534
double itemCount = 0.0;
546-
for (long i = 1; i <= (k - 1); ++i) {
547-
sketch.update(i);
548-
itemCount += 1.0;
549-
}
550-
551-
ss = sketch.estimateSubsetSum(item -> true);
552-
assertEquals(ss.getEstimate(), itemCount);
553-
assertEquals(ss.getLowerBound(), itemCount);
554-
assertEquals(ss.getUpperBound(), itemCount);
555-
assertEquals(ss.getTotalSketchWeight(), itemCount);
556-
557-
// add a few more items, pushing to sampling mode
558-
for (long i = k; i <= (k + 1); ++i) {
559-
sketch.update(i);
560-
itemCount += 1.0;
561-
}
562-
563-
// predicate always true so estimate == upper bound
564-
ss = sketch.estimateSubsetSum(item -> true);
565-
assertEquals(ss.getEstimate(), itemCount);
566-
assertEquals(ss.getUpperBound(), itemCount);
567-
assertTrue(ss.getLowerBound() < itemCount);
568-
assertEquals(ss.getTotalSketchWeight(), itemCount);
569-
570-
// predicate always false so estimate == lower bound == 0.0
571-
ss = sketch.estimateSubsetSum(item -> false);
572-
assertEquals(ss.getEstimate(), 0.0);
573-
assertEquals(ss.getLowerBound(), 0.0);
574-
assertTrue(ss.getUpperBound() > 0.0);
575-
assertEquals(ss.getTotalSketchWeight(), itemCount);
576-
577-
// finally, a non-degenerate predicate
578-
// insert negative items with identical weights, filter for negative weights only
579-
for (long i = 1; i <= (k + 1); ++i) {
580-
sketch.update(-i);
581-
itemCount += 1.0;
582-
}
583535

584-
ss = sketch.estimateSubsetSum(item -> item < 0);
585-
assertTrue(ss.getEstimate() >= ss.getLowerBound());
586-
assertTrue(ss.getEstimate() <= ss.getUpperBound());
536+
//trial loop for probabilistic testing
537+
int passLB = 0;
538+
int passUB = 0;
539+
for (int t = 0; t < 3; t++) {
540+
final ReservoirItemsSketch<Long> sketch = ReservoirItemsSketch.newInstance(k);
541+
542+
// empty sketch -- all zeros
543+
ss = sketch.estimateSubsetSum(item -> true);
544+
assertEquals(ss.getEstimate(), 0.0);
545+
assertEquals(ss.getTotalSketchWeight(), 0.0);
546+
547+
// add items, keeping in exact mode
548+
itemCount = 0.0;
549+
for (long i = 1; i <= (k - 1); ++i) {
550+
sketch.update(i);
551+
itemCount += 1.0;
552+
}
553+
554+
ss = sketch.estimateSubsetSum(item -> true);
555+
assertEquals(ss.getEstimate(), itemCount);
556+
assertEquals(ss.getLowerBound(), itemCount);
557+
assertEquals(ss.getUpperBound(), itemCount);
558+
assertEquals(ss.getTotalSketchWeight(), itemCount);
559+
560+
// add a few more items, pushing to sampling mode
561+
for (long i = k; i <= (k + 1); ++i) {
562+
sketch.update(i);
563+
itemCount += 1.0;
564+
}
565+
566+
// predicate always true so estimate == upper bound
567+
ss = sketch.estimateSubsetSum(item -> true);
568+
assertEquals(ss.getEstimate(), itemCount);
569+
assertEquals(ss.getUpperBound(), itemCount);
570+
assertTrue(ss.getLowerBound() < itemCount);
571+
assertEquals(ss.getTotalSketchWeight(), itemCount);
572+
573+
// predicate always false so estimate == lower bound == 0.0
574+
ss = sketch.estimateSubsetSum(item -> false);
575+
assertEquals(ss.getEstimate(), 0.0);
576+
assertEquals(ss.getLowerBound(), 0.0);
577+
assertTrue(ss.getUpperBound() > 0.0);
578+
assertEquals(ss.getTotalSketchWeight(), itemCount);
579+
580+
// finally, a non-degenerate predicate
581+
// insert negative items with identical weights, filter for negative weights only
582+
for (long i = 1; i <= (k + 1); ++i) {
583+
sketch.update(-i);
584+
itemCount += 1.0;
585+
}
586+
587+
ss = sketch.estimateSubsetSum(item -> item < 0);
588+
assertTrue(ss.getEstimate() >= ss.getLowerBound());
589+
assertTrue(ss.getEstimate() <= ss.getUpperBound());
590+
591+
// allow pretty generous bounds when testing
592+
if(ss.getLowerBound() < (itemCount / 1.4)) { passLB++; }
593+
if(ss.getUpperBound() > (itemCount / 2.6)) { passUB++; }
594+
} //End trial loop
595+
assertTrue(passLB >= 2 && passUB >= 2); //2 out of 3 must pass for LB and UB
587596

588-
// allow pretty generous bounds when testing
589-
assertTrue(ss.getLowerBound() < (itemCount / 1.4));
590-
assertTrue(ss.getUpperBound() > (itemCount / 2.6));
591597
assertEquals(ss.getTotalSketchWeight(), itemCount);
592598
}
593599

0 commit comments

Comments
 (0)