|
45 | 45 | import org.apache.datasketches.common.SketchesArgumentException; |
46 | 46 | import org.apache.datasketches.common.SketchesException; |
47 | 47 | import org.apache.datasketches.common.SketchesStateException; |
48 | | -import org.apache.datasketches.sampling.PreambleUtil; |
49 | | -import org.apache.datasketches.sampling.ReservoirItemsSketch; |
50 | | -import org.apache.datasketches.sampling.ReservoirSize; |
51 | | -import org.apache.datasketches.sampling.SampleSubsetSummary; |
52 | 48 | import org.testng.annotations.Test; |
53 | 49 |
|
54 | 50 | public class ReservoirItemsSketchTest { |
@@ -534,60 +530,70 @@ public void checkForceIncrement() { |
534 | 530 | @Test |
535 | 531 | public void checkEstimateSubsetSum() { |
536 | 532 | final int k = 10; |
537 | | - final ReservoirItemsSketch<Long> sketch = ReservoirItemsSketch.newInstance(k); |
538 | | - |
539 | | - // empty sketch -- all zeros |
540 | | - SampleSubsetSummary ss = sketch.estimateSubsetSum(item -> true); |
541 | | - assertEquals(ss.getEstimate(), 0.0); |
542 | | - assertEquals(ss.getTotalSketchWeight(), 0.0); |
543 | | - |
544 | | - // add items, keeping in exact mode |
| 533 | + SampleSubsetSummary ss = null; |
545 | 534 | double itemCount = 0.0; |
546 | | - for (long i = 1; i <= (k - 1); ++i) { |
547 | | - sketch.update(i); |
548 | | - itemCount += 1.0; |
549 | | - } |
550 | | - |
551 | | - ss = sketch.estimateSubsetSum(item -> true); |
552 | | - assertEquals(ss.getEstimate(), itemCount); |
553 | | - assertEquals(ss.getLowerBound(), itemCount); |
554 | | - assertEquals(ss.getUpperBound(), itemCount); |
555 | | - assertEquals(ss.getTotalSketchWeight(), itemCount); |
556 | | - |
557 | | - // add a few more items, pushing to sampling mode |
558 | | - for (long i = k; i <= (k + 1); ++i) { |
559 | | - sketch.update(i); |
560 | | - itemCount += 1.0; |
561 | | - } |
562 | | - |
563 | | - // predicate always true so estimate == upper bound |
564 | | - ss = sketch.estimateSubsetSum(item -> true); |
565 | | - assertEquals(ss.getEstimate(), itemCount); |
566 | | - assertEquals(ss.getUpperBound(), itemCount); |
567 | | - assertTrue(ss.getLowerBound() < itemCount); |
568 | | - assertEquals(ss.getTotalSketchWeight(), itemCount); |
569 | | - |
570 | | - // predicate always false so estimate == lower bound == 0.0 |
571 | | - ss = sketch.estimateSubsetSum(item -> false); |
572 | | - assertEquals(ss.getEstimate(), 0.0); |
573 | | - assertEquals(ss.getLowerBound(), 0.0); |
574 | | - assertTrue(ss.getUpperBound() > 0.0); |
575 | | - assertEquals(ss.getTotalSketchWeight(), itemCount); |
576 | | - |
577 | | - // finally, a non-degenerate predicate |
578 | | - // insert negative items with identical weights, filter for negative weights only |
579 | | - for (long i = 1; i <= (k + 1); ++i) { |
580 | | - sketch.update(-i); |
581 | | - itemCount += 1.0; |
582 | | - } |
583 | 535 |
|
584 | | - ss = sketch.estimateSubsetSum(item -> item < 0); |
585 | | - assertTrue(ss.getEstimate() >= ss.getLowerBound()); |
586 | | - assertTrue(ss.getEstimate() <= ss.getUpperBound()); |
| 536 | + // trial loop: the bound checks below are probabilistic, so run 3 independent trials on fresh sketches |
| 537 | + int passLB = 0; |
| 538 | + int passUB = 0; |
| 539 | + for (int t = 0; t < 3; t++) { |
| 540 | + final ReservoirItemsSketch<Long> sketch = ReservoirItemsSketch.newInstance(k); |
| 541 | + |
| 542 | + // empty sketch -- all zeros |
| 543 | + ss = sketch.estimateSubsetSum(item -> true); |
| 544 | + assertEquals(ss.getEstimate(), 0.0); |
| 545 | + assertEquals(ss.getTotalSketchWeight(), 0.0); |
| 546 | + |
| 547 | + // add items, keeping in exact mode |
| 548 | + itemCount = 0.0; |
| 549 | + for (long i = 1; i <= (k - 1); ++i) { |
| 550 | + sketch.update(i); |
| 551 | + itemCount += 1.0; |
| 552 | + } |
| 553 | + |
| 554 | + ss = sketch.estimateSubsetSum(item -> true); |
| 555 | + assertEquals(ss.getEstimate(), itemCount); |
| 556 | + assertEquals(ss.getLowerBound(), itemCount); |
| 557 | + assertEquals(ss.getUpperBound(), itemCount); |
| 558 | + assertEquals(ss.getTotalSketchWeight(), itemCount); |
| 559 | + |
| 560 | + // add a few more items, pushing to sampling mode |
| 561 | + for (long i = k; i <= (k + 1); ++i) { |
| 562 | + sketch.update(i); |
| 563 | + itemCount += 1.0; |
| 564 | + } |
| 565 | + |
| 566 | + // predicate always true so estimate == upper bound |
| 567 | + ss = sketch.estimateSubsetSum(item -> true); |
| 568 | + assertEquals(ss.getEstimate(), itemCount); |
| 569 | + assertEquals(ss.getUpperBound(), itemCount); |
| 570 | + assertTrue(ss.getLowerBound() < itemCount); |
| 571 | + assertEquals(ss.getTotalSketchWeight(), itemCount); |
| 572 | + |
| 573 | + // predicate always false so estimate == lower bound == 0.0 |
| 574 | + ss = sketch.estimateSubsetSum(item -> false); |
| 575 | + assertEquals(ss.getEstimate(), 0.0); |
| 576 | + assertEquals(ss.getLowerBound(), 0.0); |
| 577 | + assertTrue(ss.getUpperBound() > 0.0); |
| 578 | + assertEquals(ss.getTotalSketchWeight(), itemCount); |
| 579 | + |
| 580 | + // finally, a non-degenerate predicate |
| 581 | + // insert negative items with identical weights, filter for negative items only |
| 582 | + for (long i = 1; i <= (k + 1); ++i) { |
| 583 | + sketch.update(-i); |
| 584 | + itemCount += 1.0; |
| 585 | + } |
| 586 | + |
| 587 | + ss = sketch.estimateSubsetSum(item -> item < 0); |
| 588 | + assertTrue(ss.getEstimate() >= ss.getLowerBound()); |
| 589 | + assertTrue(ss.getEstimate() <= ss.getUpperBound()); |
| 590 | + |
| 591 | + // allow pretty generous bounds when testing |
| 592 | + if(ss.getLowerBound() < (itemCount / 1.4)) { passLB++; } |
| 593 | + if(ss.getUpperBound() > (itemCount / 2.6)) { passUB++; } |
| 594 | + } //End trial loop |
| 595 | + assertTrue(passLB >= 2 && passUB >= 2); // at least 2 of the 3 trials must pass for each of LB and UB |
587 | 596 |
|
588 | | - // allow pretty generous bounds when testing |
589 | | - assertTrue(ss.getLowerBound() < (itemCount / 1.4)); |
590 | | - assertTrue(ss.getUpperBound() > (itemCount / 2.6)); |
591 | 597 | assertEquals(ss.getTotalSketchWeight(), itemCount); |
592 | 598 | } |
593 | 599 |
|
|
0 commit comments