Skip to content

Commit f62d552

Browse files
authored
Merge pull request #677 from apache/fix_minor_issues_with_count_min
Fix minor issues with count min
2 parents 9d1f24f + c6a5154 commit f62d552

1 file changed

Lines changed: 20 additions & 24 deletions

File tree

src/main/java/org/apache/datasketches/count/CountMinSketch.java

Lines changed: 20 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -19,23 +19,20 @@
1919

2020
package org.apache.datasketches.count;
2121

22+
import static java.lang.foreign.ValueLayout.JAVA_BYTE;
23+
import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
24+
25+
import java.lang.foreign.MemorySegment;
26+
import java.nio.charset.StandardCharsets;
27+
import java.util.Random;
28+
2229
import org.apache.datasketches.common.Family;
2330
import org.apache.datasketches.common.SketchesArgumentException;
2431
import org.apache.datasketches.common.SketchesException;
2532
import org.apache.datasketches.common.Util;
2633
import org.apache.datasketches.common.positional.PositionalSegment;
2734
import org.apache.datasketches.hash.MurmurHash3;
2835

29-
import java.lang.foreign.MemorySegment;
30-
import java.nio.charset.StandardCharsets;
31-
import java.util.Random;
32-
33-
import static java.lang.foreign.ValueLayout.JAVA_BYTE;
34-
import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
35-
import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
36-
import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED;
37-
38-
3936
/**
4037
* Java implementation of the CountMin sketch data structure of Cormode and Muthukrishnan.
4138
* This implementation is inspired by and compatible with the datasketches-cpp version by Charlie Dickens.
@@ -85,8 +82,8 @@ int mask() {
8582
throw new SketchesArgumentException("Number of buckets must be positive, got: " + numBuckets);
8683
}
8784
if (numBuckets < 3) {
88-
throw new SketchesArgumentException("Number of buckets must be at least 3 to ensure relative error ≤ 1.0. " +
89-
"With " + numBuckets + " buckets, relative error would be " + String.format("%.3f", Math.exp(1.0) / numBuckets));
85+
throw new SketchesArgumentException("Number of buckets must be at least 3 to ensure relative error ≤ 1.0. "
86+
+ "With " + numBuckets + " buckets, relative error would be " + String.format("%.3f", Math.exp(1.0) / numBuckets));
9087
}
9188

9289
// Check for potential overflow in array size calculation
@@ -101,7 +98,7 @@ int mask() {
10198
// be 2^31-1. We check only against 2^30 for simplicity.
10299
if (totalSize >= (1L << 30)) {
103100
throw new SketchesArgumentException("Sketch would require excessive memory: " + numHashes + " * " + numBuckets
104-
+ " = " + totalSize + " elements (~" + String.format("%d", totalSize * Long.BYTES / (1024 * 1024 * 1024)) + " GB). "
101+
+ " = " + totalSize + " elements (~" + String.format("%d", (totalSize * Long.BYTES) / (1024 * 1024 * 1024)) + " GB). "
105102
+ "Consider reducing numHashes or numBuckets.");
106103
}
107104

@@ -127,13 +124,12 @@ private static byte[] longToBytes(final long value) {
127124
return segment.toArray(JAVA_BYTE);
128125
}
129126

130-
131127
private long[] getHashes(final byte[] item) {
132128
final long[] updateLocations = new long[numHashes_];
133129

134130
for (int i = 0; i < numHashes_; i++) {
135131
final long[] index = MurmurHash3.hash(item, hashSeeds_[i]);
136-
updateLocations[i] = i * (long)numBuckets_ + Math.floorMod(index[0], numBuckets_);
132+
updateLocations[i] = (i * (long)numBuckets_) + Math.floorMod(index[0], numBuckets_);
137133
}
138134

139135
return updateLocations;
@@ -184,7 +180,7 @@ public long getTotalWeight_() {
184180
* @return The relative error.
185181
*/
186182
public double getRelativeError() {
187-
return Math.exp(1.0) / (double)numBuckets_;
183+
return Math.exp(1.0) / numBuckets_;
188184
}
189185

190186
/**
@@ -193,7 +189,7 @@ public double getRelativeError() {
193189
* @return Suggested number of hash functions.
194190
*/
195191
public static byte suggestNumHashes(final double confidence) {
196-
if (confidence < 0 || confidence > 1) {
192+
if ((confidence < 0) || (confidence > 1)) {
197193
throw new SketchesException("Confidence must be between 0 and 1.0 (inclusive).");
198194
}
199195
final int value = (int) Math.ceil(Math.log(1.0 / (1.0 - confidence)));
@@ -227,7 +223,7 @@ public void update(final long item, final long weight) {
227223
* @param weight The weight of the item.
228224
*/
229225
public void update(final String item, final long weight) {
230-
if (item == null || item.isEmpty()) {
226+
if ((item == null) || item.isEmpty()) {
231227
return;
232228
}
233229
final byte[] strByte = item.getBytes(StandardCharsets.UTF_8);
@@ -266,7 +262,7 @@ public long getEstimate(final long item) {
266262
* @return Estimated frequency.
267263
*/
268264
public long getEstimate(final String item) {
269-
if (item == null || item.isEmpty()) {
265+
if ((item == null) || item.isEmpty()) {
270266
return 0;
271267
}
272268

@@ -309,7 +305,7 @@ public long getUpperBound(final long item) {
309305
* @return Upper bound of estimated frequency.
310306
*/
311307
public long getUpperBound(final String item) {
312-
if (item == null || item.isEmpty()) {
308+
if ((item == null) || item.isEmpty()) {
313309
return 0;
314310
}
315311

@@ -345,7 +341,7 @@ public long getLowerBound(final long item) {
345341
* @return Lower bound of estimated frequency.
346342
*/
347343
public long getLowerBound(final String item) {
348-
if (item == null || item.isEmpty()) {
344+
if ((item == null) || item.isEmpty()) {
349345
return 0;
350346
}
351347

@@ -371,8 +367,8 @@ public void merge(final CountMinSketch other) {
371367
throw new SketchesException("Cannot merge a sketch with itself");
372368
}
373369

374-
final boolean acceptableConfig = getNumBuckets_() == other.getNumBuckets_()
375-
&& getNumHashes_() == other.getNumHashes_() && getSeed_() == other.getSeed_();
370+
final boolean acceptableConfig = (getNumBuckets_() == other.getNumBuckets_())
371+
&& (getNumHashes_() == other.getNumHashes_()) && (getSeed_() == other.getSeed_());
376372

377373
if (!acceptableConfig) {
378374
throw new SketchesException("Incompatible sketch configuration.");
@@ -396,9 +392,9 @@ private int getSerializedSizeBytes() {
396392
return preambleBytes + Long.BYTES + (sketchArray_.length * Long.BYTES);
397393
}
398394

399-
400395
/**
401396
* Returns the sketch as a byte array.
397+
* @return the result byte array
402398
*/
403399
public byte[] toByteArray() {
404400
final int serializedSizeBytes = getSerializedSizeBytes();

0 commit comments

Comments
 (0)