Skip to content

Commit 5b5f7ac

Browse files
committed
Add documentation
1 parent 799436b commit 5b5f7ac

1 file changed

Lines changed: 108 additions & 0 deletions

File tree

src/main/java/org/apache/datasketches/count/CountMinSketch.java

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,30 +84,59 @@ private long[] getHashes(byte[] item) {
8484
return updateLocations;
8585
}
8686

87+
/**
88+
* Checks if the CountMinSketch has processed any items.
89+
* @return True if the sketch is empty, otherwise false.
90+
*/
8791
public boolean isEmpty() {
8892
return totalWeight_ == 0;
8993
}
9094

95+
/**
96+
* Returns the number of hash functions used in this sketch.
97+
* @return The number of hash functions.
98+
*/
9199
public byte getNumHashes_() {
92100
return numHashes_;
93101
}
94102

103+
/**
104+
* Returns the number of buckets per hash function.
105+
* @return The number of buckets.
106+
*/
95107
public int getNumBuckets_() {
96108
return numBuckets_;
97109
}
98110

111+
/**
112+
* Returns the hash seed used by this sketch.
113+
* @return The seed value.
114+
*/
99115
public long getSeed_() {
100116
return seed_;
101117
}
102118

119+
/**
120+
* Returns the total weight of all items inserted into the sketch.
121+
* @return The total weight.
122+
*/
103123
public long getTotalWeight_() {
104124
return totalWeight_;
105125
}
106126

127+
/**
128+
* Returns the relative error of the sketch.
129+
* @return The relative error.
130+
*/
107131
public double getRelativeError() {
108132
return Math.exp(1.0) / (double)numBuckets_;
109133
}
110134

135+
/**
136+
* Suggests an appropriate number of hash functions to use for a given confidence level.
137+
* @param confidence The desired confidence level, between 0 and 1.0 inclusive; values outside this range cause a SketchesException.
138+
* @return Suggested number of hash functions, capped at 127.
139+
*/
111140
public byte suggestNumHashes(double confidence) {
112141
if (confidence < 0 || confidence > 1) {
113142
throw new SketchesException("Confidence must be between 0 and 1.0 (inclusive).");
@@ -116,15 +145,30 @@ public byte suggestNumHashes(double confidence) {
116145
return (byte) Math.min(value, 127);
117146
}
118147

148+
/**
149+
* Suggests an appropriate number of buckets per hash function for a given relative error.
150+
* @param relativeError The desired relative error.
151+
* @return Suggested number of buckets.
152+
*/
119153
public int suggestNumBuckets(double relativeError) {
120154
return (int) Math.ceil(Math.exp(1.0) / relativeError);
121155
}
122156

157+
/**
158+
* Updates the sketch with the provided item and weight.
159+
* @param item The item to update.
160+
* @param weight The weight of the item.
161+
*/
123162
public void update(final long item, final long weight) {
124163
byte[] longByte = ByteBuffer.allocate(8).putLong(item).array();
125164
update(longByte, weight);
126165
}
127166

167+
/**
168+
* Updates the sketch with the provided item and weight. A null or empty string is ignored.
169+
* @param item The item to update.
170+
* @param weight The weight of the item.
171+
*/
128172
public void update(final String item, final long weight) {
129173
if (item == null || item.isEmpty()) {
130174
return;
@@ -133,6 +177,11 @@ public void update(final String item, final long weight) {
133177
update(strByte, weight);
134178
}
135179

180+
/**
181+
* Updates the sketch with the provided item and weight. An empty array is ignored.
182+
* @param item The item to update.
183+
* @param weight The weight of the item.
184+
*/
136185
public void update(final byte[] item, final long weight) {
137186
if (item.length == 0) {
138187
return;
@@ -145,11 +194,21 @@ public void update(final byte[] item, final long weight) {
145194
}
146195
}
147196

197+
/**
198+
* Returns the estimated frequency for the given item.
199+
* @param item The item to estimate.
200+
* @return Estimated frequency.
201+
*/
148202
public long getEstimate(final long item) {
149203
byte[] longByte = ByteBuffer.allocate(8).putLong(item).array();
150204
return getEstimate(longByte);
151205
}
152206

207+
/**
208+
* Returns the estimated frequency for the given item.
209+
* @param item The item to estimate.
210+
* @return Estimated frequency, or 0 if the item is null or empty.
211+
*/
153212
public long getEstimate(final String item) {
154213
if (item == null || item.isEmpty()) {
155214
return 0;
@@ -159,6 +218,11 @@ public long getEstimate(final String item) {
159218
return getEstimate(strByte);
160219
}
161220

221+
/**
222+
* Returns the estimated frequency for the given item.
223+
* @param item The item to estimate.
224+
* @return Estimated frequency, or 0 if the item is an empty array.
225+
*/
162226
public long getEstimate(final byte[] item) {
163227
if (item.length == 0) {
164228
return 0;
@@ -173,11 +237,21 @@ public long getEstimate(final byte[] item) {
173237
return res;
174238
}
175239

240+
/**
241+
* Returns the upper bound of the estimated frequency for the given item.
242+
* @param item The item to estimate.
243+
* @return Upper bound of estimated frequency.
244+
*/
176245
public long getUpperBound(final long item) {
177246
byte[] longByte = ByteBuffer.allocate(8).putLong(item).array();
178247
return getUpperBound(longByte);
179248
}
180249

250+
/**
251+
* Returns the upper bound of the estimated frequency for the given item.
252+
* @param item The item to estimate.
253+
* @return Upper bound of estimated frequency, or 0 if the item is null or empty.
254+
*/
181255
public long getUpperBound(final String item) {
182256
if (item == null || item.isEmpty()) {
183257
return 0;
@@ -187,6 +261,11 @@ public long getUpperBound(final String item) {
187261
return getUpperBound(strByte);
188262
}
189263

264+
/**
265+
* Returns the upper bound of the estimated frequency for the given item, computed as the estimate plus the relative error times the total weight.
266+
* @param item The item to estimate.
267+
* @return Upper bound of estimated frequency, or 0 if the item is an empty array.
268+
*/
190269
public long getUpperBound(final byte[] item) {
191270
if (item.length == 0) {
192271
return 0;
@@ -195,11 +274,21 @@ public long getUpperBound(final byte[] item) {
195274
return getEstimate(item) + (long)(getRelativeError() * getTotalWeight_());
196275
}
197276

277+
/**
278+
* Returns the lower bound of the estimated frequency for the given item.
279+
* @param item The item to estimate.
280+
* @return Lower bound of estimated frequency.
281+
*/
198282
public long getLowerBound(final long item) {
199283
byte[] longByte = ByteBuffer.allocate(8).putLong(item).array();
200284
return getLowerBound(longByte);
201285
}
202286

287+
/**
288+
* Returns the lower bound of the estimated frequency for the given item.
289+
* @param item The item to estimate.
290+
* @return Lower bound of estimated frequency.
291+
*/
203292
public long getLowerBound(final String item) {
204293
if (item == null || item.isEmpty()) {
205294
return 0;
@@ -209,10 +298,19 @@ public long getLowerBound(final String item) {
209298
return getLowerBound(strByte);
210299
}
211300

301+
/**
302+
* Returns the lower bound of the estimated frequency for the given item.
303+
* @param item The item to estimate.
304+
* @return Lower bound of estimated frequency.
305+
*/
212306
public long getLowerBound(final byte[] item) {
213307
return getEstimate(item);
214308
}
215309

310+
/**
311+
* Merges another CountMinSketch into this one. The sketches must have the same configuration, and merging a sketch with itself is rejected with a SketchesException.
312+
* @param other The other sketch to merge.
313+
*/
216314
public void merge(final CountMinSketch other) {
217315
if (this == other) {
218316
throw new SketchesException("Cannot merge a sketch with itself");
@@ -232,6 +330,10 @@ public void merge(final CountMinSketch other) {
232330
totalWeight_ += other.getTotalWeight_();
233331
}
234332

333+
/**
334+
* Serializes the sketch into the provided ByteBuffer.
335+
* @param buf The ByteBuffer to write into.
336+
*/
235337
public void serialize(ByteBuffer buf) {
236338
// Long 0
237339
final int preambleLongs = Family.COUNTMIN.getMinPreLongs();
@@ -262,6 +364,12 @@ public void serialize(ByteBuffer buf) {
262364
}
263365
}
264366

367+
/**
368+
* Deserializes a CountMinSketch from the provided byte array.
369+
* @param b The byte array containing the serialized sketch.
370+
* @param seed The seed used during serialization.
371+
* @return The deserialized CountMinSketch.
372+
*/
265373
public static CountMinSketch deserialize(final byte[] b, final long seed) {
266374
ByteBuffer buf = ByteBuffer.allocate(b.length);
267375
buf.put(b);

0 commit comments

Comments
 (0)