Skip to content

Commit 5d531c2

Browse files
committed
More cleanup after removing SerVer 1 and 2
1 parent 193cfdf commit 5d531c2

5 files changed

Lines changed: 105 additions & 351 deletions

File tree

src/main/java/org/apache/datasketches/theta/CompactSketch.java

Lines changed: 65 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -68,17 +68,13 @@ public abstract class CompactSketch extends Sketch {
6868
* <p>The resulting sketch will not retain any link to the source MemorySegment and all of its data will be
6969
* copied to the heap CompactSketch.</p>
7070
*
71-
* <p>This method assumes that the sketch image was created with the correct hash seed, so it is not checked.
72-
* The resulting on-heap CompactSketch will be given the seedHash derived from the given sketch image.
73-
* However, Serial Version 1 sketch images do not have a seedHash field,
74-
* so the resulting heapified CompactSketch will be given the hash of the DEFAULT_UPDATE_SEED.</p>
71+
* <p>The {@link Util#DEFAULT_UPDATE_SEED DEFAULT_UPDATE_SEED} is assumed; this is equivalent to calling
* {@link #heapify(MemorySegment, long)} with that seed.</p>
7572
*
7673
* @param srcSeg an image of a CompactSketch.
7774
* @return a CompactSketch on the heap.
7875
*/
7976
public static CompactSketch heapify(final MemorySegment srcSeg) {
80-
//final boolean checkSeedHash = extractSerVer(srcSeg) != 1;
81-
return heapify(srcSeg, Util.DEFAULT_UPDATE_SEED, false); //false for SerVer 1 only
77+
return heapify(srcSeg, Util.DEFAULT_UPDATE_SEED);
8278
}
8379

8480
/**
@@ -87,133 +83,106 @@ public static CompactSketch heapify(final MemorySegment srcSeg) {
8783
* <p>The resulting sketch will not retain any link to the source MemorySegment and all of its data will be
8884
* copied to the heap CompactSketch.</p>
8985
*
90-
* <p>This method checks if the given expectedSeed was used to create the source MemorySegment image.
91-
* However, SerialVersion 1 sketch images cannot be checked as they don't have a seedHash field,
92-
* so the resulting heapified CompactSketch will be given the hash of the expectedSeed.</p>
86+
* <p>This method checks if the given expectedSeed was used to create the source MemorySegment image.
* Empty Serialization Version 3 images are not checked.</p>
9387
*
9488
* @param srcSeg an image of a CompactSketch that was created using the given expectedSeed.
9589
* @param expectedSeed the seed used to validate the given MemorySegment image.
9690
* <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
9791
* @return a CompactSketch on the heap.
9892
*/
9993
public static CompactSketch heapify(final MemorySegment srcSeg, final long expectedSeed) {
100-
return heapify(srcSeg, expectedSeed, true);
101-
}
102-
103-
private static CompactSketch heapify(final MemorySegment srcSeg, final long seed, final boolean enforceSeed) {
10494
final int serVer = extractSerVer(srcSeg);
10595
final int familyID = extractFamilyID(srcSeg);
10696
final Family family = idToFamily(familyID);
10797
if (family != Family.COMPACT) {
10898
throw new SketchesArgumentException("Corrupted: " + family + " is not Compact!");
10999
}
110100
if (serVer == 4) {
111-
return heapifyV4(srcSeg, seed, enforceSeed);
101+
return heapifyV4(srcSeg, expectedSeed);
112102
}
113103
if (serVer == 3) {
114104
final int flags = extractFlags(srcSeg);
115105
final boolean srcOrdered = (flags & ORDERED_FLAG_MASK) != 0;
116106
final boolean empty = (flags & EMPTY_FLAG_MASK) != 0;
117-
if (enforceSeed && !empty) { PreambleUtil.checkSegmentSeedHash(srcSeg, seed); }
107+
if (!empty) { PreambleUtil.checkSegmentSeedHash(srcSeg, expectedSeed); }
118108
return CompactOperations.segmentToCompact(srcSeg, srcOrdered, null);
119109
}
120110
//not SerVer 3 or 4
121-
throw new SketchesArgumentException("Unknown Serialization Version: " + serVer);
111+
throw new SketchesArgumentException(
112+
"Corrupted: Serialization Version " + serVer + " not recognized.");
122113
}
123114

124115
/**
125116
* Wrap takes the CompactSketch image in given MemorySegment and refers to it directly.
126117
* There is no data copying onto the java heap.
127118
* The wrap operation enables fast read-only merging and access to all the public read-only API.
128119
*
129-
* <p>Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have
130-
* been explicitly stored as direct sketches can be wrapped.
131-
* Wrapping earlier serial version sketches will result in a heapify operation.
132-
* These early versions were never designed to "wrap".</p>
133-
*
134120
* <p>Wrapping any subclass of this class that is empty or contains only a single item will
135121
* result in heapified forms of empty and single item sketch respectively.
136122
* This is actually faster and consumes less overall space.</p>
137123
*
138-
* <p>This method assumes that the sketch image was created with the correct hash seed, so it is not checked.
139-
* However, Serial Version 1 sketch images do not have a seedHash field,
140-
* so the resulting on-heap CompactSketch will be given the hash of the DEFAULT_UPDATE_SEED.</p>
124+
* <p>The {@link Util#DEFAULT_UPDATE_SEED DEFAULT_UPDATE_SEED} is assumed; this is equivalent to calling
* {@link #wrap(MemorySegment, long)} with that seed.</p>
141125
*
142126
* @param srcSeg an image of a Sketch.
143-
* @return a CompactSketch backed by the given MemorySegment except as above.
127+
* @return a CompactSketch backed by the given MemorySegment.
144128
*/
145129
public static CompactSketch wrap(final MemorySegment srcSeg) {
146-
return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED, false);
130+
return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED);
147131
}
148132

149133
/**
150134
* Wrap takes the sketch image in the given MemorySegment and refers to it directly.
151135
* There is no data copying onto the java heap.
152136
* The wrap operation enables fast read-only merging and access to all the public read-only API.
153137
*
154-
* <p>Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have
155-
* been explicitly stored as direct sketches can be wrapped.
156-
* Wrapping earlier serial version sketches will result in a heapify operation.
157-
* These early versions were never designed to "wrap".</p>
158-
*
159138
* <p>Wrapping any subclass of this class that is empty or contains only a single item will
160139
* result in heapified forms of empty and single item sketch respectively.
161140
* This is actually faster and consumes less overall space.</p>
162141
*
163-
* <p>This method checks if the given expectedSeed was used to create the source MemorySegment image.
164-
* However, SerialVersion 1 sketches cannot be checked as they don't have a seedHash field,
165-
* so the resulting heapified CompactSketch will be given the hash of the expectedSeed.</p>
142+
* <p>This method checks if the given expectedSeed was used to create the source MemorySegment image.</p>
166143
*
167144
* @param srcSeg an image of a Sketch that was created using the given expectedSeed.
168145
* @param expectedSeed the seed used to validate the given MemorySegment image.
169146
* <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
170-
* @return a CompactSketch backed by the given MemorySegment except as above.
147+
* @return a CompactSketch backed by the given MemorySegment.
171148
*/
172149
public static CompactSketch wrap(final MemorySegment srcSeg, final long expectedSeed) {
173-
return wrap(srcSeg, expectedSeed, true);
174-
}
175-
176-
private static CompactSketch wrap(final MemorySegment srcSeg, final long seed, final boolean enforceSeed) {
177150
final int serVer = extractSerVer(srcSeg);
178151
final int familyID = extractFamilyID(srcSeg);
179152
final Family family = Family.idToFamily(familyID);
180153
if (family != Family.COMPACT) {
181154
throw new SketchesArgumentException("Corrupted: " + family + " is not Compact!");
182155
}
183-
final short seedHash = Util.computeSeedHash(seed);
156+
final short seedHash = Util.computeSeedHash(expectedSeed);
157+
184158

185-
switch (serVer) {
186-
case 3: {
187-
if (PreambleUtil.isEmptyFlag(srcSeg)) {
188-
return EmptyCompactSketch.getHeapInstance(srcSeg);
189-
}
190-
if (otherCheckForSingleItem(srcSeg)) {
191-
return SingleItemSketch.heapify(srcSeg, enforceSeed ? seedHash : (short) extractSeedHash(srcSeg));
192-
}
193-
//not empty & not singleItem
194-
final int flags = extractFlags(srcSeg);
195-
final boolean compactFlag = (flags & COMPACT_FLAG_MASK) > 0;
196-
if (!compactFlag) {
197-
throw new SketchesArgumentException(
198-
"Corrupted: COMPACT family sketch image must have compact flag set");
199-
}
200-
final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0;
201-
if (!readOnly) {
202-
throw new SketchesArgumentException(
203-
"Corrupted: COMPACT family sketch image must have Read-Only flag set");
204-
}
205-
return DirectCompactSketch.wrapInstance(srcSeg,
206-
enforceSeed ? seedHash : (short) extractSeedHash(srcSeg));
159+
if (serVer == 3) {
160+
if (PreambleUtil.isEmptyFlag(srcSeg)) {
161+
return EmptyCompactSketch.getHeapInstance(srcSeg);
162+
}
163+
if (otherCheckForSingleItem(srcSeg)) {
164+
return SingleItemSketch.heapify(srcSeg, seedHash);
207165
}
208-
case 4: {
209-
return DirectCompactCompressedSketch.wrapInstance(srcSeg,
210-
enforceSeed ? seedHash : (short) extractSeedHash(srcSeg));
166+
//not empty & not singleItem
167+
final int flags = extractFlags(srcSeg);
168+
final boolean compactFlag = (flags & COMPACT_FLAG_MASK) > 0;
169+
if (!compactFlag) {
170+
throw new SketchesArgumentException(
171+
"Corrupted: COMPACT family sketch image must have compact flag set");
211172
}
212-
default: {
173+
final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0;
174+
if (!readOnly) {
213175
throw new SketchesArgumentException(
214-
"Corrupted: Serialization Version " + serVer + " not recognized.");
176+
"Corrupted: COMPACT family sketch image must have Read-Only flag set");
215177
}
178+
return DirectCompactSketch.wrapInstance(srcSeg, seedHash);
216179
}
180+
if (serVer == 4) {
181+
return DirectCompactCompressedSketch.wrapInstance(srcSeg, seedHash);
182+
}
183+
//not SerVer 3 or 4
184+
throw new SketchesArgumentException(
185+
"Corrupted: Serialization Version " + serVer + " not recognized.");
217186
}
218187

219188
/**
@@ -278,38 +247,38 @@ private static CompactSketch wrap(final byte[] bytes, final long seed, final boo
278247
}
279248
final short seedHash = Util.computeSeedHash(seed);
280249

281-
switch (serVer) {
282-
case 3: {
283-
final int flags = bytes[FLAGS_BYTE];
284-
if ((flags & EMPTY_FLAG_MASK) > 0) {
285-
return EmptyCompactSketch.getHeapInstance(MemorySegment.ofArray(bytes));
286-
}
287-
final int preLongs = bytes[PREAMBLE_LONGS_BYTE];
288-
if (otherCheckForSingleItem(preLongs, serVer, familyId, flags)) {
289-
return SingleItemSketch.heapify(MemorySegment.ofArray(bytes), enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT));
290-
}
291-
//not empty & not singleItem
292-
final boolean compactFlag = (flags & COMPACT_FLAG_MASK) > 0;
293-
if (!compactFlag) {
294-
throw new SketchesArgumentException(
295-
"Corrupted: COMPACT family sketch image must have compact flag set");
296-
}
297-
final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0;
298-
if (!readOnly) {
299-
throw new SketchesArgumentException(
300-
"Corrupted: COMPACT family sketch image must have Read-Only flag set");
301-
}
302-
return WrappedCompactSketch.wrapInstance(bytes,
303-
enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT));
250+
251+
if (serVer == 3) {
252+
final int flags = bytes[FLAGS_BYTE];
253+
if ((flags & EMPTY_FLAG_MASK) > 0) {
254+
return EmptyCompactSketch.getHeapInstance(MemorySegment.ofArray(bytes));
255+
}
256+
final int preLongs = bytes[PREAMBLE_LONGS_BYTE];
257+
if (otherCheckForSingleItem(preLongs, serVer, familyId, flags)) {
258+
return SingleItemSketch.heapify(MemorySegment.ofArray(bytes), enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT));
304259
}
305-
case 4: {
306-
return WrappedCompactCompressedSketch.wrapInstance(bytes, seedHash);
260+
//not empty & not singleItem
261+
final boolean compactFlag = (flags & COMPACT_FLAG_MASK) > 0;
262+
if (!compactFlag) {
263+
throw new SketchesArgumentException(
264+
"Corrupted: COMPACT family sketch image must have compact flag set");
307265
}
308-
default: {
266+
final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0;
267+
if (!readOnly) {
309268
throw new SketchesArgumentException(
310-
"Corrupted: Serialization Version " + serVer + " not recognized.");
269+
"Corrupted: COMPACT family sketch image must have Read-Only flag set");
311270
}
271+
return WrappedCompactSketch.wrapInstance(bytes,
272+
enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT));
273+
}
274+
if (serVer ==4) {
275+
return WrappedCompactCompressedSketch.wrapInstance(bytes, seedHash);
312276
}
277+
//not SerVer 3 or 4
278+
throw new SketchesArgumentException(
279+
"Corrupted: Serialization Version " + serVer + " not recognized.");
280+
281+
313282
}
314283

315284
//Sketch Overrides
@@ -436,12 +405,12 @@ private byte[] toByteArrayV4() {
436405
return bytes;
437406
}
438407

439-
private static CompactSketch heapifyV4(final MemorySegment srcSeg, final long seed, final boolean enforceSeed) {
408+
private static CompactSketch heapifyV4(final MemorySegment srcSeg, final long seed) {
440409
final int preLongs = Sketch.getPreambleLongs(srcSeg);
441410
final int entryBits = extractEntryBitsV4(srcSeg);
442411
final int numEntriesBytes = extractNumEntriesBytesV4(srcSeg);
443412
final short seedHash = (short) extractSeedHash(srcSeg);
444-
if (enforceSeed) { PreambleUtil.checkSegmentSeedHash(srcSeg, seed); }
413+
PreambleUtil.checkSegmentSeedHash(srcSeg, seed);
445414
int offsetBytes = 8;
446415
long theta = Long.MAX_VALUE;
447416
if (preLongs > 1) {

src/main/java/org/apache/datasketches/theta/PreambleUtil.java

Lines changed: 16 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -190,10 +190,10 @@ private PreambleUtil() {}
190190

191191
// ###### DO NOT MESS WITH THIS FROM HERE ...
192192
// Preamble byte Addresses
193-
static final int PREAMBLE_LONGS_BYTE = 0; //lower 6 bits in byte.
194-
static final int LG_RESIZE_FACTOR_BIT = 6; //upper 2 bits in byte. Not used by compact, direct
193+
static final int PREAMBLE_LONGS_BYTE = 0; //lower 6 bits in byte 0.
194+
static final int LG_RESIZE_FACTOR_BIT = 6; //upper 2 bits in byte 0. Used by Update and Alpha sketches; not used by compact or direct sketches.
195195
static final int SER_VER_BYTE = 1;
196-
static final int FAMILY_BYTE = 2; //SerVer1,2 was SKETCH_TYPE_BYTE
196+
static final int FAMILY_BYTE = 2;
197197
static final int LG_NOM_LONGS_BYTE = 3; //not used by compact
198198
static final int LG_ARR_LONGS_BYTE = 4; //not used by compact
199199
static final int FLAGS_BYTE = 5;
@@ -203,28 +203,23 @@ private PreambleUtil() {}
203203
static final int THETA_LONG = 16; //8-byte aligned
204204
static final int UNION_THETA_LONG = 24; //8-byte aligned, only used by Union
205205

206-
// flag bit masks
207-
static final int RESERVED_FLAG_MASK = 1; //SerVer 1, 2, 3. Now Reserved, no longer used.
208-
static final int READ_ONLY_FLAG_MASK = 2; //Set but not read. Reserved. SerVer 1, 2, 3
209-
static final int EMPTY_FLAG_MASK = 4; //SerVer 2, 3
210-
static final int COMPACT_FLAG_MASK = 8; //SerVer 2 was NO_REBUILD_FLAG_MASK, 3
211-
static final int ORDERED_FLAG_MASK = 16;//SerVer 2 was UNORDERED_FLAG_MASK, 3
212-
static final int SINGLEITEM_FLAG_MASK = 32;//SerVer 3
213-
//The last 2 bits of the flags byte are reserved and assumed to be zero, for now.
214-
215-
//Backward compatibility: SerVer1 preamble always 3 longs, SerVer2 preamble: 1, 2, 3 longs
216-
// SKETCH_TYPE_BYTE 2 //SerVer1, SerVer2
217-
// V1, V2 types: Alpha = 1, QuickSelect = 2, SetSketch = 3; V3 only: Buffered QS = 4
218-
static final int LG_RESIZE_RATIO_BYTE_V1 = 5; //used by SerVer 1
219-
static final int FLAGS_BYTE_V1 = 6; //used by SerVer 1
206+
// flag byte bit masks
207+
static final int RESERVED_FLAG_MASK = 1; //Bit 0: Reserved, no longer used.
208+
static final int READ_ONLY_FLAG_MASK = 2; //Bit 1: Read-only. CompactSketch.wrap rejects SerVer 3 compact images that do not have it set.
209+
static final int EMPTY_FLAG_MASK = 4; //Bit 2: Set when the sketch is empty.
210+
static final int COMPACT_FLAG_MASK = 8; //Bit 3: Set for compact sketch images; required when wrapping a COMPACT family image.
211+
static final int ORDERED_FLAG_MASK = 16;//Bit 4: Set when the sketch image is ordered.
212+
static final int SINGLEITEM_FLAG_MASK = 32;//Bit 5: Single-item flag (presumably set for single-item images; TODO confirm).
213+
//The last 2 bits (Bits 6 and 7) of the flags byte are reserved and assumed to be zero.
220214

221215
//Other constants
222216
static final int SER_VER = 3;
217+
static final int SER_VER_COMPRESSED = 4;
223218

224219
// serial version 4 compressed ordered sketch, not empty, not single item
225-
static final int ENTRY_BITS_BYTE_V4 = 3; // number of bits packed in deltas between hashes
226-
static final int NUM_ENTRIES_BYTES_BYTE_V4 = 4; // number of bytes used for the number of entries
227-
static final int THETA_LONG_V4 = 8; //8-byte aligned
220+
static final int ENTRY_BITS_BYTE_V4 = 3; // number of bits packed in deltas between hashes
221+
static final int NUM_ENTRIES_BYTES_BYTE_V4 = 4; // number of bytes used for the number of entries
222+
static final int THETA_LONG_V4 = 8; //8-byte aligned
228223

229224
/**
230225
* Computes the number of bytes required for an updatable sketch using a hash-table cache.
@@ -377,17 +372,13 @@ else if (preLongs == 3) {
377372
//@formatter:on
378373

379374
static int extractPreLongs(final MemorySegment seg) {
380-
return seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; //for SerVer 1,2,3
375+
return seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
381376
}
382377

383378
static int extractLgResizeFactor(final MemorySegment seg) {
384379
return seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) >>> LG_RESIZE_FACTOR_BIT & 0X3;
385380
}
386381

387-
static int extractLgResizeRatioV1(final MemorySegment seg) {
388-
return seg.get(JAVA_BYTE, LG_RESIZE_RATIO_BYTE_V1) & 0X3;
389-
}
390-
391382
static int extractSerVer(final MemorySegment seg) {
392383
return seg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF;
393384
}
@@ -408,10 +399,6 @@ static int extractFlags(final MemorySegment seg) {
408399
return seg.get(JAVA_BYTE, FLAGS_BYTE) & 0XFF;
409400
}
410401

411-
static int extractFlagsV1(final MemorySegment seg) {
412-
return seg.get(JAVA_BYTE, FLAGS_BYTE_V1) & 0XFF;
413-
}
414-
415402
static int extractSeedHash(final MemorySegment seg) {
416403
return seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT) & 0XFFFF;
417404
}

0 commit comments

Comments
 (0)