Skip to content

Commit 77da42d

Browse files
committed
This set of changes removed the largely redundant theta Sketches class.
1 parent 6612200 commit 77da42d

44 files changed

Lines changed: 612 additions & 1051 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

src/main/java/org/apache/datasketches/theta/CompactOperations.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
import static org.apache.datasketches.theta.PreambleUtil.extractFamilyID;
3434
import static org.apache.datasketches.theta.PreambleUtil.extractFlags;
3535
import static org.apache.datasketches.theta.PreambleUtil.extractLgArrLongs;
36-
import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;
3736
import static org.apache.datasketches.theta.PreambleUtil.extractSeedHash;
3837
import static org.apache.datasketches.theta.PreambleUtil.extractSerVer;
3938
import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong;
@@ -122,7 +121,7 @@ static CompactSketch segmentToCompact(
122121
final MemorySegment dstWSeg)
123122
{
124123
//extract Pre0 fields and Flags from srcSeg
125-
final int srcPreLongs = extractPreLongs(srcSeg);
124+
final int srcPreLongs = Sketch.getPreambleLongs(srcSeg);
126125
final int srcSerVer = extractSerVer(srcSeg); //not used
127126
final int srcFamId = extractFamilyID(srcSeg);
128127
final int srcLgArrLongs = extractLgArrLongs(srcSeg);

src/main/java/org/apache/datasketches/theta/CompactSketch.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
import static org.apache.datasketches.theta.PreambleUtil.extractFamilyID;
3636
import static org.apache.datasketches.theta.PreambleUtil.extractFlags;
3737
import static org.apache.datasketches.theta.PreambleUtil.extractNumEntriesBytesV4;
38-
import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;
3938
import static org.apache.datasketches.theta.PreambleUtil.extractSeedHash;
4039
import static org.apache.datasketches.theta.PreambleUtil.extractSerVer;
4140
import static org.apache.datasketches.theta.PreambleUtil.extractThetaLongV4;
@@ -78,7 +77,8 @@ public abstract class CompactSketch extends Sketch {
7877
* @return a CompactSketch on the heap.
7978
*/
8079
public static CompactSketch heapify(final MemorySegment srcSeg) {
81-
return heapify(srcSeg, Util.DEFAULT_UPDATE_SEED, false);
80+
//final boolean checkSeedHash = extractSerVer(srcSeg) != 1;
81+
return heapify(srcSeg, Util.DEFAULT_UPDATE_SEED, false); //false for SerVer 1 only
8282
}
8383

8484
/**
@@ -459,7 +459,7 @@ private byte[] toByteArrayV4() {
459459
}
460460

461461
private static CompactSketch heapifyV4(final MemorySegment srcSeg, final long seed, final boolean enforceSeed) {
462-
final int preLongs = extractPreLongs(srcSeg);
462+
final int preLongs = Sketch.getPreambleLongs(srcSeg);
463463
final int entryBits = extractEntryBitsV4(srcSeg);
464464
final int numEntriesBytes = extractNumEntriesBytesV4(srcSeg);
465465
final short seedHash = (short) extractSeedHash(srcSeg);

src/main/java/org/apache/datasketches/theta/DirectCompactCompressedSketch.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import static java.lang.foreign.ValueLayout.JAVA_BYTE;
2323
import static org.apache.datasketches.theta.PreambleUtil.extractEntryBitsV4;
2424
import static org.apache.datasketches.theta.PreambleUtil.extractNumEntriesBytesV4;
25-
import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;
2625
import static org.apache.datasketches.theta.PreambleUtil.extractSeedHash;
2726
import static org.apache.datasketches.theta.PreambleUtil.extractThetaLongV4;
2827
import static org.apache.datasketches.theta.PreambleUtil.wholeBytesToHoldBits;
@@ -70,12 +69,12 @@ public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSe
7069
MemorySegment.copy(seg_, 0, dstSeg, 0, getCurrentBytes());
7170
return new DirectCompactSketch(dstSeg);
7271
}
73-
return CompactSketch.heapify(seg_);
72+
return CompactSketch.heapify(seg_, Util.DEFAULT_UPDATE_SEED);
7473
}
7574

7675
@Override
7776
public int getCurrentBytes() {
78-
final int preLongs = extractPreLongs(seg_);
77+
final int preLongs = Sketch.getPreambleLongs(seg_);
7978
final int entryBits = extractEntryBitsV4(seg_);
8079
final int numEntriesBytes = extractNumEntriesBytesV4(seg_);
8180
return preLongs * Long.BYTES + numEntriesBytes + wholeBytesToHoldBits(getRetainedEntries() * entryBits);
@@ -89,7 +88,7 @@ public int getRetainedEntries(final boolean valid) { //compact is always valid
8988
// number of entries is stored using variable length encoding
9089
// most significant bytes with all zeros are not stored
9190
// one byte in the preamble has the number of non-zero bytes used
92-
final int preLongs = extractPreLongs(seg_); // if > 1 then the second long has theta
91+
final int preLongs = Sketch.getPreambleLongs(seg_); // if > 1 then the second long has theta
9392
final int numEntriesBytes = extractNumEntriesBytesV4(seg_);
9493
int offsetBytes = preLongs > 1 ? START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE;
9594
int numEntries = 0;
@@ -101,7 +100,7 @@ public int getRetainedEntries(final boolean valid) { //compact is always valid
101100

102101
@Override
103102
public long getThetaLong() {
104-
final int preLongs = extractPreLongs(seg_);
103+
final int preLongs = Sketch.getPreambleLongs(seg_);
105104
return (preLongs > 1) ? extractThetaLongV4(seg_) : Long.MAX_VALUE;
106105
}
107106

@@ -119,7 +118,7 @@ public boolean isOrdered() {
119118
public HashIterator iterator() {
120119
return new MemorySegmentCompactCompressedHashIterator(
121120
seg_,
122-
(extractPreLongs(seg_) > 1 ? START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE)
121+
(Sketch.getPreambleLongs(seg_) > 1 ? START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE)
123122
+ extractNumEntriesBytesV4(seg_),
124123
extractEntryBitsV4(seg_),
125124
getRetainedEntries()

src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
import static org.apache.datasketches.theta.PreambleUtil.ORDERED_FLAG_MASK;
2727
import static org.apache.datasketches.theta.PreambleUtil.extractCurCount;
2828
import static org.apache.datasketches.theta.PreambleUtil.extractFlags;
29-
import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;
3029
import static org.apache.datasketches.theta.PreambleUtil.extractSeedHash;
3130
import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong;
3231
import static org.apache.datasketches.theta.SingleItemSketch.otherCheckForSingleItem;
@@ -53,7 +52,7 @@ class DirectCompactSketch extends CompactSketch {
5352

5453
/**
5554
* Construct this sketch with the given MemorySegment.
56-
* @param seg Read-only MemorySegment object with the order bit properly set.
55+
* @param seg (optional) Read-only MemorySegment object.
5756
*/
5857
DirectCompactSketch(final MemorySegment seg) {
5958
seg_ = seg;
@@ -82,21 +81,21 @@ public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSe
8281
@Override
8382
public int getCurrentBytes() {
8483
if (otherCheckForSingleItem(seg_)) { return 16; }
85-
final int preLongs = extractPreLongs(seg_);
84+
final int preLongs = Sketch.getPreambleLongs(seg_);
8685
final int curCount = (preLongs == 1) ? 0 : extractCurCount(seg_);
8786
return (preLongs + curCount) << 3;
8887
}
8988

9089
@Override
9190
public int getRetainedEntries(final boolean valid) { //compact is always valid
9291
if (otherCheckForSingleItem(seg_)) { return 1; }
93-
final int preLongs = extractPreLongs(seg_);
92+
final int preLongs = Sketch.getPreambleLongs(seg_);
9493
return (preLongs == 1) ? 0 : extractCurCount(seg_);
9594
}
9695

9796
@Override
9897
public long getThetaLong() {
99-
final int preLongs = extractPreLongs(seg_);
98+
final int preLongs = Sketch.getPreambleLongs(seg_);
10099
return (preLongs > 2) ? extractThetaLong(seg_) : Long.MAX_VALUE;
101100
}
102101

@@ -148,7 +147,7 @@ public byte[] toByteArray() {
148147
@Override
149148
long[] getCache() {
150149
if (otherCheckForSingleItem(seg_)) { return new long[] { seg_.get(JAVA_LONG_UNALIGNED, 8) }; }
151-
final int preLongs = extractPreLongs(seg_);
150+
final int preLongs = Sketch.getPreambleLongs(seg_);
152151
final int curCount = (preLongs == 1) ? 0 : extractCurCount(seg_);
153152
if (curCount > 0) {
154153
final long[] cache = new long[curCount];
@@ -160,12 +159,12 @@ long[] getCache() {
160159

161160
@Override
162161
int getCompactPreambleLongs() {
163-
return extractPreLongs(seg_);
162+
return Sketch.getPreambleLongs(seg_);
164163
}
165164

166165
@Override
167166
int getCurrentPreambleLongs() {
168-
return extractPreLongs(seg_);
167+
return Sketch.getPreambleLongs(seg_);
169168
}
170169

171170
@Override

src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,7 @@
3333
import static org.apache.datasketches.theta.PreambleUtil.THETA_LONG;
3434
import static org.apache.datasketches.theta.PreambleUtil.extractLgArrLongs;
3535
import static org.apache.datasketches.theta.PreambleUtil.extractLgNomLongs;
36-
import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;
37-
import static org.apache.datasketches.theta.PreambleUtil.getSegBytes;
36+
import static org.apache.datasketches.theta.PreambleUtil.getUpdatableSegBytes;
3837
import static org.apache.datasketches.theta.PreambleUtil.insertCurCount;
3938
import static org.apache.datasketches.theta.PreambleUtil.insertFamilyID;
4039
import static org.apache.datasketches.theta.PreambleUtil.insertFlags;
@@ -132,7 +131,7 @@ private DirectQuickSelectSketch(
132131
final int lgArrLongs = lgRF == 0 ? lgNomLongs + 1 : ThetaUtil.MIN_LG_ARR_LONGS;
133132

134133
//check Segment capacity
135-
final int minReqBytes = getSegBytes(lgArrLongs, preambleLongs);
134+
final int minReqBytes = getUpdatableSegBytes(lgArrLongs, preambleLongs);
136135
final long curSegCapBytes = dstSeg.byteSize();
137136
if (curSegCapBytes < minReqBytes) {
138137
throw new SketchesArgumentException(
@@ -176,7 +175,7 @@ static DirectQuickSelectSketch writableWrap(
176175
final MemorySegment srcSeg,
177176
final MemorySegmentRequest mSegReq,
178177
final long seed) {
179-
final int preambleLongs = extractPreLongs(srcSeg); //byte 0
178+
final int preambleLongs = Sketch.getPreambleLongs(srcSeg); //byte 0
180179
final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3
181180
final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4
182181

src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
import static org.apache.datasketches.theta.PreambleUtil.extractCurCount;
3838
import static org.apache.datasketches.theta.PreambleUtil.extractLgArrLongs;
3939
import static org.apache.datasketches.theta.PreambleUtil.extractLgNomLongs;
40-
import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;
4140
import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong;
4241
import static org.apache.datasketches.theta.PreambleUtil.insertThetaLong;
4342

@@ -103,7 +102,7 @@ private DirectQuickSelectSketchR(final long seed, final MemorySegment srcSeg) {
103102
* @return instance of this sketch
104103
*/
105104
static DirectQuickSelectSketchR readOnlyWrap(final MemorySegment srcSeg, final long seed) {
106-
final int preambleLongs = extractPreLongs(srcSeg); //byte 0
105+
final int preambleLongs = Sketch.getPreambleLongs(srcSeg); //byte 0
107106
final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3
108107
final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4
109108

@@ -240,7 +239,7 @@ int getCompactPreambleLongs() {
240239

241240
@Override
242241
int getCurrentPreambleLongs() {
243-
return PreambleUtil.extractPreLongs(wseg_);
242+
return Sketch.getPreambleLongs(wseg_);
244243
}
245244

246245
@Override

src/main/java/org/apache/datasketches/theta/ForwardCompatibility.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
2323
import static org.apache.datasketches.theta.PreambleUtil.extractCurCount;
2424
import static org.apache.datasketches.theta.PreambleUtil.extractFamilyID;
25-
import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;
2625
import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong;
2726

2827
import java.lang.foreign.MemorySegment;
@@ -56,7 +55,7 @@ private ForwardCompatibility() { }
5655
*/
5756
static final CompactSketch heapify1to3(final MemorySegment srcSeg, final short seedHash) {
5857
final int segCap = (int) srcSeg.byteSize();
59-
final int preLongs = extractPreLongs(srcSeg); //always 3 for serVer 1
58+
final int preLongs = Sketch.getPreambleLongs(srcSeg); //always 3 for serVer 1
6059
if (preLongs != 3) {
6160
throw new SketchesArgumentException("PreLongs must be 3 for SerVer 1: " + preLongs);
6261
}
@@ -97,7 +96,7 @@ static final CompactSketch heapify1to3(final MemorySegment srcSeg, final short s
9796
*/
9897
static final CompactSketch heapify2to3(final MemorySegment srcSeg, final short seedHash) {
9998
final int segCap = (int) srcSeg.byteSize();
100-
final int preLongs = extractPreLongs(srcSeg); //1,2 or 3
99+
final int preLongs = Sketch.getPreambleLongs(srcSeg); //1,2 or 3
101100
final int familyId = extractFamilyID(srcSeg); //1,2,3,4
102101
if ((familyId < 1) || (familyId > 4)) {
103102
throw new SketchesArgumentException("Family (Sketch Type) must be 1 to 4: " + familyId);

src/main/java/org/apache/datasketches/theta/HeapAlphaSketch.java

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import static java.lang.Math.min;
2424
import static java.lang.Math.sqrt;
2525
import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
26+
import static org.apache.datasketches.common.Util.DEFAULT_UPDATE_SEED;
2627
import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE;
2728
import static org.apache.datasketches.common.Util.checkBounds;
2829
import static org.apache.datasketches.theta.PreambleUtil.extractCurCount;
@@ -31,7 +32,6 @@
3132
import static org.apache.datasketches.theta.PreambleUtil.extractLgNomLongs;
3233
import static org.apache.datasketches.theta.PreambleUtil.extractLgResizeFactor;
3334
import static org.apache.datasketches.theta.PreambleUtil.extractP;
34-
import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;
3535
import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong;
3636
import static org.apache.datasketches.theta.UpdateReturnState.InsertedCountIncremented;
3737
import static org.apache.datasketches.theta.UpdateReturnState.InsertedCountNotIncremented;
@@ -112,6 +112,17 @@ static HeapAlphaSketch newHeapInstance(final int lgNomLongs, final long seed, fi
112112
return has;
113113
}
114114

115+
/**
116+
* Heapify a sketch from a MemorySegment object containing sketch data.
117+
* @param srcSeg The source MemorySegment object.
118+
* It must have a size of at least 24 bytes.
119+
* The assumed seed is {@link org.apache.datasketches.common.Util#DEFAULT_UPDATE_SEED DEFAULT_UPDATE_SEED}
120+
* @return instance of this sketch
121+
*/
122+
static HeapAlphaSketch heapifyInstance(final MemorySegment srcSeg) {
123+
return heapifyInstance(srcSeg, DEFAULT_UPDATE_SEED);
124+
}
125+
115126
/**
116127
* Heapify a sketch from a MemorySegment object containing sketch data.
117128
* @param srcSeg The source MemorySegment object.
@@ -123,7 +134,7 @@ static HeapAlphaSketch newHeapInstance(final int lgNomLongs, final long seed, fi
123134
static HeapAlphaSketch heapifyInstance(final MemorySegment srcSeg, final long expectedSeed) {
124135
Objects.requireNonNull(srcSeg, "Source MemorySegment must not be null");
125136
checkBounds(0, 24, srcSeg.byteSize());
126-
final int preambleLongs = extractPreLongs(srcSeg); //byte 0
137+
final int preambleLongs = Sketch.getPreambleLongs(srcSeg); //byte 0
127138
final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3
128139
final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4
129140

@@ -234,14 +245,14 @@ public boolean isEmpty() {
234245
* <pre>
235246
* Long || Start Byte Adr:
236247
* Adr:
237-
* || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
238-
* 0 || Seed Hash | Flags | LgArr | LgNom | FamID | SerVer | lgRF | PreLongs=3 |
248+
* || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
249+
* 0 || Seed Hash | Flags | LgArr | LgNom | FamID=1 | SerVer=3 | lgRF | PreLongs=3 |
239250
*
240-
* || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 |
241-
* 1 ||-----------------p-----------------|----------Retained Entries Count---------------|
251+
* || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 |
252+
* 1 ||-----------------p-----------------|----------Retained Entries Count-------------------|
242253
*
243-
* || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 |
244-
* 2 ||---------------------------------Theta---------------------------------------------|
254+
* || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 |
255+
* 2 ||---------------------------------Theta-------------------------------------------------|
245256
* </pre>
246257
*/
247258

src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ final class HeapCompactSketch extends CompactSketch {
5757
* @param curCount correct value
5858
* @param thetaLong The correct
5959
* <a href="{@docRoot}/resources/dictionary.html#thetaLong">thetaLong</a>.
60+
* @param ordered true if cache is ordered.
6061
*/
6162
HeapCompactSketch(final long[] cache, final boolean empty, final short seedHash,
6263
final int curCount, final long thetaLong, final boolean ordered) {

src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
import static org.apache.datasketches.theta.PreambleUtil.extractLgNomLongs;
3131
import static org.apache.datasketches.theta.PreambleUtil.extractLgResizeFactor;
3232
import static org.apache.datasketches.theta.PreambleUtil.extractP;
33-
import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;
3433
import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong;
3534
import static org.apache.datasketches.theta.UpdateReturnState.InsertedCountIncremented;
3635
import static org.apache.datasketches.theta.UpdateReturnState.InsertedCountIncrementedRebuilt;
@@ -108,7 +107,7 @@ private HeapQuickSelectSketch(final int lgNomLongs, final long seed, final float
108107
* @return instance of this sketch
109108
*/
110109
static HeapQuickSelectSketch heapifyInstance(final MemorySegment srcSeg, final long seed) {
111-
final int preambleLongs = extractPreLongs(srcSeg); //byte 0
110+
final int preambleLongs = Sketch.getPreambleLongs(srcSeg); //byte 0
112111
final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3
113112
final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4
114113

0 commit comments

Comments
 (0)