Skip to content

Commit bd99ec9

Browse files
committed
FFM Phase 5: sketches req, tdigest
1 parent 5aa035c commit bd99ec9

17 files changed

Lines changed: 461 additions & 314 deletions
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.datasketches.common;
21+
22+
import java.lang.foreign.ValueLayout;
23+
import java.nio.ByteOrder;
24+
25+
/**
26+
* Unaligned value layouts with an explicit byte order: the non-native order and <i>ByteOrder.BIG_ENDIAN</i>.
27+
*/
28+
public final class SpecialValueLayouts {
29+
30+
private SpecialValueLayouts() { }
31+
32+
/**
33+
* The byte order that is the opposite of <i>ByteOrder.nativeOrder()</i> on the running platform.
34+
*/
35+
public static final ByteOrder NON_NATIVE_BYTE_ORDER =
36+
(ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN) ? ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN;
37+
38+
//Non-Native Endian Layouts
39+
40+
/**
41+
* The unaligned <i>char</i> layout using the opposite of <i>ByteOrder.nativeOrder()</i>.
42+
*/
43+
public static final ValueLayout.OfChar JAVA_CHAR_UNALIGNED_NON_NATIVE =
44+
ValueLayout.JAVA_CHAR_UNALIGNED.withOrder(NON_NATIVE_BYTE_ORDER);
45+
46+
/**
47+
* The unaligned <i>double</i> layout using the opposite of <i>ByteOrder.nativeOrder()</i>.
48+
*/
49+
public static final ValueLayout.OfDouble JAVA_DOUBLE_UNALIGNED_NON_NATIVE =
50+
ValueLayout.JAVA_DOUBLE_UNALIGNED.withOrder(NON_NATIVE_BYTE_ORDER);
51+
52+
/**
53+
* The unaligned <i>float</i> layout using the opposite of <i>ByteOrder.nativeOrder()</i>.
54+
*/
55+
public static final ValueLayout.OfFloat JAVA_FLOAT_UNALIGNED_NON_NATIVE =
56+
ValueLayout.JAVA_FLOAT_UNALIGNED.withOrder(NON_NATIVE_BYTE_ORDER);
57+
58+
/**
59+
* The unaligned <i>int</i> layout using the opposite of <i>ByteOrder.nativeOrder()</i>.
60+
*/
61+
public static final ValueLayout.OfInt JAVA_INT_UNALIGNED_NON_NATIVE =
62+
ValueLayout.JAVA_INT_UNALIGNED.withOrder(NON_NATIVE_BYTE_ORDER);
63+
64+
/**
65+
* The unaligned <i>long</i> layout using the opposite of <i>ByteOrder.nativeOrder()</i>.
66+
*/
67+
public static final ValueLayout.OfLong JAVA_LONG_UNALIGNED_NON_NATIVE =
68+
ValueLayout.JAVA_LONG_UNALIGNED.withOrder(NON_NATIVE_BYTE_ORDER);
69+
70+
/**
71+
* The unaligned <i>short</i> layout using the opposite of <i>ByteOrder.nativeOrder()</i>.
72+
*/
73+
public static final ValueLayout.OfShort JAVA_SHORT_UNALIGNED_NON_NATIVE =
74+
ValueLayout.JAVA_SHORT_UNALIGNED.withOrder(NON_NATIVE_BYTE_ORDER);
75+
76+
//Big-Endian Layouts
77+
78+
/**
79+
* The unaligned <i>char</i> layout using <i>ByteOrder.BIG_ENDIAN</i>.
80+
*/
81+
public static final ValueLayout.OfChar JAVA_CHAR_UNALIGNED_BIG_ENDIAN =
82+
ValueLayout.JAVA_CHAR_UNALIGNED.withOrder(ByteOrder.BIG_ENDIAN);
83+
84+
/**
85+
* The unaligned <i>double</i> layout using <i>ByteOrder.BIG_ENDIAN</i>.
86+
*/
87+
public static final ValueLayout.OfDouble JAVA_DOUBLE_UNALIGNED_BIG_ENDIAN =
88+
ValueLayout.JAVA_DOUBLE_UNALIGNED.withOrder(ByteOrder.BIG_ENDIAN);
89+
90+
/**
91+
* The unaligned <i>float</i> layout using <i>ByteOrder.BIG_ENDIAN</i>.
92+
*/
93+
public static final ValueLayout.OfFloat JAVA_FLOAT_UNALIGNED_BIG_ENDIAN =
94+
ValueLayout.JAVA_FLOAT_UNALIGNED.withOrder(ByteOrder.BIG_ENDIAN);
95+
96+
/**
97+
* The unaligned <i>int</i> layout using <i>ByteOrder.BIG_ENDIAN</i>.
98+
*/
99+
public static final ValueLayout.OfInt JAVA_INT_UNALIGNED_BIG_ENDIAN =
100+
ValueLayout.JAVA_INT_UNALIGNED.withOrder(ByteOrder.BIG_ENDIAN);
101+
102+
/**
103+
* The unaligned <i>long</i> layout using <i>ByteOrder.BIG_ENDIAN</i>.
104+
*/
105+
public static final ValueLayout.OfLong JAVA_LONG_UNALIGNED_BIG_ENDIAN =
106+
ValueLayout.JAVA_LONG_UNALIGNED.withOrder(ByteOrder.BIG_ENDIAN);
107+
108+
/**
109+
* The unaligned <i>short</i> layout using <i>ByteOrder.BIG_ENDIAN</i>.
110+
*/
111+
public static final ValueLayout.OfShort JAVA_SHORT_UNALIGNED_BIG_ENDIAN =
112+
ValueLayout.JAVA_SHORT_UNALIGNED.withOrder(ByteOrder.BIG_ENDIAN);
113+
114+
}

src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,8 +278,9 @@ public static <T> ItemsSketch<T> getInstance(final MemorySegment srcSeg,
278278

279279
//Get countArray
280280
final long[] countArray = new long[activeItems];
281-
final int reqBytes = preBytes + (activeItems * Long.BYTES); //count Arr only
281+
final long reqBytes = (preBytes + ((long)activeItems * Long.BYTES)); //count Arr only
282282
checkBounds(0, reqBytes, srcSeg.byteSize()); //check MemorySegment capacity
283+
283284
MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preBytes, countArray, 0, activeItems);
284285

285286
//Get itemArray
@@ -530,7 +531,7 @@ public byte[] toByteArray(final ArrayOfItemsSerDe2<T> serDe) {
530531

531532
final int preBytes = preLongs << 3;
532533
MemorySegment.copy(hashMap.getActiveValues(), 0, seg, JAVA_LONG_UNALIGNED, preBytes, activeItems);
533-
MemorySegment.copy(bytes, 0, seg, JAVA_BYTE, preBytes + (this.getNumActiveItems() << 3), bytes.length);
534+
MemorySegment.copy(bytes, 0, seg, JAVA_BYTE, preBytes + (activeItems << 3), bytes.length);
534535
}
535536
return outArr;
536537
}

src/main/java/org/apache/datasketches/req/FloatBuffer.java

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@
2222
import static org.apache.datasketches.common.Util.LS;
2323
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
2424

25+
import java.lang.foreign.MemorySegment;
2526
import java.util.Arrays;
2627

28+
import org.apache.datasketches.common.positional.PositionalSegment;
2729
import org.apache.datasketches.common.SketchesArgumentException;
28-
import org.apache.datasketches.memory.WritableBuffer;
29-
import org.apache.datasketches.memory.WritableMemory;
3030
import org.apache.datasketches.quantilescommon.InequalitySearch;
3131
import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
3232

@@ -176,7 +176,7 @@ FloatBuffer ensureCapacity(final int newCapacity) {
176176
* @return this
177177
*/
178178
private FloatBuffer ensureSpace(final int space) {
179-
if (count_ + space > capacity_) {
179+
if ((count_ + space) > capacity_) {
180180
final int newCap = count_ + space + delta_;
181181
ensureCapacity(newCap);
182182
}
@@ -219,7 +219,7 @@ int getCountWithCriterion(final float item, final QuantileSearchCriteria searchC
219219
}
220220
final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.LE : InequalitySearch.LT;
221221
final int index = InequalitySearch.find(arr_, low, high, item, crit);
222-
return index == -1 ? 0 : index - low + 1;
222+
return index == -1 ? 0 : (index - low) + 1;
223223
}
224224

225225
/**
@@ -234,8 +234,8 @@ int getCountWithCriterion(final float item, final QuantileSearchCriteria searchC
234234
* @return the selected odds from the range
235235
*/
236236
FloatBuffer getEvensOrOdds(final int startOffset, final int endOffset, final boolean odds) {
237-
final int start = spaceAtBottom_ ? capacity_ - count_ + startOffset : startOffset;
238-
final int end = spaceAtBottom_ ? capacity_ - count_ + endOffset : endOffset;
237+
final int start = spaceAtBottom_ ? (capacity_ - count_) + startOffset : startOffset;
238+
final int end = spaceAtBottom_ ? (capacity_ - count_) + endOffset : endOffset;
239239
sort();
240240
final int range = endOffset - startOffset;
241241
if ((range & 1) == 1) {
@@ -265,7 +265,7 @@ float getItemFromIndex(final int index) {
265265
* @return an item given its offset
266266
*/
267267
float getItem(final int offset) {
268-
final int index = spaceAtBottom_ ? capacity_ - count_ + offset : offset;
268+
final int index = spaceAtBottom_ ? (capacity_ - count_) + offset : offset;
269269
return arr_[index];
270270
}
271271

@@ -317,11 +317,11 @@ boolean isEmpty() {
317317
* @return true iff this is exactly equal to that FloatBuffer.
318318
*/
319319
boolean isEqualTo(final FloatBuffer that) {
320-
if (capacity_ != that.capacity_
321-
|| count_ != that.count_
322-
|| delta_ != that.delta_
323-
|| sorted_ != that.sorted_
324-
|| spaceAtBottom_ != that.spaceAtBottom_) { return false; }
320+
if ((capacity_ != that.capacity_)
321+
|| (count_ != that.count_)
322+
|| (delta_ != that.delta_)
323+
|| (sorted_ != that.sorted_)
324+
|| (spaceAtBottom_ != that.spaceAtBottom_)) { return false; }
325325
for (int i = 0; i < capacity_; i++) {
326326
if (arr_[i] != that.arr_[i]) { return false; }
327327
}
@@ -354,7 +354,7 @@ FloatBuffer mergeSortIn(final FloatBuffer bufIn) {
354354
int i = capacity_ - count_;
355355
int j = bufIn.capacity_ - bufIn.count_;
356356
for (int k = tgtStart; k < capacity_; k++) {
357-
if (i < capacity_ && j < bufIn.capacity_) { //both valid
357+
if ((i < capacity_) && (j < bufIn.capacity_)) { //both valid
358358
arr_[k] = arr_[i] <= arrIn[j] ? arr_[i++] : arrIn[j++];
359359
} else if (i < capacity_) { //i is valid
360360
arr_[k] = arr_[i++];
@@ -368,7 +368,7 @@ FloatBuffer mergeSortIn(final FloatBuffer bufIn) {
368368
int i = count_ - 1;
369369
int j = bufInLen - 1;
370370
for (int k = totLen; k-- > 0; ) {
371-
if (i >= 0 && j >= 0) { //both valid
371+
if ((i >= 0) && (j >= 0)) { //both valid
372372
arr_[k] = arr_[i] >= arrIn[j] ? arr_[i--] : arrIn[j--];
373373
} else if (i >= 0) { //i is valid
374374
arr_[k] = arr_[i--];
@@ -401,18 +401,18 @@ FloatBuffer sort() {
401401
byte[] floatsToBytes() {
402402
final int bytes = Float.BYTES * count_;
403403
final byte[] arr = new byte[bytes];
404-
final WritableBuffer wbuf = WritableMemory.writableWrap(arr).asWritableBuffer();
404+
final PositionalSegment posSeg = PositionalSegment.wrap(MemorySegment.ofArray(arr));
405405
if (spaceAtBottom_) {
406-
wbuf.putFloatArray(arr_, capacity_ - count_, count_);
406+
posSeg.setFloatArray(arr_, capacity_ - count_, count_);
407407
} else {
408-
wbuf.putFloatArray(arr_, 0, count_);
408+
posSeg.setFloatArray(arr_, 0, count_);
409409
}
410-
assert wbuf.getPosition() == bytes;
410+
assert posSeg.getPosition() == bytes;
411411
return arr;
412412
}
413413

414414
/**
415-
* Returns a printable formatted string of the items of this buffer separated by a single space.
415+
* Returns a printable formatted string of the items of this FloatBuffer separated by a single space.
416416
* @param fmt The format for each printed item.
417417
* @param width the number of items to print per line
418418
* @return a printable, formatted string of the items of this buffer.
@@ -427,7 +427,7 @@ String toHorizList(final String fmt, final int width) {
427427
for (int i = start; i < end; i++) {
428428
final float v = arr_[i];
429429
final String str = String.format(fmt, v);
430-
if (i > start && ++cnt % width == 0) { sb.append(LS).append(spaces); }
430+
if ((i > start) && ((++cnt % width) == 0)) { sb.append(LS).append(spaces); }
431431
sb.append(str);
432432
}
433433
return sb.toString();

src/main/java/org/apache/datasketches/req/ReqCompactor.java

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,10 @@
2525
import static org.apache.datasketches.req.ReqSketch.MIN_K;
2626
import static org.apache.datasketches.req.ReqSketch.NOM_CAP_MULT;
2727

28+
import java.lang.foreign.MemorySegment;
2829
import java.util.Random;
2930

30-
import org.apache.datasketches.memory.WritableBuffer;
31-
import org.apache.datasketches.memory.WritableMemory;
31+
import org.apache.datasketches.common.positional.PositionalSegment;
3232
import org.apache.datasketches.req.ReqSketch.CompactorReturn;
3333

3434
/**
@@ -122,7 +122,7 @@ FloatBuffer compact(final CompactorReturn cReturn, final Random rand) {
122122
final long compactionRange = computeCompactionRange(secsToCompact);
123123
final int compactionStart = (int) (compactionRange & 0xFFFF_FFFFL); //low 32
124124
final int compactionEnd = (int) (compactionRange >>> 32); //high 32
125-
assert compactionEnd - compactionStart >= 2;
125+
assert (compactionEnd - compactionStart) >= 2;
126126

127127
if ((state & 1L) == 1L) { coin = !coin; } //if numCompactions odd, flip coin;
128128
else { coin = rand.nextBoolean(); } //random coin flip
@@ -137,7 +137,7 @@ FloatBuffer compact(final CompactorReturn cReturn, final Random rand) {
137137
buf.trimCount(buf.getCount() - (compactionEnd - compactionStart));
138138
state += 1;
139139
ensureEnoughSections();
140-
cReturn.deltaRetItems = buf.getCount() - startRetItems + promote.getCount();
140+
cReturn.deltaRetItems = (buf.getCount() - startRetItems) + promote.getCount();
141141
cReturn.deltaNomSize = getNomCapacity() - startNomCap;
142142
if (reqDebug != null) { reqDebug.emitCompactionDone(lgWeight); }
143143
return promote;
@@ -175,7 +175,7 @@ final int getNomCapacity() { //called from constructor
175175
*/
176176
int getSerializationBytes() {
177177
final int count = buf.getCount();
178-
return 8 + 4 + 1 + 1 + 2 + 4 + count * Float.BYTES; // 20 + array
178+
return 8 + 4 + 1 + 1 + 2 + 4 + (count * Float.BYTES); // 20 + array
179179
}
180180

181181
int getNumSections() {
@@ -227,9 +227,9 @@ ReqCompactor merge(final ReqCompactor other) {
227227
private boolean ensureEnoughSections() {
228228
final float szf;
229229
final int ne;
230-
if (state >= 1L << numSections - 1
231-
&& sectionSize > MIN_K
232-
&& (ne = nearestEven(szf = (float)(sectionSizeFlt / SQRT2))) >= MIN_K)
230+
if ((state >= (1L << (numSections - 1)))
231+
&& (sectionSize > MIN_K)
232+
&& ((ne = nearestEven(szf = (float)(sectionSizeFlt / SQRT2))) >= MIN_K))
233233
{
234234
sectionSizeFlt = szf;
235235
sectionSize = ne;
@@ -248,9 +248,9 @@ private boolean ensureEnoughSections() {
248248
*/
249249
private long computeCompactionRange(final int secsToCompact) {
250250
final int bufLen = buf.getCount();
251-
int nonCompact = getNomCapacity() / 2 + (numSections - secsToCompact) * sectionSize;
251+
int nonCompact = (getNomCapacity() / 2) + ((numSections - secsToCompact) * sectionSize);
252252
//make compacted region even:
253-
nonCompact = (bufLen - nonCompact & 1) == 1 ? nonCompact + 1 : nonCompact;
253+
nonCompact = ((bufLen - nonCompact) & 1) == 1 ? nonCompact + 1 : nonCompact;
254254
final long low = hra ? 0 : nonCompact;
255255
final long high = hra ? bufLen - nonCompact : bufLen;
256256
return (high << 32) + low;
@@ -293,16 +293,16 @@ static final int nearestEven(final float fltVal) {
293293
byte[] toByteArray() {
294294
final int bytes = getSerializationBytes();
295295
final byte[] arr = new byte[bytes];
296-
final WritableBuffer wbuf = WritableMemory.writableWrap(arr).asWritableBuffer();
297-
wbuf.putLong(state);
298-
wbuf.putFloat(sectionSizeFlt);
299-
wbuf.putByte(lgWeight);
300-
wbuf.putByte(numSections);
301-
wbuf.incrementPosition(2); //pad 2
296+
final PositionalSegment posSeg = PositionalSegment.wrap(MemorySegment.ofArray(arr));
297+
posSeg.setLong(state);
298+
posSeg.setFloat(sectionSizeFlt);
299+
posSeg.setByte(lgWeight);
300+
posSeg.setByte(numSections);
301+
posSeg.incrementPosition(2); //pad 2
302302
//buf.sort(); //sort if necessary
303-
wbuf.putInt(buf.getCount()); //count
304-
wbuf.putByteArray(buf.floatsToBytes(), 0, Float.BYTES * buf.getCount());
305-
assert wbuf.getPosition() == bytes;
303+
posSeg.setInt(buf.getCount()); //count
304+
posSeg.setByteArray(buf.floatsToBytes(), 0, Float.BYTES * buf.getCount());
305+
assert posSeg.getPosition() == bytes;
306306
return arr;
307307
}
308308

@@ -319,10 +319,9 @@ String toListPrefix() {
319319
final int secSz = getSectionSize();
320320
final int numSec = getNumSections();
321321
final long num = getState();
322-
final String prefix = String.format(
322+
return String.format(
323323
" C:%d Len:%d NomSz:%d SecSz:%d NumSec:%d State:%d",
324324
h, len, nom, secSz, numSec, num);
325-
return prefix;
326325
}
327326

328327
}

0 commit comments

Comments
 (0)