Skip to content

Commit 644b846

Browse files
committed
Frequency sketches migrated to FFM.
1 parent 23f7c29 commit 644b846

21 files changed

Lines changed: 4737 additions & 4 deletions
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.datasketches.common;
21+
22+
import java.lang.foreign.MemorySegment;
23+
import java.util.Objects;
24+
25+
/**
26+
* Base class for serializing and deserializing custom types.
27+
* @param <T> Type of item
28+
*
29+
* @author Alexander Saydakov
30+
*/
31+
public abstract class ArrayOfItemsSerDe2<T> {
32+
33+
/**
34+
* Serialize a single unserialized item to a byte array.
35+
*
36+
* @param item the item to be serialized
37+
* @return serialized representation of the given item
38+
*/
39+
public abstract byte[] serializeToByteArray(T item);
40+
41+
/**
42+
* Serialize an array of unserialized items to a byte array of contiguous serialized items.
43+
*
44+
* @param items array of items to be serialized
45+
* @return contiguous, serialized representation of the given array of unserialized items
46+
*/
47+
public abstract byte[] serializeToByteArray(T[] items);
48+
49+
/**
50+
* Deserialize a contiguous sequence of serialized items from the given MemorySegment
51+
* starting at a MemorySegment offset of zero and extending numItems.
52+
*
53+
* @param seg MemorySegment containing a contiguous sequence of serialized items
54+
* @param numItems number of items in the contiguous serialized sequence.
55+
* @return array of deserialized items
56+
* @see #deserializeFromMemorySegment(MemorySegment, long, int)
57+
*/
58+
public T[] deserializeFromMemorySegment(final MemorySegment seg, final int numItems) {
59+
return deserializeFromMemorySegment(seg, 0, numItems);
60+
}
61+
62+
/**
63+
* Deserialize a contiguous sequence of serialized items from the given MemorySegment
64+
* starting at the given MemorySegment <i>offsetBytes</i> and extending numItems.
65+
*
66+
* @param seg MemorySegment containing a contiguous sequence of serialized items
67+
* @param offsetBytes the starting offset in the given MemorySegment.
68+
* @param numItems number of items in the contiguous serialized sequence.
69+
* @return array of deserialized items
70+
*/
71+
public abstract T[] deserializeFromMemorySegment(MemorySegment seg, long offsetBytes, int numItems);
72+
73+
/**
74+
* Returns the serialized size in bytes of a single unserialized item.
75+
* @param item a specific item
76+
* @return the serialized size in bytes of a single unserialized item.
77+
*/
78+
public abstract int sizeOf(T item);
79+
80+
/**
81+
* Returns the serialized size in bytes of the array of items.
82+
* @param items an array of items.
83+
* @return the serialized size in bytes of the array of items.
84+
*/
85+
public int sizeOf(final T[] items) {
86+
Objects.requireNonNull(items, "Items must not be null");
87+
int totalBytes = 0;
88+
for (int i = 0; i < items.length; i++) {
89+
totalBytes += sizeOf(items[i]);
90+
}
91+
return totalBytes;
92+
}
93+
94+
/**
95+
* Returns the serialized size in bytes of the number of contiguous serialized items in MemorySegment.
96+
* The capacity of the given MemorySegment can be much larger that the required size of the items.
97+
* @param seg the given MemorySegment.
98+
* @param offsetBytes the starting offset in the given MemorySegment.
99+
* @param numItems the number of serialized items contained in the MemorySegment
100+
* @return the serialized size in bytes of the given number of items.
101+
*/
102+
public abstract int sizeOf(MemorySegment seg, long offsetBytes, int numItems);
103+
104+
/**
105+
* Returns a human readable string of an item.
106+
* @param item a specific item
107+
* @return a human readable string of an item.
108+
*/
109+
public abstract String toString(T item);
110+
111+
/**
112+
* Returns the concrete class of type T
113+
* @return the concrete class of type T
114+
*/
115+
public abstract Class<T> getClassOfT();
116+
}
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.datasketches.common;
21+
22+
import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
23+
import static org.apache.datasketches.common.ByteArrayUtil.putLongLE;
24+
25+
import java.lang.foreign.MemorySegment;
26+
import java.util.Objects;
27+
28+
/**
29+
* Methods of serializing and deserializing arrays of Long.
30+
*
31+
* @author Alexander Saydakov
32+
*/
33+
public class ArrayOfLongsSerDe2 extends ArrayOfItemsSerDe2<Long> {
34+
35+
@Override
36+
public byte[] serializeToByteArray(final Long item) {
37+
Objects.requireNonNull(item, "Item must not be null");
38+
final byte[] byteArr = new byte[Long.BYTES];
39+
putLongLE(byteArr, 0, item.longValue());
40+
return byteArr;
41+
}
42+
43+
@Override
44+
public byte[] serializeToByteArray(final Long[] items) {
45+
Objects.requireNonNull(items, "Items must not be null");
46+
if (items.length == 0) { return new byte[0]; }
47+
final byte[] bytes = new byte[Long.BYTES * items.length];
48+
final MemorySegment seg = MemorySegment.ofArray(bytes);
49+
long offset = 0;
50+
for (int i = 0; i < items.length; i++) {
51+
seg.set(JAVA_LONG_UNALIGNED, offset, items[i]);
52+
offset += Long.BYTES;
53+
}
54+
return bytes;
55+
}
56+
57+
@Override
58+
public Long[] deserializeFromMemorySegment(final MemorySegment seg, final int numItems) {
59+
return deserializeFromMemorySegment(seg, 0, numItems);
60+
}
61+
62+
@Override
63+
public Long[] deserializeFromMemorySegment(final MemorySegment seg, final long offsetBytes, final int numItems) {
64+
Objects.requireNonNull(seg, "MemorySegment must not be null");
65+
if (numItems <= 0) { return new Long[0]; }
66+
long offset = offsetBytes;
67+
Util.checkBounds(offset, Long.BYTES * (long)numItems, seg.byteSize());
68+
final Long[] array = new Long[numItems];
69+
for (int i = 0; i < numItems; i++) {
70+
array[i] = seg.get(JAVA_LONG_UNALIGNED, offset);
71+
offset += Long.BYTES;
72+
}
73+
return array;
74+
}
75+
76+
@Override
77+
public int sizeOf(final Long item) {
78+
Objects.requireNonNull(item, "Item must not be null");
79+
return Long.BYTES;
80+
}
81+
82+
@Override //override because this is simpler
83+
public int sizeOf(final Long[] items) {
84+
Objects.requireNonNull(items, "Items must not be null");
85+
return items.length * Long.BYTES;
86+
}
87+
88+
@Override
89+
public int sizeOf(final MemorySegment seg, final long offsetBytes, final int numItems) {
90+
Objects.requireNonNull(seg, "MemorySegment must not be null");
91+
return numItems * Long.BYTES;
92+
}
93+
94+
@Override
95+
public String toString(final Long item) {
96+
if (item == null) { return "null"; }
97+
return item.toString();
98+
}
99+
100+
@Override
101+
public Class<Long> getClassOfT() { return Long.class; }
102+
}
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.datasketches.common;
21+
22+
import static java.lang.foreign.ValueLayout.JAVA_BYTE;
23+
import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
24+
import static org.apache.datasketches.common.ByteArrayUtil.copyBytes;
25+
import static org.apache.datasketches.common.ByteArrayUtil.putIntLE;
26+
27+
import java.lang.foreign.MemorySegment;
28+
import java.nio.charset.StandardCharsets;
29+
import java.util.Objects;
30+
31+
/**
32+
* Methods of serializing and deserializing arrays of String.
33+
* This class serializes strings in UTF-8 format, which is more compact compared to
34+
* {@link ArrayOfUtf16StringsSerDe}. In an extreme case when all strings are in ASCII,
35+
* this method is 2 times more compact, but it takes more time to encode and decode
36+
* by a factor of 1.5 to 2.
37+
*
38+
* <p>The serialization
39+
*
40+
* @author Alexander Saydakov
41+
*/
42+
public class ArrayOfStringsSerDe2 extends ArrayOfItemsSerDe2<String> {
43+
44+
@Override
45+
public byte[] serializeToByteArray(final String item) {
46+
Objects.requireNonNull(item, "Item must not be null");
47+
if (item.isEmpty()) { return new byte[] { 0, 0, 0, 0 }; }
48+
final byte[] utf8ByteArr = item.getBytes(StandardCharsets.UTF_8);
49+
final int numBytes = utf8ByteArr.length;
50+
final byte[] out = new byte[numBytes + Integer.BYTES];
51+
copyBytes(utf8ByteArr, 0, out, 4, numBytes);
52+
putIntLE(out, 0, numBytes);
53+
return out;
54+
}
55+
56+
@Override
57+
public byte[] serializeToByteArray(final String[] items) {
58+
Objects.requireNonNull(items, "Items must not be null");
59+
if (items.length == 0) { return new byte[0]; }
60+
int totalBytes = 0;
61+
final int numItems = items.length;
62+
final byte[][] serialized2DArray = new byte[numItems][];
63+
for (int i = 0; i < numItems; i++) {
64+
serialized2DArray[i] = items[i].getBytes(StandardCharsets.UTF_8);
65+
totalBytes += serialized2DArray[i].length + Integer.BYTES;
66+
}
67+
final byte[] bytesOut = new byte[totalBytes];
68+
int offset = 0;
69+
for (int i = 0; i < numItems; i++) {
70+
final int utf8len = serialized2DArray[i].length;
71+
putIntLE(bytesOut, offset, utf8len);
72+
offset += Integer.BYTES;
73+
copyBytes(serialized2DArray[i], 0, bytesOut, offset, utf8len);
74+
offset += utf8len;
75+
}
76+
return bytesOut;
77+
}
78+
79+
@Override
80+
public String[] deserializeFromMemorySegment(final MemorySegment seg, final int numItems) {
81+
return deserializeFromMemorySegment(seg, 0, numItems);
82+
}
83+
84+
@Override
85+
public String[] deserializeFromMemorySegment(final MemorySegment seg, final long offsetBytes, final int numItems) {
86+
Objects.requireNonNull(seg, "MemorySegment must not be null");
87+
if (numItems <= 0) { return new String[0]; }
88+
final String[] array = new String[numItems];
89+
long offset = offsetBytes;
90+
for (int i = 0; i < numItems; i++) {
91+
Util.checkBounds(offset, Integer.BYTES, seg.byteSize());
92+
final int strLength = seg.get(JAVA_INT_UNALIGNED, offset);
93+
offset += Integer.BYTES;
94+
final byte[] utf8Bytes = new byte[strLength];
95+
Util.checkBounds(offset, strLength, seg.byteSize());
96+
MemorySegment.copy(seg, JAVA_BYTE, offset, utf8Bytes, 0, strLength);
97+
offset += strLength;
98+
array[i] = new String(utf8Bytes, StandardCharsets.UTF_8);
99+
}
100+
return array;
101+
}
102+
103+
@Override
104+
public int sizeOf(final String item) {
105+
Objects.requireNonNull(item, "Item must not be null");
106+
if (item.isEmpty()) { return Integer.BYTES; }
107+
return item.getBytes(StandardCharsets.UTF_8).length + Integer.BYTES;
108+
}
109+
110+
@Override
111+
public int sizeOf(final MemorySegment seg, final long offsetBytes, final int numItems) {
112+
Objects.requireNonNull(seg, "MemorySegment must not be null");
113+
if (numItems <= 0) { return 0; }
114+
long offset = offsetBytes;
115+
final long segCap = seg.byteSize();
116+
for (int i = 0; i < numItems; i++) {
117+
Util.checkBounds(offset, Integer.BYTES, segCap);
118+
final int itemLenBytes = seg.get(JAVA_INT_UNALIGNED, offset);
119+
offset += Integer.BYTES;
120+
Util.checkBounds(offset, itemLenBytes, segCap);
121+
offset += itemLenBytes;
122+
}
123+
return (int)(offset - offsetBytes);
124+
}
125+
126+
@Override
127+
public String toString(final String item) {
128+
if (item == null) { return "null"; }
129+
return item;
130+
}
131+
132+
@Override
133+
public Class<String> getClassOfT() { return String.class; }
134+
}

0 commit comments

Comments
 (0)