Skip to content

Commit c1633cb

Browse files
authored
Merge pull request #671 from apache/ffm_phase4
FFM Phase 4
2 parents 23f7c29 + 5aa035c commit c1633cb

50 files changed

Lines changed: 2241 additions & 1247 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.datasketches.common;
21+
22+
import static java.lang.foreign.ValueLayout.JAVA_BYTE;
23+
24+
import java.lang.foreign.MemorySegment;
25+
import java.util.Objects;
26+
27+
/**
28+
* Methods of serializing and deserializing arrays of Boolean as a bit array.
29+
*
30+
* @author Jon Malkin
31+
*/
32+
public class ArrayOfBooleansSerDe2 extends ArrayOfItemsSerDe2<Boolean> {
33+
/**
34+
* Computes number of bytes needed for packed bit encoding of the array of booleans. Rounds
35+
* partial bytes up to return a whole number of bytes.
36+
*
37+
* @param arrayLength Number of items in the array to serialize
38+
* @return Number of bytes needed to encode the array
39+
*/
40+
public static int computeBytesNeeded(final int arrayLength) {
41+
return (arrayLength >>> 3) + ((arrayLength & 0x7) > 0 ? 1 : 0);
42+
}
43+
44+
@Override
45+
public byte[] serializeToByteArray(final Boolean item) {
46+
Objects.requireNonNull(item, "Item must not be null");
47+
final byte[] bytes = new byte[1];
48+
bytes[0] = (item) ? (byte)1 : 0;
49+
return bytes;
50+
}
51+
52+
@Override
53+
public byte[] serializeToByteArray(final Boolean[] items) {
54+
Objects.requireNonNull(items, "Items must not be null");
55+
final int bytesNeeded = computeBytesNeeded(items.length);
56+
final byte[] bytes = new byte[bytesNeeded];
57+
final MemorySegment seg = MemorySegment.ofArray(bytes);
58+
59+
byte val = 0;
60+
for (int i = 0; i < items.length; ++i) {
61+
if (items[i]) {
62+
val |= 0x1 << (i & 0x7);
63+
}
64+
if ((i & 0x7) == 0x7) {
65+
seg.set(JAVA_BYTE, i >>> 3, val);
66+
val = 0;
67+
}
68+
}
69+
// write out any remaining values (if val=0, still good to be explicit)
70+
if ((items.length & 0x7) > 0) {
71+
seg.set(JAVA_BYTE, bytesNeeded - 1, val);
72+
}
73+
return bytes;
74+
}
75+
76+
@Override
77+
public Boolean[] deserializeFromMemorySegment(final MemorySegment seg, final int numItems) {
78+
return deserializeFromMemorySegment(seg, 0, numItems);
79+
}
80+
81+
@Override
82+
public Boolean[] deserializeFromMemorySegment(final MemorySegment seg, final long offsetBytes, final int numItems) {
83+
Objects.requireNonNull(seg, "MemorySegment must not be null");
84+
if (numItems <= 0) { return new Boolean[0]; }
85+
final int numBytes = computeBytesNeeded(numItems);
86+
Util.checkBounds(offsetBytes, numBytes, seg.byteSize());
87+
final Boolean[] array = new Boolean[numItems];
88+
89+
byte srcVal = 0;
90+
for (int i = 0, b = 0; i < numItems; ++i) {
91+
if ((i & 0x7) == 0x0) { // should trigger on first iteration
92+
srcVal = seg.get(JAVA_BYTE, offsetBytes + b++);
93+
}
94+
array[i] = ((srcVal >>> (i & 0x7)) & 0x1) == 1;
95+
}
96+
return array;
97+
}
98+
99+
@Override
100+
public int sizeOf(final Boolean item) {
101+
Objects.requireNonNull(item, "Item must not be null");
102+
return computeBytesNeeded(1);
103+
}
104+
105+
@Override //needs to override default due to the bit packing, which must be computed.
106+
public int sizeOf(final Boolean[] items) {
107+
Objects.requireNonNull(items, "Item must not be null");
108+
return computeBytesNeeded(items.length);
109+
}
110+
111+
@Override
112+
public int sizeOf(final MemorySegment seg, final long offsetBytes, final int numItems) {
113+
Objects.requireNonNull(seg, "MemorySegment must not be null");
114+
return computeBytesNeeded(numItems);
115+
}
116+
117+
@Override
118+
public String toString(final Boolean item) {
119+
if (item == null) { return "null"; }
120+
return item ? "true" : "false";
121+
}
122+
123+
@Override
124+
public Class<Boolean> getClassOfT() { return Boolean.class; }
125+
}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.datasketches.common;
21+
22+
import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED;
import static org.apache.datasketches.common.ByteArrayUtil.putDoubleLE;

import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.ByteOrder;
import java.util.Objects;
27+
28+
/**
29+
* Methods of serializing and deserializing arrays of Double.
30+
*
31+
* @author Alexander Saydakov
32+
*/
33+
public class ArrayOfDoublesSerDe2 extends ArrayOfItemsSerDe2<Double> {
34+
35+
@Override
36+
public byte[] serializeToByteArray(final Double item) {
37+
Objects.requireNonNull(item, "Item must not be null");
38+
final byte[] byteArr = new byte[Double.BYTES];
39+
putDoubleLE(byteArr, 0, item.doubleValue());
40+
return byteArr;
41+
}
42+
43+
@Override
44+
public byte[] serializeToByteArray(final Double[] items) {
45+
Objects.requireNonNull(items, "Items must not be null");
46+
if (items.length == 0) { return new byte[0]; }
47+
final byte[] bytes = new byte[Double.BYTES * items.length];
48+
final MemorySegment seg = MemorySegment.ofArray(bytes);
49+
long offset = 0;
50+
for (int i = 0; i < items.length; i++) {
51+
seg.set(JAVA_DOUBLE_UNALIGNED, offset, items[i]);
52+
offset += Double.BYTES;
53+
}
54+
return bytes;
55+
}
56+
57+
@Override
58+
public Double[] deserializeFromMemorySegment(final MemorySegment seg, final int numItems) {
59+
return deserializeFromMemorySegment(seg, 0, numItems);
60+
}
61+
62+
@Override
63+
public Double[] deserializeFromMemorySegment(final MemorySegment seg, final long offsetBytes, final int numItems) {
64+
Objects.requireNonNull(seg, "MemorySegment must not be null");
65+
if (numItems <= 0) { return new Double[0]; }
66+
long offset = offsetBytes;
67+
Util.checkBounds(offset, Double.BYTES * (long)numItems, seg.byteSize());
68+
final Double[] array = new Double[numItems];
69+
70+
for (int i = 0; i < numItems; i++) {
71+
array[i] = seg.get(JAVA_DOUBLE_UNALIGNED, offset);
72+
offset += Double.BYTES;
73+
}
74+
return array;
75+
}
76+
77+
@Override
78+
public int sizeOf(final Double item) {
79+
Objects.requireNonNull(item, "Item must not be null");
80+
return Double.BYTES;
81+
}
82+
83+
@Override //override because this is simpler
84+
public int sizeOf(final Double[] items) {
85+
Objects.requireNonNull(items, "Items must not be null");
86+
return items.length * Double.BYTES;
87+
}
88+
89+
@Override
90+
public int sizeOf(final MemorySegment seg, final long offsetBytes, final int numItems) {
91+
Objects.requireNonNull(seg, "MemorySegment must not be null");
92+
return numItems * Double.BYTES;
93+
}
94+
95+
@Override
96+
public String toString(final Double item) {
97+
if (item == null) { return "null"; }
98+
return item.toString();
99+
}
100+
101+
@Override
102+
public Class<Double> getClassOfT() { return Double.class; }
103+
}
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.datasketches.common;
21+
22+
import java.lang.foreign.MemorySegment;
23+
import java.util.Objects;
24+
25+
/**
26+
* Base class for serializing and deserializing custom types.
27+
* @param <T> Type of item
28+
*
29+
* @author Alexander Saydakov
30+
*/
31+
public abstract class ArrayOfItemsSerDe2<T> {
32+
33+
/**
34+
* Serialize a single unserialized item to a byte array.
35+
*
36+
* @param item the item to be serialized
37+
* @return serialized representation of the given item
38+
*/
39+
public abstract byte[] serializeToByteArray(T item);
40+
41+
/**
42+
* Serialize an array of unserialized items to a byte array of contiguous serialized items.
43+
*
44+
* @param items array of items to be serialized
45+
* @return contiguous, serialized representation of the given array of unserialized items
46+
*/
47+
public abstract byte[] serializeToByteArray(T[] items);
48+
49+
/**
50+
* Deserialize a contiguous sequence of serialized items from the given MemorySegment
51+
* starting at a MemorySegment offset of zero and extending numItems.
52+
*
53+
* @param seg MemorySegment containing a contiguous sequence of serialized items
54+
* @param numItems number of items in the contiguous serialized sequence.
55+
* @return array of deserialized items
56+
* @see #deserializeFromMemorySegment(MemorySegment, long, int)
57+
*/
58+
public T[] deserializeFromMemorySegment(final MemorySegment seg, final int numItems) {
59+
return deserializeFromMemorySegment(seg, 0, numItems);
60+
}
61+
62+
/**
63+
* Deserialize a contiguous sequence of serialized items from the given MemorySegment
64+
* starting at the given MemorySegment <i>offsetBytes</i> and extending numItems.
65+
*
66+
* @param seg MemorySegment containing a contiguous sequence of serialized items
67+
* @param offsetBytes the starting offset in the given MemorySegment.
68+
* @param numItems number of items in the contiguous serialized sequence.
69+
* @return array of deserialized items
70+
*/
71+
public abstract T[] deserializeFromMemorySegment(MemorySegment seg, long offsetBytes, int numItems);
72+
73+
/**
74+
* Returns the serialized size in bytes of a single unserialized item.
75+
* @param item a specific item
76+
* @return the serialized size in bytes of a single unserialized item.
77+
*/
78+
public abstract int sizeOf(T item);
79+
80+
/**
81+
* Returns the serialized size in bytes of the array of items.
82+
* @param items an array of items.
83+
* @return the serialized size in bytes of the array of items.
84+
*/
85+
public int sizeOf(final T[] items) {
86+
Objects.requireNonNull(items, "Items must not be null");
87+
int totalBytes = 0;
88+
for (int i = 0; i < items.length; i++) {
89+
totalBytes += sizeOf(items[i]);
90+
}
91+
return totalBytes;
92+
}
93+
94+
/**
95+
* Returns the serialized size in bytes of the number of contiguous serialized items in MemorySegment.
96+
* The capacity of the given MemorySegment can be much larger that the required size of the items.
97+
* @param seg the given MemorySegment.
98+
* @param offsetBytes the starting offset in the given MemorySegment.
99+
* @param numItems the number of serialized items contained in the MemorySegment
100+
* @return the serialized size in bytes of the given number of items.
101+
*/
102+
public abstract int sizeOf(MemorySegment seg, long offsetBytes, int numItems);
103+
104+
/**
105+
* Returns a human readable string of an item.
106+
* @param item a specific item
107+
* @return a human readable string of an item.
108+
*/
109+
public abstract String toString(T item);
110+
111+
/**
112+
* Returns the concrete class of type T
113+
* @return the concrete class of type T
114+
*/
115+
public abstract Class<T> getClassOfT();
116+
}

0 commit comments

Comments
 (0)