Skip to content

Commit 487faa0

Browse files
committed
transpose-by-key
1 parent 7459fd8 commit 487faa0

2 files changed

Lines changed: 61 additions & 1 deletion

File tree

src/tech/v3/dataset_api.clj

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -601,7 +601,7 @@ test/data/stocks.csv [10 3]:
601601
[col missing scalar-val]
602602
(let [missing (bitmap/->bitmap missing)]
603603
(if (.isEmpty missing)
604-
col
604+
col
605605
(let [cbuf (dtype/->buffer (ds-proto/column-buffer col))
606606
col-dt (dtype/elemwise-datatype col)
607607
ec (.lsize cbuf)]
@@ -1469,6 +1469,27 @@ _unnamed [4 5]:
14691469
(column-names final-ds))))))))))
14701470

14711471

1472+
(defn transpose-by-key
  "Pivot `ds` so that each value found in column `k` names a column of the result.

  For every row of `ds`, the value in column `k` becomes a new column name and
  that row's values from the remaining columns (in `column-names` order) become
  the new column's data.  One additional column lists the names of those
  remaining columns so each output row can be traced back to its source column.

  Arguments:

  * `ds` - source dataset.
  * `k` - name of the column whose values become the new column names.
  * `options` - optional map; `:column-name` names the column that holds the
    original column names and defaults to `:col`.

  Returns a new dataset built from the resulting name->values map."
  ([ds k] (transpose-by-key ds k nil))
  ([ds k options]
   ;; Looking the key column up first surfaces any lookup failure on `k`
   ;; before the remaining work is done.
   (let [key-col (column ds k)
         remaining (dissoc ds k)
         names-colname (get options :column-name :col)
         seed {names-colname (column-names remaining)}
         ;; Each [key-value row-vector] pair is consumed by `into` as a map entry.
         entries (lznc/map hamf/vector key-col (rowvecs remaining))]
     (->>dataset (into seed entries)))))
1492+
14721493

14731494
(comment
14741495
(export-symbols/write-api! 'tech.v3.dataset-api
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
(ns tech.v3.libs.arrow.jpnz-lz4
2+
(:require [tech.v3.datatype :as dtype]
3+
[clojure.tools.logging :as log])
4+
(:import [tech.v3.datatype.array_buffer ArrayBuffer]
5+
[java.io ByteArrayOutputStream ByteArrayInputStream]))
6+
7+
(defn- ensure-bytes-array-buffer
  "Return `data` as an `:int8` array buffer.

  When `data` is already viewable as an array buffer with elementwise datatype
  `:int8` that view is returned as-is.  In every other case - including when
  `data` has no array-buffer view at all - a new `:int8` container is made from
  `data`.

  Previously the no-array-buffer case returned nil, which surfaced later as a
  NullPointerException at the caller's `.ary-data` call."
  ^ArrayBuffer [data]
  (let [ary-buf (dtype/as-array-buffer data)]
    (if (and ary-buf (= :int8 (dtype/elemwise-datatype ary-buf)))
      ary-buf
      ;; NOTE(review): relies on make-container returning an ArrayBuffer for
      ;; :int8, as the original non-int8 branch already assumed - confirm.
      (dtype/make-container :int8 data))))
13+
14+
(defn create-jpnz-lz4-frame-compressor
  "Build a compression function backed by `net.jpountz.lz4.LZ4FrameOutputStream`.

  `comp-map` must have `:compression-type` equal to `:lz4` (checked with `assert`).

  The returned function takes `[compbuf dstbuf]`:

  * `compbuf` - data to compress; coerced with `ensure-bytes-array-buffer`.
  * `dstbuf` - a `ByteArrayOutputStream` to reuse as the sink, or nil to
    allocate a fresh one.

  It returns a map with:

  * `:writer-cache` - the (now reset) `ByteArrayOutputStream`, suitable for
    passing back in as `dstbuf` on the next call.
  * `:dst-buffer` - byte array containing the compressed frame."
  [comp-map]
  (assert (= :lz4 (get comp-map :compression-type)))
  (fn [compbuf dstbuf]
    (let [^ByteArrayOutputStream dstbuf (or dstbuf (java.io.ByteArrayOutputStream.))
          ;; Coerce the input before opening the frame stream so a coercion
          ;; failure cannot leave a dangling frame header in the sink.
          srcbuf (ensure-bytes-array-buffer compbuf)
          ^bytes src-data (.ary-data srcbuf)]
      (try
        ;; with-open guarantees the frame stream is closed (writing the frame
        ;; trailer on success) even when the write throws.
        (with-open [os (net.jpountz.lz4.LZ4FrameOutputStream. dstbuf)]
          (.write os src-data
                  (unchecked-int (.offset srcbuf))
                  (unchecked-int (.n-elems srcbuf))))
        ;; The result map, including the byte copy, is built before the
        ;; finally clause resets the sink.
        {:writer-cache dstbuf
         :dst-buffer (.toByteArray dstbuf)}
        (finally
          ;; Always hand back an empty sink so a failed call cannot leak a
          ;; partial frame into the next use of a cached stream.
          (.reset dstbuf))))))
28+
29+
(defn create-jpnz-lz4-decompressor
  "Build a decompression function backed by `net.jpountz.lz4.LZ4FrameInputStream`.

  Logs a warning when called, as this is the fallback path used when the native
  lz4 library is unavailable.

  The returned function takes `[srcbuf dstbuf]`:

  * `srcbuf` - compressed frame data; converted to a byte array.
  * `dstbuf` - destination buffer; its element count determines how many
    decompressed bytes are read.

  Returns the result of `dtype/copy!` of the decompressed bytes into `dstbuf`."
  []
  (log/warn "Unable to load native lz4 library, falling back to jpountz.
Dependent block frames are not supported!!")
  (fn [srcbuf dstbuf]
    (let [src-byte-data (dtype/->byte-array srcbuf)
          ^bytes temp-dstbuf (byte-array (dtype/ecount dstbuf))
          n-bytes (alength temp-dstbuf)]
      (with-open [is (net.jpountz.lz4.LZ4FrameInputStream.
                      (ByteArrayInputStream. src-byte-data))]
        ;; A single InputStream.read call may return fewer bytes than
        ;; requested, so keep reading until the buffer is full or the stream
        ;; reports end-of-data.
        (loop [offset 0]
          (when (< offset n-bytes)
            (let [n-read (.read is temp-dstbuf (int offset) (int (- n-bytes offset)))]
              (when (pos? n-read)
                (recur (+ offset n-read)))))))
      (dtype/copy! temp-dstbuf dstbuf))))

0 commit comments

Comments
 (0)