Skip to content

Commit 7e6a018

Browse files
committed
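Potential pathway of research (incomplete): have column buffers yield the datatype's missing value (e.g. ##NaN) instead of nil for missing entries, and let rowvecs accept a :datatype option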
1 parent c9b6a8b commit 7e6a018

3 files changed

Lines changed: 44 additions & 31 deletions

File tree

src/tech/v3/dataset/impl/column.clj

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141

4242

4343
(defn- reduce-column-buffer
44-
[rfn acc src missing primitive-missing-value sidx eidx]
44+
[rfn acc src missing primitive-missing-value missing-value sidx eidx]
4545
(let [sidx (long sidx)
4646
eidx (long eidx)
4747
^RoaringBitmap missing missing
@@ -52,10 +52,10 @@
5252
(RoaringBitmap/and missing (bitmap/->bitmap sidx eidx))))
5353
^java.util.function.LongBinaryOperator next-missing
5454
(hamf-fn/long-binary-operator
55-
sidx next-missing-idx
56-
(if (== sidx next-missing-idx)
57-
(long (if (.hasNext int-iter) (Integer/toUnsignedLong (.next int-iter)) -1))
58-
next-missing-idx))
55+
sidx next-missing-idx
56+
(if (== sidx next-missing-idx)
57+
(long (if (.hasNext int-iter) (Integer/toUnsignedLong (.next int-iter)) -1))
58+
next-missing-idx))
5959
missing-idx (.applyAsLong next-missing 0 0)]
6060
(cond
6161
(instance? IFn$OLO rfn)
@@ -90,15 +90,15 @@
9090
missing-idx missing-idx]
9191
(if (< sidx eidx)
9292
(let [acc (rfn acc (if (== sidx missing-idx)
93-
nil
93+
missing-value
9494
(.readObject src sidx)))]
9595
(if (reduced? acc)
9696
(deref acc)
9797
(recur (unchecked-inc sidx) acc (.applyAsLong next-missing sidx missing-idx))))
9898
acc)))))
9999

100100
(defn- kv-reduce-column-buffer
101-
[rfn acc src missing primitive-missing-value sidx eidx]
101+
[rfn acc src missing primitive-missing-value missing-value op-dtype sidx eidx]
102102
(let [sidx (long sidx)
103103
eidx (long eidx)
104104
^RoaringBitmap missing missing
@@ -130,7 +130,8 @@
130130
(defn ^:no-doc make-column-buffer
131131
(^Buffer [^RoaringBitmap missing data dtype op-dtype]
132132
(let [^Buffer src (dtype-proto/->buffer data)
133-
missing-value (column-base/datatype->missing-value dtype)
133+
missing-value (when (identical? dtype op-dtype)
134+
(column-base/datatype->missing-value dtype))
134135
primitive-missing-value (column-base/datatype->packed-missing-value dtype)]
135136
;;Sometimes we can utilize a pure passthrough.
136137
(if (.isEmpty missing)
@@ -170,7 +171,8 @@
170171
(readObject [rdr idx] (.readObject this (+ idx sidx)))
171172
(reduce [this rfn acc]
172173
(reduce-column-buffer rfn acc src missing
173-
primitive-missing-value sidx eidx))
174+
primitive-missing-value missing-value
175+
sidx eidx))
174176
(kvreduce [this rfn acc]
175177
(kv-reduce-column-buffer rfn acc src missing primitive-missing-value sidx eidx))))))
176178
(readLong [this idx]
@@ -182,7 +184,8 @@
182184
Double/NaN
183185
(.readDouble src idx)))
184186
(readObject [this idx]
185-
(when-not (.contains missing idx)
187+
(if (.contains missing idx)
188+
missing-value
186189
(.readObject src idx)))
187190
(writeLong [this idx val]
188191
(.remove missing (unchecked-int idx))
@@ -197,7 +200,7 @@
197200
(.writeObject src idx val))
198201
(.add missing (unchecked-int idx))))
199202
(reduce [this rfn acc]
200-
(reduce-column-buffer rfn acc src missing primitive-missing-value
203+
(reduce-column-buffer rfn acc src missing primitive-missing-value missing-value
201204
0 (.lsize src)))
202205
(kvreduce [this rfn acc]
203206
(kv-reduce-column-buffer rfn acc src missing primitive-missing-value

src/tech/v3/dataset/impl/dataset.clj

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -315,9 +315,12 @@
315315
retval
316316
(throw (RuntimeException. (str "Column not found: " cname)))))
317317

318-
(rowvecs [ds options]
318+
(rowvecs [ds {:keys [datatype] :as options}]
319319
(let [readers (hamf/object-array-list
320-
(lznc/map dtype/->reader columns))
320+
(lznc/map (if datatype
321+
#(dtype-proto/elemwise-reader-cast % datatype)
322+
dtype/->reader)
323+
columns))
321324
n-cols (count readers)
322325
n-rows (long (ds-proto/row-count ds))
323326
copying? (get options :copying? true)
@@ -335,24 +338,24 @@
335338
8 (row-vec-copying 8)
336339
(let [crange (hamf/range n-cols)]
337340
(hamf-fn/long->obj
338-
row-idx
339-
(->> crange
340-
(lznc/map (hamf-fn/long->obj
341-
col-idx (.readObject ^Buffer (.get readers col-idx) row-idx)))
342-
(hamf/vec)))))
341+
row-idx
342+
(->> crange
343+
(lznc/map (hamf-fn/long->obj
344+
col-idx (.readObject ^Buffer (.get readers col-idx) row-idx)))
345+
(hamf/vec)))))
343346
;;Non-copying in-place reader
344347
(let [crange (hamf/range n-cols)]
345348
(hamf-fn/long->obj
346-
row-idx
347-
(reify ObjectReader
348-
(lsize [this] n-cols)
349-
(readObject [this col-idx]
350-
(.readObject ^Buffer (.get readers col-idx) row-idx))
351-
(reduce [this rfn acc]
352-
(reduce (hamf-rf/long-accumulator
353-
acc col-idx (rfn acc ((.get readers col-idx) row-idx)))
354-
acc
355-
crange))))))]
349+
row-idx
350+
(reify ObjectReader
351+
(lsize [this] n-cols)
352+
(readObject [this col-idx]
353+
(.readObject ^Buffer (.get readers col-idx) row-idx))
354+
(reduce [this rfn acc]
355+
(reduce (hamf-rf/long-accumulator
356+
acc col-idx (rfn acc ((.get readers col-idx) row-idx)))
357+
acc
358+
crange))))))]
356359
(reify ObjectReader
357360
(lsize [rdr] n-rows)
358361
(readObject [rdr row-idx] (.invokePrim row-fn row-idx))
@@ -361,8 +364,8 @@
361364
(ds-proto/rowvecs options)))
362365
(reduce [rdr rfn acc]
363366
(reduce (hamf-rf/long-accumulator
364-
acc row-idx
365-
(rfn acc (.invokePrim row-fn row-idx)))
367+
acc row-idx
368+
(rfn acc (.invokePrim row-fn row-idx)))
366369
acc (hamf/range n-rows))))))
367370

368371

test/tech/v3/dataset_test.clj

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1299,7 +1299,7 @@
12991299
(let [src-ds (ds/->dataset {:a [1 2 nil 4]})
13001300
dst-ds (assoc src-ds :b (dfn/+ (:a src-ds ) 1))]
13011301
(is (= 1 (dtype/ecount (ds/missing (dst-ds :b)))))
1302-
(is (= [2.0 3.0 nil 5.0]
1302+
(is (= [2.0 3.0 ##NaN 5.0]
13031303
(vec (dst-ds :b))))))
13041304

13051305

@@ -1869,6 +1869,13 @@
18691869
(vec (ds/rows (ds/head (ds/column-map df2 :d (fn ^long [a b c] 1) [:a :b :c]) 5)))))
18701870
(is (= (vec (repeat 5 {:a 1 :b 2 :c 3 :d 1.0}))
18711871
(vec (ds/rows (ds/head (ds/column-map df2 :d (fn ^double [a b c] 1.0) [:a :b :c]) 5)))))))
1872+
1873+
(deftest tc-issue-200
1874+
(let [ds (ds/->dataset {:a [0 Double/NaN 2]})
1875+
]))
1876+
1877+
1878+
18721879
(comment
18731880
(require '[criterium.core :as crit])
18741881
(def data (vec (repeatedly 100000 (fn [] {:a (rand-int 20) :b (rand) :c (rand)}))))

0 commit comments

Comments
 (0)