From 52f5239159845ba84f72006ded7e4d88cbd30573 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 25 May 2026 15:01:33 -0400 Subject: [PATCH] arena: handle mismatched null inner cells in trivial binary ToT ops arena_trivial_binary sized the result by the left operand only and read the right operand unconditionally. A cell present in left but null in right then read a null slab (segfault), and a cell present in right but null in left was silently dropped. Two ToT arrays with the same outer shape can differ in which inner cells are populated within an outer tile (e.g. occ_tile_size>1 aggregates several pairs, some screened to null), which exercises exactly this. Size the result by the union of left/right cell presence and combine a lone cell against an implicit zero slab: correct for the linear ops (add: l+0 / 0+r; subt: l-0 / 0-r) and numerically correct for mult (l*0 = 0). The same kernel backs both Tensor and Tensor>, so both inner types are fixed. Add regression tests (lone-left, both, both-null, lone-right cells) for add/subt/mult on both inner types. --- src/TiledArray/tensor/arena_kernels.h | 35 +++++++-- tests/arena_tensor_kernels.cpp | 96 ++++++++++++++++++++++++ tests/arena_tot_trivial.cpp | 104 ++++++++++++++++++++++++-- 3 files changed, 222 insertions(+), 13 deletions(-) diff --git a/src/TiledArray/tensor/arena_kernels.h b/src/TiledArray/tensor/arena_kernels.h index 4db5212ef3..403de9ffd4 100644 --- a/src/TiledArray/tensor/arena_kernels.h +++ b/src/TiledArray/tensor/arena_kernels.h @@ -316,21 +316,44 @@ OuterTensor arena_trivial_binary(const LeftTensor& left, using inner_range_t = typename OuterTensor::value_type::range_type; TA_ASSERT(left.range().volume() == right.range().volume()); TA_ASSERT(left.nbatch() == right.nbatch()); - auto range_fn = [&left](std::size_t ord) -> inner_range_t { + // Union sparsity: a result cell is present if *either* operand cell is. + // ToT arrays with the same outer shape can still differ in which inner cells + // are populated within an outer tile (e.g. occ_tile_size>1 aggregates several + // pairs, some screened to null). A cell present in only one operand is + // combined against an implicit zero slab below -- correct for the linear ops + // (add: l+0 / 0+r; subt: l-0 / 0-r) and numerically correct for mult (l*0=0, + // emitted as an explicit zero tile). Without this, a lone-left cell would + // read a null right slab (segfault) and a lone-right cell would be silently + // dropped, losing that addend. + auto range_fn = [&left, &right](std::size_t ord) -> inner_range_t { const auto& l = left.data()[ord]; - return l.empty() ? inner_range_t{} : l.range(); + if (!l.empty()) return l.range(); + const auto& r = right.data()[ord]; + return r.empty() ? inner_range_t{} : r.range(); }; OuterTensor result = arena_outer_init( left.range(), left.nbatch(), range_fn, alignof(elem_t), /*zero_init=*/false); const std::size_t N_cells = left.range().volume() * left.nbatch(); + std::vector zeros; // grown lazily; implicit-zero slab for lone cells for (std::size_t ord = 0; ord < N_cells; ++ord) { auto& dst = result.data()[ord]; if (dst.empty()) continue; - TA_ASSERT(left.data()[ord].size() == right.data()[ord].size()); - TA_ASSERT(left.data()[ord].size() == dst.size()); - fill_op(dst.data(), left.data()[ord].data(), right.data()[ord].data(), - dst.size()); + const auto& l = left.data()[ord]; + const auto& r = right.data()[ord]; + const std::size_t n = dst.size(); + const bool have_l = !l.empty(); + const bool have_r = !r.empty(); + TA_ASSERT(!have_l || l.size() == n); + TA_ASSERT(!have_r || r.size() == n); + if (have_l && have_r) { + fill_op(dst.data(), l.data(), r.data(), n); + } else { + if (zeros.size() < n) zeros.assign(n, elem_t{}); + const elem_t* l_ptr = have_l ? l.data() : zeros.data(); + const elem_t* r_ptr = have_r ? r.data() : zeros.data(); + fill_op(dst.data(), l_ptr, r_ptr, n); + } } return result; } diff --git a/tests/arena_tensor_kernels.cpp b/tests/arena_tensor_kernels.cpp index 09eb0aa232..94dcb39f2b 100644 --- a/tests/arena_tensor_kernels.cpp +++ b/tests/arena_tensor_kernels.cpp @@ -725,4 +725,100 @@ BOOST_AUTO_TEST_CASE(tot_axpy_to_accumulates_scaled_operand) { } } +// --- mismatched null-inner-cell coverage -------------------------------- +// occ_tile_size>1 (and any block-sparse ToT) can produce two operands with +// the same outer shape but different *inner*-cell sparsity within an outer +// tile. Regression coverage for the bug where arena_trivial_binary sized the +// result by the left operand only and read the right unconditionally: a cell +// present in left but null in right read a null slab (segfault), and a cell +// present in right but null in left was silently dropped. + +namespace { + +/// Build an Outer of `n_outer` cells; cell `ord` is null iff `present[ord]` +/// is false, otherwise a length-`n_inner` ArenaTensor filled deterministically. +Outer make_outer_sparse(std::size_t n_outer, std::size_t n_inner, double base, + const std::vector& present) { + TA::Range outer_r{static_cast(n_outer)}; + auto shape_fn = [n_inner, &present](std::size_t ord) { + return present[ord] ? TA::Range{static_cast(n_inner)} : TA::Range(); + }; + Outer outer = TA::detail::arena_outer_init(outer_r, 1, shape_fn); + for (std::size_t ord = 0; ord < n_outer; ++ord) { + Inner& inner = outer.data()[ord]; + if (!inner) continue; + for (std::size_t i = 0; i < inner.size(); ++i) + inner.data()[i] = base + ord * 100.0 + i; + } + return outer; +} + +// L present on {0,1,2}, R present on {1,2,4}, over 5 outer cells: +// 0 = lone-left, 1&2 = both, 3 = both-null, 4 = lone-right. +constexpr std::size_t kNo = 5, kNi = 4; +const std::vector kLpresent{true, true, true, false, false}; +const std::vector kRpresent{false, true, true, false, true}; + +} // namespace + +BOOST_AUTO_TEST_CASE(trivial_add_mismatched_null_inners) { + Outer L = make_outer_sparse(kNo, kNi, 1.0, kLpresent); + Outer R = make_outer_sparse(kNo, kNi, 0.5, kRpresent); + Outer sum = L.add(R); // must not segfault on lone-left cell 0 + for (std::size_t ord = 0; ord < kNo; ++ord) { + const Inner &l = L.data()[ord], &r = R.data()[ord], &d = sum.data()[ord]; + const bool hl = bool(l), hr = bool(r); + if (!hl && !hr) { + BOOST_CHECK(!d); // both null -> null result + } else { + BOOST_REQUIRE(bool(d)); + for (std::size_t i = 0; i < d.size(); ++i) { + const double lv = hl ? l.data()[i] : 0.0; + const double rv = hr ? r.data()[i] : 0.0; + BOOST_CHECK_EQUAL(d.data()[i], lv + rv); // union: lone-right kept too + } + } + } +} + +BOOST_AUTO_TEST_CASE(trivial_subt_mismatched_null_inners) { + Outer L = make_outer_sparse(kNo, kNi, 5.0, kLpresent); + Outer R = make_outer_sparse(kNo, kNi, 1.0, kRpresent); + Outer diff = L.subt(R); + for (std::size_t ord = 0; ord < kNo; ++ord) { + const Inner &l = L.data()[ord], &r = R.data()[ord], &d = diff.data()[ord]; + const bool hl = bool(l), hr = bool(r); + if (!hl && !hr) { + BOOST_CHECK(!d); + } else { + BOOST_REQUIRE(bool(d)); + for (std::size_t i = 0; i < d.size(); ++i) { + const double lv = hl ? l.data()[i] : 0.0; + const double rv = hr ? r.data()[i] : 0.0; + BOOST_CHECK_EQUAL(d.data()[i], lv - rv); // lone-right -> -r + } + } + } +} + +BOOST_AUTO_TEST_CASE(trivial_mult_mismatched_null_inners) { + Outer L = make_outer_sparse(kNo, kNi, 2.0, kLpresent); + Outer R = make_outer_sparse(kNo, kNi, 0.5, kRpresent); + Outer prod = L.mult(R); + for (std::size_t ord = 0; ord < kNo; ++ord) { + const Inner &l = L.data()[ord], &r = R.data()[ord], &d = prod.data()[ord]; + const bool hl = bool(l), hr = bool(r); + if (hl && hr) { + BOOST_REQUIRE(bool(d)); + for (std::size_t i = 0; i < d.size(); ++i) + BOOST_CHECK_EQUAL(d.data()[i], l.data()[i] * r.data()[i]); + } else if (bool(d)) { + // a lone cell multiplies against an implicit zero -> a zero tile + // (numerically equivalent to absent); tolerate either policy. + for (std::size_t i = 0; i < d.size(); ++i) + BOOST_CHECK_EQUAL(d.data()[i], 0.0); + } + } +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/arena_tot_trivial.cpp b/tests/arena_tot_trivial.cpp index 627bd5a7cc..f61e7985ec 100644 --- a/tests/arena_tot_trivial.cpp +++ b/tests/arena_tot_trivial.cpp @@ -55,7 +55,7 @@ bool inners_share_one_slab(const outer_t& tot) { return true; } -} +} // namespace BOOST_AUTO_TEST_SUITE(arena_tot_trivial_suite, TA_UT_LABEL_SERIAL) @@ -88,8 +88,8 @@ BOOST_AUTO_TEST_CASE(add_bit_equal_and_one_slab) { for (std::size_t ord = 0; ord < L.range().volume(); ++ord) { inner_t inner((L.data() + ord)->range()); for (std::size_t i = 0; i < inner.range().volume(); ++i) - inner.at_ordinal(i) = (L.data() + ord)->at_ordinal(i) + - (R.data() + ord)->at_ordinal(i); + inner.at_ordinal(i) = + (L.data() + ord)->at_ordinal(i) + (R.data() + ord)->at_ordinal(i); *(baseline.data() + ord) = std::move(inner); } BOOST_CHECK(tot_equal(arena_result, baseline)); @@ -104,8 +104,8 @@ BOOST_AUTO_TEST_CASE(subt_bit_equal_and_one_slab) { for (std::size_t ord = 0; ord < L.range().volume(); ++ord) { inner_t inner((L.data() + ord)->range()); for (std::size_t i = 0; i < inner.range().volume(); ++i) - inner.at_ordinal(i) = (L.data() + ord)->at_ordinal(i) - - (R.data() + ord)->at_ordinal(i); + inner.at_ordinal(i) = + (L.data() + ord)->at_ordinal(i) - (R.data() + ord)->at_ordinal(i); *(baseline.data() + ord) = std::move(inner); } BOOST_CHECK(tot_equal(arena_result, baseline)); @@ -120,8 +120,8 @@ BOOST_AUTO_TEST_CASE(mult_elementwise_bit_equal_and_one_slab) { for (std::size_t ord = 0; ord < L.range().volume(); ++ord) { inner_t inner((L.data() + ord)->range()); for (std::size_t i = 0; i < inner.range().volume(); ++i) - inner.at_ordinal(i) = (L.data() + ord)->at_ordinal(i) * - (R.data() + ord)->at_ordinal(i); + inner.at_ordinal(i) = + (L.data() + ord)->at_ordinal(i) * (R.data() + ord)->at_ordinal(i); *(baseline.data() + ord) = std::move(inner); } BOOST_CHECK(tot_equal(arena_result, baseline)); @@ -141,4 +141,94 @@ BOOST_AUTO_TEST_CASE(arena_outlives_source) { (9.0 + ord * 100.0 + i) * 2.0); } +// --- mismatched null-inner-cell coverage (non-arena inner) --------------- +// Same kernel (arena_trivial_binary) backs Tensor>; exercise +// the union-sparsity / implicit-zero path with mismatched per-cell nulls. +// An unassigned outer cell is a default (empty) inner Tensor. + +namespace { + +/// `present[ord]==false` leaves cell `ord` a null (empty) inner tensor. +outer_t make_tot_sparse(std::size_t N_outer, std::size_t n_inner, double base, + const std::vector& present) { + outer_t outer(TA::Range{static_cast(N_outer)}, 1); + for (std::size_t ord = 0; ord < N_outer; ++ord) { + if (!present[ord]) continue; // leave default-constructed -> empty + inner_t inner(TA::Range{static_cast(n_inner)}); + for (std::size_t i = 0; i < n_inner; ++i) + inner.at_ordinal(i) = base + ord * 100.0 + i; + *(outer.data() + ord) = std::move(inner); + } + return outer; +} + +// 0 = lone-left, 1&2 = both, 3 = both-null, 4 = lone-right. +const std::vector nz_L{true, true, true, false, false}; +const std::vector nz_R{false, true, true, false, true}; + +} // namespace + +BOOST_AUTO_TEST_CASE(add_mismatched_null_inners) { + outer_t L = make_tot_sparse(5, 4, 1.0, nz_L); + outer_t R = make_tot_sparse(5, 4, 0.5, nz_R); + outer_t sum = L.add(R); // must not segfault on lone-left cell 0 + for (std::size_t ord = 0; ord < 5; ++ord) { + const inner_t& l = *(L.data() + ord); + const inner_t& r = *(R.data() + ord); + const inner_t& d = *(sum.data() + ord); + const bool hl = !l.empty(), hr = !r.empty(); + if (!hl && !hr) { + BOOST_CHECK(d.empty()); + } else { + BOOST_REQUIRE(!d.empty()); + for (std::size_t i = 0; i < d.range().volume(); ++i) { + const double lv = hl ? l.at_ordinal(i) : 0.0; + const double rv = hr ? r.at_ordinal(i) : 0.0; + BOOST_CHECK_EQUAL(d.at_ordinal(i), lv + rv); + } + } + } +} + +BOOST_AUTO_TEST_CASE(subt_mismatched_null_inners) { + outer_t L = make_tot_sparse(5, 4, 5.0, nz_L); + outer_t R = make_tot_sparse(5, 4, 1.0, nz_R); + outer_t diff = L.subt(R); + for (std::size_t ord = 0; ord < 5; ++ord) { + const inner_t& l = *(L.data() + ord); + const inner_t& r = *(R.data() + ord); + const inner_t& d = *(diff.data() + ord); + const bool hl = !l.empty(), hr = !r.empty(); + if (!hl && !hr) { + BOOST_CHECK(d.empty()); + } else { + BOOST_REQUIRE(!d.empty()); + for (std::size_t i = 0; i < d.range().volume(); ++i) { + const double lv = hl ? l.at_ordinal(i) : 0.0; + const double rv = hr ? r.at_ordinal(i) : 0.0; + BOOST_CHECK_EQUAL(d.at_ordinal(i), lv - rv); + } + } + } +} + +BOOST_AUTO_TEST_CASE(mult_mismatched_null_inners) { + outer_t L = make_tot_sparse(5, 4, 2.0, nz_L); + outer_t R = make_tot_sparse(5, 4, 0.5, nz_R); + outer_t prod = L.mult(R); + for (std::size_t ord = 0; ord < 5; ++ord) { + const inner_t& l = *(L.data() + ord); + const inner_t& r = *(R.data() + ord); + const inner_t& d = *(prod.data() + ord); + if (!l.empty() && !r.empty()) { + BOOST_REQUIRE(!d.empty()); + for (std::size_t i = 0; i < d.range().volume(); ++i) + BOOST_CHECK_EQUAL(d.at_ordinal(i), l.at_ordinal(i) * r.at_ordinal(i)); + } else if (!d.empty()) { + for (std::size_t i = 0; i < d.range().volume(); ++i) + BOOST_CHECK_EQUAL(d.at_ordinal(i), 0.0); + } + } +} + BOOST_AUTO_TEST_SUITE_END()