diff --git a/src/TiledArray/tensor/arena_kernels.h b/src/TiledArray/tensor/arena_kernels.h index 4db5212ef3..403de9ffd4 100644 --- a/src/TiledArray/tensor/arena_kernels.h +++ b/src/TiledArray/tensor/arena_kernels.h @@ -316,21 +316,44 @@ OuterTensor arena_trivial_binary(const LeftTensor& left, using inner_range_t = typename OuterTensor::value_type::range_type; TA_ASSERT(left.range().volume() == right.range().volume()); TA_ASSERT(left.nbatch() == right.nbatch()); - auto range_fn = [&left](std::size_t ord) -> inner_range_t { + // Union sparsity: a result cell is present if *either* operand cell is. + // ToT arrays with the same outer shape can still differ in which inner cells + // are populated within an outer tile (e.g. occ_tile_size>1 aggregates several + // pairs, some screened to null). A cell present in only one operand is + // combined against an implicit zero slab below -- correct for the linear ops + // (add: l+0 / 0+r; subt: l-0 / 0-r) and numerically correct for mult (l*0=0, + // emitted as an explicit zero tile). Without this, a lone-left cell would + // read a null right slab (segfault) and a lone-right cell would be silently + // dropped, losing that addend. + auto range_fn = [&left, &right](std::size_t ord) -> inner_range_t { const auto& l = left.data()[ord]; - return l.empty() ? inner_range_t{} : l.range(); + if (!l.empty()) return l.range(); + const auto& r = right.data()[ord]; + return r.empty() ? inner_range_t{} : r.range(); }; OuterTensor result = arena_outer_init( left.range(), left.nbatch(), range_fn, alignof(elem_t), /*zero_init=*/false); const std::size_t N_cells = left.range().volume() * left.nbatch(); + std::vector zeros; // grown lazily; implicit-zero slab for lone cells for (std::size_t ord = 0; ord < N_cells; ++ord) { auto& dst = result.data()[ord]; if (dst.empty()) continue; - TA_ASSERT(left.data()[ord].size() == right.data()[ord].size()); - TA_ASSERT(left.data()[ord].size() == dst.size()); - fill_op(dst.data(), left.data()[ord].data(), right.data()[ord].data(), - dst.size()); + const auto& l = left.data()[ord]; + const auto& r = right.data()[ord]; + const std::size_t n = dst.size(); + const bool have_l = !l.empty(); + const bool have_r = !r.empty(); + TA_ASSERT(!have_l || l.size() == n); + TA_ASSERT(!have_r || r.size() == n); + if (have_l && have_r) { + fill_op(dst.data(), l.data(), r.data(), n); + } else { + if (zeros.size() < n) zeros.assign(n, elem_t{}); + const elem_t* l_ptr = have_l ? l.data() : zeros.data(); + const elem_t* r_ptr = have_r ? r.data() : zeros.data(); + fill_op(dst.data(), l_ptr, r_ptr, n); + } } return result; } diff --git a/tests/arena_tensor_kernels.cpp b/tests/arena_tensor_kernels.cpp index 09eb0aa232..94dcb39f2b 100644 --- a/tests/arena_tensor_kernels.cpp +++ b/tests/arena_tensor_kernels.cpp @@ -725,4 +725,100 @@ BOOST_AUTO_TEST_CASE(tot_axpy_to_accumulates_scaled_operand) { } } +// --- mismatched null-inner-cell coverage -------------------------------- +// occ_tile_size>1 (and any block-sparse ToT) can produce two operands with +// the same outer shape but different *inner*-cell sparsity within an outer +// tile. Regression coverage for the bug where arena_trivial_binary sized the +// result by the left operand only and read the right unconditionally: a cell +// present in left but null in right read a null slab (segfault), and a cell +// present in right but null in left was silently dropped. + +namespace { + +/// Build an Outer of `n_outer` cells; cell `ord` is null iff `present[ord]` +/// is false, otherwise a length-`n_inner` ArenaTensor filled deterministically. +Outer make_outer_sparse(std::size_t n_outer, std::size_t n_inner, double base, + const std::vector& present) { + TA::Range outer_r{static_cast(n_outer)}; + auto shape_fn = [n_inner, &present](std::size_t ord) { + return present[ord] ? TA::Range{static_cast(n_inner)} : TA::Range(); + }; + Outer outer = TA::detail::arena_outer_init(outer_r, 1, shape_fn); + for (std::size_t ord = 0; ord < n_outer; ++ord) { + Inner& inner = outer.data()[ord]; + if (!inner) continue; + for (std::size_t i = 0; i < inner.size(); ++i) + inner.data()[i] = base + ord * 100.0 + i; + } + return outer; +} + +// L present on {0,1,2}, R present on {1,2,4}, over 5 outer cells: +// 0 = lone-left, 1&2 = both, 3 = both-null, 4 = lone-right. +constexpr std::size_t kNo = 5, kNi = 4; +const std::vector kLpresent{true, true, true, false, false}; +const std::vector kRpresent{false, true, true, false, true}; + +} // namespace + +BOOST_AUTO_TEST_CASE(trivial_add_mismatched_null_inners) { + Outer L = make_outer_sparse(kNo, kNi, 1.0, kLpresent); + Outer R = make_outer_sparse(kNo, kNi, 0.5, kRpresent); + Outer sum = L.add(R); // must not segfault on lone-left cell 0 + for (std::size_t ord = 0; ord < kNo; ++ord) { + const Inner &l = L.data()[ord], &r = R.data()[ord], &d = sum.data()[ord]; + const bool hl = bool(l), hr = bool(r); + if (!hl && !hr) { + BOOST_CHECK(!d); // both null -> null result + } else { + BOOST_REQUIRE(bool(d)); + for (std::size_t i = 0; i < d.size(); ++i) { + const double lv = hl ? l.data()[i] : 0.0; + const double rv = hr ? r.data()[i] : 0.0; + BOOST_CHECK_EQUAL(d.data()[i], lv + rv); // union: lone-right kept too + } + } + } +} + +BOOST_AUTO_TEST_CASE(trivial_subt_mismatched_null_inners) { + Outer L = make_outer_sparse(kNo, kNi, 5.0, kLpresent); + Outer R = make_outer_sparse(kNo, kNi, 1.0, kRpresent); + Outer diff = L.subt(R); + for (std::size_t ord = 0; ord < kNo; ++ord) { + const Inner &l = L.data()[ord], &r = R.data()[ord], &d = diff.data()[ord]; + const bool hl = bool(l), hr = bool(r); + if (!hl && !hr) { + BOOST_CHECK(!d); + } else { + BOOST_REQUIRE(bool(d)); + for (std::size_t i = 0; i < d.size(); ++i) { + const double lv = hl ? l.data()[i] : 0.0; + const double rv = hr ? r.data()[i] : 0.0; + BOOST_CHECK_EQUAL(d.data()[i], lv - rv); // lone-right -> -r + } + } + } +} + +BOOST_AUTO_TEST_CASE(trivial_mult_mismatched_null_inners) { + Outer L = make_outer_sparse(kNo, kNi, 2.0, kLpresent); + Outer R = make_outer_sparse(kNo, kNi, 0.5, kRpresent); + Outer prod = L.mult(R); + for (std::size_t ord = 0; ord < kNo; ++ord) { + const Inner &l = L.data()[ord], &r = R.data()[ord], &d = prod.data()[ord]; + const bool hl = bool(l), hr = bool(r); + if (hl && hr) { + BOOST_REQUIRE(bool(d)); + for (std::size_t i = 0; i < d.size(); ++i) + BOOST_CHECK_EQUAL(d.data()[i], l.data()[i] * r.data()[i]); + } else if (bool(d)) { + // a lone cell multiplies against an implicit zero -> a zero tile + // (numerically equivalent to absent); tolerate either policy. + for (std::size_t i = 0; i < d.size(); ++i) + BOOST_CHECK_EQUAL(d.data()[i], 0.0); + } + } +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/arena_tot_trivial.cpp b/tests/arena_tot_trivial.cpp index 627bd5a7cc..f61e7985ec 100644 --- a/tests/arena_tot_trivial.cpp +++ b/tests/arena_tot_trivial.cpp @@ -55,7 +55,7 @@ bool inners_share_one_slab(const outer_t& tot) { return true; } -} +} // namespace BOOST_AUTO_TEST_SUITE(arena_tot_trivial_suite, TA_UT_LABEL_SERIAL) @@ -88,8 +88,8 @@ BOOST_AUTO_TEST_CASE(add_bit_equal_and_one_slab) { for (std::size_t ord = 0; ord < L.range().volume(); ++ord) { inner_t inner((L.data() + ord)->range()); for (std::size_t i = 0; i < inner.range().volume(); ++i) - inner.at_ordinal(i) = (L.data() + ord)->at_ordinal(i) + - (R.data() + ord)->at_ordinal(i); + inner.at_ordinal(i) = + (L.data() + ord)->at_ordinal(i) + (R.data() + ord)->at_ordinal(i); *(baseline.data() + ord) = std::move(inner); } BOOST_CHECK(tot_equal(arena_result, baseline)); @@ -104,8 +104,8 @@ BOOST_AUTO_TEST_CASE(subt_bit_equal_and_one_slab) { for (std::size_t ord = 0; ord < L.range().volume(); ++ord) { inner_t inner((L.data() + ord)->range()); for (std::size_t i = 0; i < inner.range().volume(); ++i) - inner.at_ordinal(i) = (L.data() + ord)->at_ordinal(i) - - (R.data() + ord)->at_ordinal(i); + inner.at_ordinal(i) = + (L.data() + ord)->at_ordinal(i) - (R.data() + ord)->at_ordinal(i); *(baseline.data() + ord) = std::move(inner); } BOOST_CHECK(tot_equal(arena_result, baseline)); @@ -120,8 +120,8 @@ BOOST_AUTO_TEST_CASE(mult_elementwise_bit_equal_and_one_slab) { for (std::size_t ord = 0; ord < L.range().volume(); ++ord) { inner_t inner((L.data() + ord)->range()); for (std::size_t i = 0; i < inner.range().volume(); ++i) - inner.at_ordinal(i) = (L.data() + ord)->at_ordinal(i) * - (R.data() + ord)->at_ordinal(i); + inner.at_ordinal(i) = + (L.data() + ord)->at_ordinal(i) * (R.data() + ord)->at_ordinal(i); *(baseline.data() + ord) = std::move(inner); } BOOST_CHECK(tot_equal(arena_result, baseline)); @@ -141,4 +141,94 @@ BOOST_AUTO_TEST_CASE(arena_outlives_source) { (9.0 + ord * 100.0 + i) * 2.0); } +// --- mismatched null-inner-cell coverage (non-arena inner) --------------- +// Same kernel (arena_trivial_binary) backs Tensor>; exercise +// the union-sparsity / implicit-zero path with mismatched per-cell nulls. +// An unassigned outer cell is a default (empty) inner Tensor. + +namespace { + +/// `present[ord]==false` leaves cell `ord` a null (empty) inner tensor. +outer_t make_tot_sparse(std::size_t N_outer, std::size_t n_inner, double base, + const std::vector& present) { + outer_t outer(TA::Range{static_cast(N_outer)}, 1); + for (std::size_t ord = 0; ord < N_outer; ++ord) { + if (!present[ord]) continue; // leave default-constructed -> empty + inner_t inner(TA::Range{static_cast(n_inner)}); + for (std::size_t i = 0; i < n_inner; ++i) + inner.at_ordinal(i) = base + ord * 100.0 + i; + *(outer.data() + ord) = std::move(inner); + } + return outer; +} + +// 0 = lone-left, 1&2 = both, 3 = both-null, 4 = lone-right. +const std::vector nz_L{true, true, true, false, false}; +const std::vector nz_R{false, true, true, false, true}; + +} // namespace + +BOOST_AUTO_TEST_CASE(add_mismatched_null_inners) { + outer_t L = make_tot_sparse(5, 4, 1.0, nz_L); + outer_t R = make_tot_sparse(5, 4, 0.5, nz_R); + outer_t sum = L.add(R); // must not segfault on lone-left cell 0 + for (std::size_t ord = 0; ord < 5; ++ord) { + const inner_t& l = *(L.data() + ord); + const inner_t& r = *(R.data() + ord); + const inner_t& d = *(sum.data() + ord); + const bool hl = !l.empty(), hr = !r.empty(); + if (!hl && !hr) { + BOOST_CHECK(d.empty()); + } else { + BOOST_REQUIRE(!d.empty()); + for (std::size_t i = 0; i < d.range().volume(); ++i) { + const double lv = hl ? l.at_ordinal(i) : 0.0; + const double rv = hr ? r.at_ordinal(i) : 0.0; + BOOST_CHECK_EQUAL(d.at_ordinal(i), lv + rv); + } + } + } +} + +BOOST_AUTO_TEST_CASE(subt_mismatched_null_inners) { + outer_t L = make_tot_sparse(5, 4, 5.0, nz_L); + outer_t R = make_tot_sparse(5, 4, 1.0, nz_R); + outer_t diff = L.subt(R); + for (std::size_t ord = 0; ord < 5; ++ord) { + const inner_t& l = *(L.data() + ord); + const inner_t& r = *(R.data() + ord); + const inner_t& d = *(diff.data() + ord); + const bool hl = !l.empty(), hr = !r.empty(); + if (!hl && !hr) { + BOOST_CHECK(d.empty()); + } else { + BOOST_REQUIRE(!d.empty()); + for (std::size_t i = 0; i < d.range().volume(); ++i) { + const double lv = hl ? l.at_ordinal(i) : 0.0; + const double rv = hr ? r.at_ordinal(i) : 0.0; + BOOST_CHECK_EQUAL(d.at_ordinal(i), lv - rv); + } + } + } +} + +BOOST_AUTO_TEST_CASE(mult_mismatched_null_inners) { + outer_t L = make_tot_sparse(5, 4, 2.0, nz_L); + outer_t R = make_tot_sparse(5, 4, 0.5, nz_R); + outer_t prod = L.mult(R); + for (std::size_t ord = 0; ord < 5; ++ord) { + const inner_t& l = *(L.data() + ord); + const inner_t& r = *(R.data() + ord); + const inner_t& d = *(prod.data() + ord); + if (!l.empty() && !r.empty()) { + BOOST_REQUIRE(!d.empty()); + for (std::size_t i = 0; i < d.range().volume(); ++i) + BOOST_CHECK_EQUAL(d.at_ordinal(i), l.at_ordinal(i) * r.at_ordinal(i)); + } else if (!d.empty()) { + for (std::size_t i = 0; i < d.range().volume(); ++i) + BOOST_CHECK_EQUAL(d.at_ordinal(i), 0.0); + } + } +} + BOOST_AUTO_TEST_SUITE_END()