Skip to content

Commit 6d02579

Browse files
authored
fix: incorrect test for when it's safe to use hwy code path (#5138)
We didn't have a complete enough test for when it was safe to hwy on a buffer. Encapsulate in new function HwySupports so that all the logic lives in one place and doesn't have to be replicated everywhere. The more complete test now checks for: - contiguous scanlines - right number of channels (only indirectly checked) - 2D image, not 3D volume (not previously checked) - pixel data window fully contains the ROI (not previously checked) - correct data type (should always be true, but was not checked) This eliminates some test failures, which were specifically cases where the ROI being operated on did not overlap with the data area of the ImageBufs, and the Highway related code did not handle those in the way that the old ImageBuf::Iterator transparently handled running off the edge and various wrap modes. Signed-off-by: Larry Gritz <lg@larrygritz.com>
1 parent b56d00d commit 6d02579

5 files changed

Lines changed: 111 additions & 136 deletions

File tree

src/libOpenImageIO/imagebufalgo_addsub.cpp

Lines changed: 45 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -138,39 +138,29 @@ static bool
138138
add_impl(ImageBuf& R, const ImageBuf& A, const ImageBuf& B, ROI roi,
139139
int nthreads)
140140
{
141-
#if defined(OIIO_USE_HWY) && OIIO_USE_HWY
142-
if (OIIO::pvt::enable_hwy && R.localpixels() && A.localpixels()
143-
&& B.localpixels()) {
144-
auto Rv = HwyPixels(R);
145-
auto Av = HwyPixels(A);
146-
auto Bv = HwyPixels(B);
147-
const int nchannels = roi.nchannels();
148-
const bool contig = ChannelsContiguous<Rtype>(Rv, nchannels)
149-
&& ChannelsContiguous<Atype>(Av, nchannels)
150-
&& ChannelsContiguous<Btype>(Bv, nchannels);
151-
if (contig) {
152-
// Use native integer path for scale-invariant add when all types
153-
// match and are integer types (much faster: 6-12x vs 3-5x with
154-
// float conversion).
155-
constexpr bool all_same = std::is_same_v<Rtype, Atype>
156-
&& std::is_same_v<Atype, Btype>;
157-
constexpr bool is_integer = std::is_integral_v<Rtype>;
158-
if constexpr (all_same && is_integer)
159-
return add_impl_hwy_native_int<Rtype>(R, A, B, roi, nthreads);
160-
return add_impl_hwy<Rtype, Atype, Btype>(R, A, B, roi, nthreads);
161-
}
162-
141+
#if OIIO_USE_HWY
142+
// First case: hwy enabled, all images have local pixels and the
143+
// number of channels in the ROI. and fully encompass the ROI.
144+
if (OIIO::pvt::enable_hwy && HwySupports<Rtype>(R, roi)
145+
&& HwySupports<Atype>(A, roi) && HwySupports<Btype>(B, roi)) {
146+
// Use native integer path for scale-invariant add when all types
147+
// match and are integer types (much faster: 6-12x vs 3-5x with
148+
// float conversion).
149+
constexpr bool all_same = std::is_same_v<Rtype, Atype>
150+
&& std::is_same_v<Atype, Btype>;
151+
constexpr bool is_integer = std::is_integral_v<Rtype>;
152+
if constexpr (all_same && is_integer)
153+
return add_impl_hwy_native_int<Rtype>(R, A, B, roi, nthreads);
154+
return add_impl_hwy<Rtype, Atype, Btype>(R, A, B, roi, nthreads);
155+
}
156+
// Second case: the buffers are RGBA but we are only adding RGB
157+
// (preserving alpha).
158+
// Is this a case we will actually encounter?
159+
if (OIIO::pvt::enable_hwy && HwySupports<Rtype>(R, roi, 4)
160+
&& HwySupports<Atype>(A, roi, 4) && HwySupports<Btype>(B, roi, 4)
161+
&& (roi.chbegin == 0 && roi.chend == 3)) {
163162
// Handle the common RGBA + RGB ROI strided case (preserving alpha).
164-
if (roi.chbegin == 0 && roi.chend == 3) {
165-
const bool contig4 = (Rv.nchannels >= 4 && Av.nchannels >= 4
166-
&& Bv.nchannels >= 4)
167-
&& ChannelsContiguous<Rtype>(Rv, 4)
168-
&& ChannelsContiguous<Atype>(Av, 4)
169-
&& ChannelsContiguous<Btype>(Bv, 4);
170-
if (contig4)
171-
return add_impl_hwy<Rtype, Atype, Btype>(R, A, B, roi,
172-
nthreads);
173-
}
163+
return add_impl_hwy<Rtype, Atype, Btype>(R, A, B, roi, nthreads);
174164
}
175165
#endif
176166
return add_impl_scalar<Rtype, Atype, Btype>(R, A, B, roi, nthreads);
@@ -181,7 +171,7 @@ static bool
181171
add_impl(ImageBuf& R, const ImageBuf& A, cspan<float> b, ROI roi, int nthreads)
182172
{
183173
#if OIIO_USE_HWY
184-
if (OIIO::pvt::enable_hwy && R.localpixels() && A.localpixels())
174+
if (OIIO::pvt::enable_hwy && HwySupports<Rtype>(R, roi))
185175
return add_impl_hwy<Rtype, Atype>(R, A, b, roi, nthreads);
186176
#endif
187177
return add_impl_scalar<Rtype, Atype>(R, A, b, roi, nthreads);
@@ -238,39 +228,29 @@ static bool
238228
sub_impl(ImageBuf& R, const ImageBuf& A, const ImageBuf& B, ROI roi,
239229
int nthreads)
240230
{
241-
#if defined(OIIO_USE_HWY) && OIIO_USE_HWY
242-
if (OIIO::pvt::enable_hwy && R.localpixels() && A.localpixels()
243-
&& B.localpixels()) {
244-
auto Rv = HwyPixels(R);
245-
auto Av = HwyPixels(A);
246-
auto Bv = HwyPixels(B);
247-
const int nchannels = roi.nchannels();
248-
const bool contig = ChannelsContiguous<Rtype>(Rv, nchannels)
249-
&& ChannelsContiguous<Atype>(Av, nchannels)
250-
&& ChannelsContiguous<Btype>(Bv, nchannels);
251-
if (contig) {
252-
// Use native integer path for scale-invariant sub when all types
253-
// match and are integer types (much faster: 6-12x vs 3-5x with
254-
// float conversion).
255-
constexpr bool all_same = std::is_same_v<Rtype, Atype>
256-
&& std::is_same_v<Atype, Btype>;
257-
constexpr bool is_integer = std::is_integral_v<Rtype>;
258-
if constexpr (all_same && is_integer)
259-
return sub_impl_hwy_native_int<Rtype>(R, A, B, roi, nthreads);
260-
return sub_impl_hwy<Rtype, Atype, Btype>(R, A, B, roi, nthreads);
261-
}
262-
231+
#if OIIO_USE_HWY
232+
// First case: hwy enabled, all images have local pixels and the
233+
// number of channels in the ROI. and fully encompass the ROI.
234+
if (OIIO::pvt::enable_hwy && HwySupports<Rtype>(R, roi)
235+
&& HwySupports<Atype>(A, roi) && HwySupports<Btype>(B, roi)) {
236+
// Use native integer path for scale-invariant sub when all types
237+
// match and are integer types (much faster: 6-12x vs 3-5x with
238+
// float conversion).
239+
constexpr bool all_same = std::is_same_v<Rtype, Atype>
240+
&& std::is_same_v<Atype, Btype>;
241+
constexpr bool is_integer = std::is_integral_v<Rtype>;
242+
if constexpr (all_same && is_integer)
243+
return sub_impl_hwy_native_int<Rtype>(R, A, B, roi, nthreads);
244+
return sub_impl_hwy<Rtype, Atype, Btype>(R, A, B, roi, nthreads);
245+
}
246+
// Second case: the buffers are RGBA but we are only subtracting RGB
247+
// (preserving alpha).
248+
// Is this a case we will actually encounter?
249+
if (OIIO::pvt::enable_hwy && HwySupports<Rtype>(R, roi, 4)
250+
&& HwySupports<Atype>(A, roi, 4) && HwySupports<Btype>(B, roi, 4)
251+
&& (roi.chbegin == 0 && roi.chend == 3)) {
263252
// Handle the common RGBA + RGB ROI strided case (preserving alpha).
264-
if (roi.chbegin == 0 && roi.chend == 3) {
265-
const bool contig4 = (Rv.nchannels >= 4 && Av.nchannels >= 4
266-
&& Bv.nchannels >= 4)
267-
&& ChannelsContiguous<Rtype>(Rv, 4)
268-
&& ChannelsContiguous<Atype>(Av, 4)
269-
&& ChannelsContiguous<Btype>(Bv, 4);
270-
if (contig4)
271-
return sub_impl_hwy<Rtype, Atype, Btype>(R, A, B, roi,
272-
nthreads);
273-
}
253+
return sub_impl_hwy<Rtype, Atype, Btype>(R, A, B, roi, nthreads);
274254
}
275255
#endif
276256
return sub_impl_scalar<Rtype, Atype, Btype>(R, A, B, roi, nthreads);

src/libOpenImageIO/imagebufalgo_hwy_pvt.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,35 @@ RoiRowPtr(const HwyLocalPixelsView<ByteT>& v, int y, const ROI& roi) noexcept
8787
return ChannelPtr<T>(v, roi.xbegin, y, roi.chbegin);
8888
}
8989

90+
91+
/// Can we use Hwy acceleration with this ImageBuf, over the specified ROI? If
92+
/// not supplied, the ROI defaults to the entire data window of the ImageBuf,
93+
/// and the number of channels defaults to the number of channels in the ROI.
94+
template<typename T>
95+
inline bool
96+
HwySupports(const ImageBuf& A, const ROI& roi = ROI(), int nchannels = 0)
97+
{
98+
if (nchannels <= 0)
99+
nchannels = roi.defined() ? roi.nchannels() : A.nchannels();
100+
return
101+
// The data type must match what we're looking for (T)
102+
(A.spec().format.basetype == BaseTypeFromC<T>::value)
103+
104+
// The ImageBuf has the number of pixels we're expecting
105+
&& A.spec().nchannels == nchannels
106+
107+
// The scanlines must consist of contiguous pixels in memory (no
108+
// padding between channels or pixels within a scanline). Note that
109+
// this test implicitly also ensures "local" in-memory pixels.
110+
&& A.contiguous_scanline()
111+
// For now, we only support 2D images (no volumes)
112+
&& A.spec().depth == 1
113+
// The data window must fully encompass the ROI we're operating on
114+
// (if an ROI was supplied)
115+
&& (!roi.defined() || A.roi().contains(roi));
116+
}
117+
118+
90119
// -----------------------------------------------------------------------
91120
// Type Traits
92121
// -----------------------------------------------------------------------

src/libOpenImageIO/imagebufalgo_mad.cpp

Lines changed: 13 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -71,32 +71,21 @@ mad_impl(ImageBuf& R, const ImageBuf& A, const ImageBuf& B, const ImageBuf& C,
7171
ROI roi, int nthreads)
7272
{
7373
#if OIIO_USE_HWY
74-
if (OIIO::pvt::enable_hwy && R.localpixels() && A.localpixels()
75-
&& B.localpixels() && C.localpixels()) {
76-
auto Rv = HwyPixels(R);
77-
auto Av = HwyPixels(A);
78-
auto Bv = HwyPixels(B);
79-
auto Cv = HwyPixels(C);
80-
const int nchannels = roi.nchannels();
81-
const bool contig = ChannelsContiguous<Rtype>(Rv, nchannels)
82-
&& ChannelsContiguous<ABCtype>(Av, nchannels)
83-
&& ChannelsContiguous<ABCtype>(Bv, nchannels)
84-
&& ChannelsContiguous<ABCtype>(Cv, nchannels);
85-
if (contig)
86-
return mad_impl_hwy<Rtype, ABCtype>(R, A, B, C, roi, nthreads);
87-
74+
if (OIIO::pvt::enable_hwy && HwySupports<Rtype>(R, roi)
75+
&& HwySupports<ABCtype>(A, roi) && HwySupports<ABCtype>(B, roi)
76+
&& HwySupports<ABCtype>(C, roi)) {
77+
// All-channel case
78+
return mad_impl_hwy<Rtype, ABCtype>(R, A, B, C, roi, nthreads);
79+
}
80+
# if 0
81+
if (OIIO::pvt::enable_hwy && HwySupports<Rtype>(R, roi, 4)
82+
&& HwySupports<Atype>(A, roi, 4) && HwySupports<Btype>(B, roi, 4)
83+
&& HwySupports<Btype>(C, roi, 4)
84+
&& (roi.chbegin == 0 && roi.chend == 3)) {
8885
// Handle the common RGBA + RGB ROI strided case (preserving alpha).
89-
if (roi.chbegin == 0 && roi.chend == 3) {
90-
const bool contig4 = (Rv.nchannels >= 4 && Av.nchannels >= 4
91-
&& Bv.nchannels >= 4 && Cv.nchannels >= 4)
92-
&& ChannelsContiguous<Rtype>(Rv, 4)
93-
&& ChannelsContiguous<ABCtype>(Av, 4)
94-
&& ChannelsContiguous<ABCtype>(Bv, 4)
95-
&& ChannelsContiguous<ABCtype>(Cv, 4);
96-
if (contig4)
97-
return mad_impl_hwy<Rtype, ABCtype>(R, A, B, C, roi, nthreads);
98-
}
86+
return mad_impl_hwy<Rtype, ABCtype>(R, A, B, C, roi, nthreads);
9987
}
88+
# endif
10089
#endif
10190
return mad_impl_scalar<Rtype, ABCtype>(R, A, B, C, roi, nthreads);
10291
}

src/libOpenImageIO/imagebufalgo_muldiv.cpp

Lines changed: 22 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -175,30 +175,19 @@ static bool
175175
mul_impl(ImageBuf& R, const ImageBuf& A, const ImageBuf& B, ROI roi,
176176
int nthreads)
177177
{
178-
#if defined(OIIO_USE_HWY) && OIIO_USE_HWY
179-
if (OIIO::pvt::enable_hwy && R.localpixels() && A.localpixels()
180-
&& B.localpixels()) {
181-
auto Rv = HwyPixels(R);
182-
auto Av = HwyPixels(A);
183-
auto Bv = HwyPixels(B);
184-
const int nchannels = roi.nchannels();
185-
const bool contig = ChannelsContiguous<Rtype>(Rv, nchannels)
186-
&& ChannelsContiguous<Atype>(Av, nchannels)
187-
&& ChannelsContiguous<Btype>(Bv, nchannels);
188-
if (contig)
189-
return mul_impl_hwy<Rtype, Atype, Btype>(R, A, B, roi, nthreads);
190-
178+
#if OIIO_USE_HWY
179+
if (OIIO::pvt::enable_hwy && HwySupports<Rtype>(R, roi)
180+
&& HwySupports<Atype>(A, roi) && HwySupports<Btype>(B, roi)) {
181+
return mul_impl_hwy<Rtype, Atype, Btype>(R, A, B, roi, nthreads);
182+
}
183+
// Second case: the buffers are RGBA but we are only multiplying RGB
184+
// (preserving alpha).
185+
// Is this a case we will actually encounter?
186+
if (OIIO::pvt::enable_hwy && HwySupports<Rtype>(R, roi, 4)
187+
&& HwySupports<Atype>(A, roi, 4) && HwySupports<Btype>(B, roi, 4)
188+
&& (roi.chbegin == 0 && roi.chend == 3)) {
191189
// Handle the common RGBA + RGB ROI strided case (preserving alpha).
192-
if (roi.chbegin == 0 && roi.chend == 3) {
193-
const bool contig4 = (Rv.nchannels >= 4 && Av.nchannels >= 4
194-
&& Bv.nchannels >= 4)
195-
&& ChannelsContiguous<Rtype>(Rv, 4)
196-
&& ChannelsContiguous<Atype>(Av, 4)
197-
&& ChannelsContiguous<Btype>(Bv, 4);
198-
if (contig4)
199-
return mul_impl_hwy<Rtype, Atype, Btype>(R, A, B, roi,
200-
nthreads);
201-
}
190+
return mul_impl_hwy<Rtype, Atype, Btype>(R, A, B, roi, nthreads);
202191
}
203192
#endif
204193
return mul_impl_scalar<Rtype, Atype, Btype>(R, A, B, roi, nthreads);
@@ -343,30 +332,17 @@ static bool
343332
div_impl(ImageBuf& R, const ImageBuf& A, const ImageBuf& B, ROI roi,
344333
int nthreads)
345334
{
346-
#if defined(OIIO_USE_HWY) && OIIO_USE_HWY
347-
if (OIIO::pvt::enable_hwy && R.localpixels() && A.localpixels()
348-
&& B.localpixels()) {
349-
auto Rv = HwyPixels(R);
350-
auto Av = HwyPixels(A);
351-
auto Bv = HwyPixels(B);
352-
const int nchannels = roi.nchannels();
353-
const bool contig = ChannelsContiguous<Rtype>(Rv, nchannels)
354-
&& ChannelsContiguous<Atype>(Av, nchannels)
355-
&& ChannelsContiguous<Btype>(Bv, nchannels);
356-
if (contig)
357-
return div_impl_hwy<Rtype, Atype, Btype>(R, A, B, roi, nthreads);
358-
335+
#if OIIO_USE_HWY
336+
if (OIIO::pvt::enable_hwy && HwySupports<Rtype>(R, roi)
337+
&& HwySupports<Atype>(A, roi) && HwySupports<Btype>(B, roi)) {
338+
return div_impl_hwy<Rtype, Atype, Btype>(R, A, B, roi, nthreads);
339+
}
340+
// Handle the common RGBA + RGB ROI strided case (preserving alpha).
341+
if (OIIO::pvt::enable_hwy && HwySupports<Rtype>(R, roi, 4)
342+
&& HwySupports<Atype>(A, roi, 4) && HwySupports<Btype>(B, roi, 4)
343+
&& (roi.chbegin == 0 && roi.chend == 3)) {
359344
// Handle the common RGBA + RGB ROI strided case (preserving alpha).
360-
if (roi.chbegin == 0 && roi.chend == 3) {
361-
const bool contig4 = (Rv.nchannels >= 4 && Av.nchannels >= 4
362-
&& Bv.nchannels >= 4)
363-
&& ChannelsContiguous<Rtype>(Rv, 4)
364-
&& ChannelsContiguous<Atype>(Av, 4)
365-
&& ChannelsContiguous<Btype>(Bv, 4);
366-
if (contig4)
367-
return div_impl_hwy<Rtype, Atype, Btype>(R, A, B, roi,
368-
nthreads);
369-
}
345+
return div_impl_hwy<Rtype, Atype, Btype>(R, A, B, roi, nthreads);
370346
}
371347
#endif
372348
return div_impl_scalar<Rtype, Atype, Btype>(R, A, B, roi, nthreads);

src/libOpenImageIO/imagebufalgo_xform.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1435,7 +1435,8 @@ resample_(ImageBuf& dst, const ImageBuf& src, bool interpolate, ROI roi,
14351435
int nthreads)
14361436
{
14371437
#if OIIO_USE_HWY
1438-
if (OIIO::pvt::enable_hwy && dst.localpixels() && src.localpixels())
1438+
if (OIIO::pvt::enable_hwy && HwySupports<DSTTYPE>(dst, roi)
1439+
&& HwySupports<SRCTYPE>(src, ROI()))
14391440
return resample_hwy<DSTTYPE, SRCTYPE>(dst, src, interpolate, roi,
14401441
nthreads);
14411442
#endif

0 commit comments

Comments
 (0)