diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 20918314cba4f9..46723fc4ac6949 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -152,6 +152,7 @@ set( JIT_SOURCES jiteh.cpp jithashtable.cpp jitmetadata.cpp + knownbits.cpp layout.cpp lclmorph.cpp lclvars.cpp @@ -378,6 +379,7 @@ set( JIT_HEADERS jitmetadatalist.h jitpch.h jitstd.h + knownbits.h lir.h loopcloning.h loopcloningopts.h diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index 75859cd8c2b234..b4abac8c1a8a47 100644 --- a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -12,6 +12,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "jitpch.h" #include "rangecheck.h" +#include "knownbits.h" #ifdef _MSC_VER #pragma hdrstop #endif @@ -4083,6 +4084,22 @@ void Compiler::optAssertionProp_RangeProperties(ASSERT_VALARG_TP assertions, *isKnownNonZero = true; } } + + // Known bits can also establish non-negativity (sign bit known 0) and non-zeroness (some bit + // known 1). Covers TYP_LONG and bit patterns an interval cannot express (e.g. "x & 7"). + if (!*isKnownNonZero || !*isKnownNonNegative) + { + const uint64_t signBit = 1ull << ((genTypeSize(genActualType(tree)) * BITS_PER_BYTE) - 1); + const KnownBits kb = KnownBits::Compute(this, treeVN, assertions); + if ((kb.knownZero & signBit) != 0) + { + *isKnownNonNegative = true; + } + if (kb.knownOne != 0) + { + *isKnownNonZero = true; + } + } } //------------------------------------------------------------------------ @@ -4514,6 +4531,23 @@ GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, } } + // See if we can fold the relop based on known bits. This complements the range-based folding + // above (which is limited to TYP_INT) by reasoning about individual bits and TYP_LONG values. 
+ if (varTypeIsIntegral(op1) && (op1VN != ValueNumStore::NoVN) && (op2VN != ValueNumStore::NoVN)) + { + const unsigned width = genTypeSize(genActualType(op1)) * BITS_PER_BYTE; + const KnownBits kb1 = KnownBits::Compute(this, op1VN, assertions); + const KnownBits kb2 = KnownBits::Compute(this, op2VN, assertions); + + const int relopResult = KnownBitsOps::EvalRelop(tree->OperGet(), tree->IsUnsigned(), kb1, kb2, width); + if (relopResult >= 0) + { + JITDUMP("Folding relop [%06u] based on known bits.\n", dspTreeID(tree)); + newTree = gtWrapWithSideEffects(relopResult == 1 ? gtNewTrue() : gtNewFalse(), tree, GTF_ALL_EFFECT); + return optAssertionProp_Update(newTree, tree, stmt); + } + } + // Else check if we have an equality check involving a local or an indir if (!tree->OperIs(GT_EQ, GT_NE)) { @@ -5537,6 +5571,20 @@ GenTree* Compiler::optAssertionProp_BndsChk(ASSERT_VALARG_TP assertions, GenTree return optAssertionProp_Update(newTree, arrBndsChk, stmt); }; + // Known-bits elimination: redundant if (uint)index is provably < (uint)length. Catches masked + // indices and bit patterns the range-based paths cannot express. On 64-bit targets the index + // and length can both be TYP_I_IMPL (see fgMorphIndexAddr), so derive the width from the actual + // operand type instead of hardcoding 32 -- otherwise we'd discard the high 32 bits of a native-int + // index and could prove a (uint)idx < (uint)len fact that doesn't hold for the full value. 
+ assert(genActualType(arrBndsChk->GetIndex()) == genActualType(arrBndsChk->GetArrayLength())); + const unsigned width = genTypeSize(genActualType(arrBndsChk->GetIndex())) * BITS_PER_BYTE; + const KnownBits kbIdx = KnownBits::Compute(this, vnCurIdx, assertions); + const KnownBits kbLen = KnownBits::Compute(this, vnCurLen, assertions); + if (KnownBitsOps::EvalRelop(GT_LT, /* isUnsigned */ true, kbIdx, kbLen, width) == 1) + { + return dropBoundsCheck(INDEBUG("known bits prove (uint)index < (uint)length")); + } + // First, check if we have arr[arr.Length - cns] when we know arr.Length is >= cns. ValueNum add0, add1; if (vnStore->IsVNBinFunc(vnCurIdx, VNF_ADD, &add0, &add1)) diff --git a/src/coreclr/jit/knownbits.cpp b/src/coreclr/jit/knownbits.cpp new file mode 100644 index 00000000000000..82e8b0ab6efd7d --- /dev/null +++ b/src/coreclr/jit/knownbits.cpp @@ -0,0 +1,282 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#include "knownbits.h" + +//------------------------------------------------------------------------ +// MergeKnownBitsAssertions: Refine "*pBits" using whatever the live assertions tell us about "num". 
+//
+// Arguments:
+//    comp       - the compiler context
+//    num        - the value number being analyzed
+//    assertions - the assertion set live at the consumer
+//    width      - bit width (32 or 64) of "num"
+//    budget     - recursive search budget (currently unused here, kept for symmetry with Compute)
+//    pBits      - in/out: the lattice for "num" so far; refined in place by intersecting with each
+//                 fact this routine can extract from the assertion table
+//
+// Notes:
+//    Only assertions whose op1 value number is exactly "num" are considered. Returns without
+//    touching "*pBits" when the assertion set is uninitialized or empty, or when no assertion
+//    mentions "num".
+//
+static void MergeKnownBitsAssertions(
+    Compiler* comp, ValueNum num, ASSERT_VALARG_TP assertions, unsigned width, int /*budget*/, KnownBits* pBits)
+{
+    if (BitVecOps::MayBeUninit(assertions) || BitVecOps::IsEmpty(comp->apTraits, assertions) ||
+        !comp->optAssertionHasAssertionsForVN(num))
+    {
+        return;
+    }
+
+    const uint64_t  signBit = 1ull << (width - 1);
+    const KnownBits signBitZero(signBit, 0);
+
+    // Tightest signed upper bound "num <= signedUpperBound" gathered from signed "num < C" / "num <= C"
+    // assertions with a non-negative bound. On its own a signed upper bound says nothing about the high
+    // bits (num could be negative), so we only apply it after the loop, and only once we also know num
+    // is non-negative (sign bit 0) -- then num is in [0, signedUpperBound] and its upper bits are 0.
+    bool     haveSignedUpperBound = false;
+    uint64_t signedUpperBound     = 0;
+
+    BitVecOps::Iter iter(comp->apTraits, assertions);
+    unsigned        index = 0;
+    while (iter.NextElem(&index))
+    {
+        const Compiler::AssertionDsc& cur = comp->optGetAssertion(GetAssertionIndex(index));
+        if (cur.GetOp1().GetVN() != num)
+        {
+            continue;
+        }
+
+        // "num == const": fully determines the bits.
+        if (cur.KindIs(Compiler::OAK_EQUAL))
+        {
+            int64_t eqCns;
+            if (comp->vnStore->IsVNIntegralConstant(cur.GetOp2().GetVN(), &eqCns))
+            {
+                *pBits = KnownBits::Intersect(*pBits, KnownBits::FromConstant((uint64_t)eqCns, width));
+            }
+            continue;
+        }
+
+        // Relops of the form "num RELOP const", where RELOP is one of the ordered comparisons
+        // handled below.
+        if (cur.IsRelop() && cur.GetOp2().KindIs(Compiler::O2K_CONST_INT))
+        {
+            const int64_t relCns = cur.GetOp2().GetIntConstant();
+
+            if (cur.KindIs(Compiler::OAK_LT_UN) && (relCns > 0))
+            {
+                // (uint)num < C  =>  num u<= C-1  =>  upper bits are 0.
+                *pBits = KnownBits::Intersect(*pBits, KnownBits::FromUnsignedUpperBound((uint64_t)(relCns - 1), width));
+            }
+            else if (cur.KindIs(Compiler::OAK_LE_UN) && (relCns >= 0))
+            {
+                // (uint)num <= C  =>  upper bits are 0.
+                *pBits = KnownBits::Intersect(*pBits, KnownBits::FromUnsignedUpperBound((uint64_t)relCns, width));
+            }
+            else if (cur.KindIs(Compiler::OAK_GE) && (relCns >= 0))
+            {
+                // num >= 0 (signed)  =>  sign bit is 0.
+                *pBits = KnownBits::Intersect(*pBits, signBitZero);
+            }
+            else if (cur.KindIs(Compiler::OAK_GT) && (relCns >= -1))
+            {
+                // num > -1 (signed)  =>  num >= 0  =>  sign bit is 0.
+                *pBits = KnownBits::Intersect(*pBits, signBitZero);
+            }
+            else if (cur.KindIs(Compiler::OAK_LT) && (relCns >= 1))
+            {
+                // num < C (signed), C >= 1. If num is also non-negative (handled after the loop),
+                // num is in [0, C-1], so record C-1 as a candidate upper bound.
+                const uint64_t ub = (uint64_t)(relCns - 1);
+                if (!haveSignedUpperBound || (ub < signedUpperBound))
+                {
+                    haveSignedUpperBound = true;
+                    signedUpperBound     = ub;
+                }
+            }
+            else if (cur.KindIs(Compiler::OAK_LE) && (relCns >= 0))
+            {
+                // num <= C (signed), C >= 0. If num is also non-negative, num is in [0, C].
+                const uint64_t ub = (uint64_t)relCns;
+                if (!haveSignedUpperBound || (ub < signedUpperBound))
+                {
+                    haveSignedUpperBound = true;
+                    signedUpperBound     = ub;
+                }
+            }
+            continue;
+        }
+
+        // "(uint)num LT_UN/LE_UN vn", where vn is never negative, implies that num is non-negative.
+        //
+        // IsVNNeverNegative on an O2K_VN_ADD_CNS asserts only that the "vn" part is non-negative.
+        // The full expression "vn + cns" can only be guaranteed non-negative when cns == 0, so we
+        // require it explicitly here -- otherwise a negative cns could make the bound itself
+        // negative and we'd derive a false non-negativity fact for num. Same shape as rangecheck.cpp.
+        //
+        if (cur.KindIs(Compiler::OAK_LT_UN, Compiler::OAK_LE_UN) && cur.GetOp2().KindIs(Compiler::O2K_VN_ADD_CNS) &&
+            cur.GetOp2().IsVNNeverNegative() && (cur.GetOp2().GetCns() == 0))
+        {
+            *pBits = KnownBits::Intersect(*pBits, signBitZero);
+        }
+    }
+
+    // If we gathered a signed upper bound and num is now known non-negative (from any of the facts
+    // above or from its value-number structure), num is in [0, signedUpperBound]: its upper bits are 0.
+    // Example: "a > 10 && a < 1000" => sign bit 0 (from a > 10) plus upper bits 0 (from a < 1000),
+    // proving a fits in a smaller type (e.g. making "checked((int)a)" non-overflowing).
+    if (haveSignedUpperBound && ((pBits->knownZero & signBit) != 0))
+    {
+        *pBits = KnownBits::Intersect(*pBits, KnownBits::FromUnsignedUpperBound(signedUpperBound, width));
+    }
+}
+
+//------------------------------------------------------------------------
+// ComputeWorker: Recursive worker for KnownBits::Compute.
+//
+// Arguments:
+//    comp       - the compiler context
+//    num        - the value number to analyze
+//    assertions - the assertion set live at the consumer
+//    budget     - recursive search budget; decremented at every recursive step. Once it is
+//                 exhausted, every non-constant value number yields the fully-unknown lattice.
+//                 Integral constants are still resolved, because that needs no recursion.
+//    visited    - set of phi VNs we have already entered, used to guard against infinite recursion
+//                 on loop-carried phis
+//
+// Returns:
+//    KnownBits for "num" within its natural width (32 or 64). Always truncated to that width on
+//    return so the "bits above width are 0/0" invariant holds.
+//
+static KnownBits ComputeWorker(
+    Compiler* comp, ValueNum num, ASSERT_VALARG_TP assertions, int budget, ValueNumStore::SmallValueNumSet* visited)
+{
+    KnownBits result;
+    if (num == ValueNumStore::NoVN)
+    {
+        return result;
+    }
+
+    const var_types vnType = comp->vnStore->TypeOfVN(num);
+    if (!varTypeIsIntegral(vnType) || varTypeIsGC(vnType))
+    {
+        // We only reason about (non-GC) integral values.
+        return result;
+    }
+
+    const unsigned width = (genActualType(vnType) == TYP_LONG) ? 64 : 32;
+
+    // Constants are fully known. This is checked before the budget on purpose: the operands of a
+    // binary function are visited with successively smaller budgets, so a constant mask (as in
+    // "x & 0xFF") could otherwise be reported as unknown purely because the budget ran out.
+    int64_t cnsVal;
+    if (comp->vnStore->IsVNIntegralConstant(num, &cnsVal))
+    {
+        return KnownBits::FromConstant((uint64_t)cnsVal, width);
+    }
+
+    if (budget <= 0)
+    {
+        // Budget exhausted: anything below this point may recurse or walk the assertion table.
+        return result;
+    }
+
+    VNFuncApp f;
+    if (comp->vnStore->GetVNFunc(num, &f))
+    {
+        switch (f.GetFunc())
+        {
+            case VNF_AND:
+            case VNF_OR:
+            case VNF_UDIV:
+            {
+                // Each operand visit consumes one unit of budget, so the second operand is
+                // analyzed with one unit less than the first.
+                const KnownBits a = ComputeWorker(comp, f.GetArg(0), assertions, --budget, visited);
+                const KnownBits b = ComputeWorker(comp, f.GetArg(1), assertions, --budget, visited);
+
+                if (f.FuncIs(VNF_UDIV))
+                    result = KnownBitsOps::UDiv(a, b, width);
+                else if (f.FuncIs(VNF_AND))
+                    result = KnownBitsOps::And(a, b);
+                else if (f.FuncIs(VNF_OR))
+                    result = KnownBitsOps::Or(a, b);
+                else
+                    unreached();
+                break;
+            }
+
+            case VNF_Cast:
+            case VNF_CastOvf:
+            {
+                var_types castToType;
+                bool      srcIsUnsigned;
+                comp->vnStore->GetCastOperFromVN(f.GetArg(1), &castToType, &srcIsUnsigned);
+
+                const ValueNum  srcVN   = f.GetArg(0);
+                const var_types srcType = comp->vnStore->TypeOfVN(srcVN);
+                if (varTypeIsIntegral(srcType) && !varTypeIsGC(srcType) && varTypeIsIntegral(castToType))
+                {
+                    const unsigned  srcWidth = genTypeSize(genActualType(srcType)) * BITS_PER_BYTE;
+                    const KnownBits bits     = ComputeWorker(comp, srcVN, assertions, --budget, visited);
+                    result                   = KnownBitsOps::Cast(bits, srcWidth, castToType, srcIsUnsigned);
+                }
+                break;
+            }
+
+            case VNF_EQ:
+            case VNF_NE:
+            case VNF_LT:
+            case VNF_LE:
+            case VNF_GT:
+            case VNF_GE:
+            case VNF_LT_UN:
+            case VNF_LE_UN:
+            case VNF_GT_UN:
+            case VNF_GE_UN:
+                // A relop always produces 0 or 1; we don't try to fold the comparison here, just
+                // record the [0, 1] range so a consumer reading this VN sees a single low bit.
+                result = KnownBits::FromUnsignedUpperBound(1, width);
+                break;
+
+            case VNF_MDARR_LENGTH:
+            case VNF_ARR_LENGTH:
+                // Array length is in [0, CORINFO_Array_MaxLength], so its upper bits are 0.
+                result = KnownBits::FromUnsignedUpperBound(CORINFO_Array_MaxLength, width);
+                break;
+
+            default:
+                break;
+        }
+    }
+
+    result = result.Truncate(width);
+
+    // Phi: a bit is known in the phi result only if it is known and equal along every reaching
+    // edge. We Union (LLVM's intersectWith) the per-edge KnownBits to compute that.
+    if (!result.IsConstant(width) && comp->vnStore->IsPhiDef(num) && visited->Add(comp, num))
+    {
+        KnownBits phiBits;
+        bool      first   = true;
+        auto      visitor = [comp, &phiBits, &first, &budget, visited](ValueNum vn, ASSERT_TP reachAss) {
+            // Each edge is analyzed under the assertions passed in for that edge, not the
+            // consumer's set; "budget" is shared by reference so all edges draw from one pool.
+            const KnownBits edge = ComputeWorker(comp, vn, reachAss, --budget, visited);
+            phiBits              = first ? edge : KnownBits::Union(phiBits, edge);
+            first                = false;
+
+            // Once nothing is known, merging more edges cannot recover any information.
+            return phiBits.IsUnknown() ? Compiler::AssertVisit::Abort : Compiler::AssertVisit::Continue;
+        };
+        if ((comp->optVisitReachingAssertions(num, visitor) == Compiler::AssertVisit::Continue) && !first)
+        {
+            result = KnownBits::Intersect(result, phiBits);
+        }
+    }
+
+    MergeKnownBitsAssertions(comp, num, assertions, width, budget, &result);
+    return result.Truncate(width);
+}
+
+//------------------------------------------------------------------------
+// KnownBits::Compute: Entry point for the bit-level analog of
+// RangeCheck::GetRangeFromAssertions. Returns which bits of "num" are known 0/1, derived from
+// its value-number structure and the incoming assertions. Supports 32- and 64-bit integral VNs;
+// on unsupported types returns the fully-unknown lattice.
+//
+// See KnownBits::Compute in knownbits.h for the parameter documentation.
+//
+KnownBits KnownBits::Compute(Compiler* comp, ValueNum num, ASSERT_VALARG_TP assertions, int budget)
+{
+    // Each top-level query starts from an empty set of already-entered phi VNs; the worker
+    // records phis in it as it recurses.
+    ValueNumStore::SmallValueNumSet enteredPhis;
+    const KnownBits                 bits = ComputeWorker(comp, num, assertions, budget, &enteredPhis);
+    return bits;
+}
diff --git a/src/coreclr/jit/knownbits.h b/src/coreclr/jit/knownbits.h
new file mode 100644
index 00000000000000..334dca2c46c7c7
--- /dev/null
+++ b/src/coreclr/jit/knownbits.h
@@ -0,0 +1,336 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+//
+// KnownBits is a 32/64-bit fixed-width "known bits" lattice for an integral value, together with
+// transfer functions that compute the lattice for the result of an operation from its operands.
+// It is the bit-level analog of the interval/range analysis in rangecheck.{h,cpp}.
+//
+// The struct (KnownBits) and the transfer functions (KnownBitsOps) are ports of LLVM's
+// `llvm::KnownBits` from `llvm/Support/KnownBits.{h,cpp}`. Differences from LLVM:
+//
+//  * The lattice is fixed-width (32 or 64 bits) and stored in two uint64_t fields; LLVM uses APInt
+//    so it can carry any bit width. An explicit `width` parameter takes the place of getBitWidth();
+//    masking and sign-extension are done explicitly here.
+//
+//  * Only the subset of operations needed by today's consumers (And/Or/UDiv/Cast/EvalRelop) is
+//    ported; the rest of LLVM's catalog (mul, urem, shifts, sadd_sat, abs, ...) is intentionally
+//    omitted and can be added back if and when a consumer wants it.
+//
+//  * `KnownBits::Compute` is the analysis driver (the analog of `RangeCheck::GetRangeFromAssertions`):
+//    it derives the known bits of a value number from its VN structure and the incoming assertions.
+//
+
+#pragma once
+
+#include "compiler.h"
+
+// "Known bits" lattice.
+// For each bit i in [0, width):
+//   * set in knownZero => definitely 0
+//   * set in knownOne  => definitely 1
+//   * set in neither   => unknown
+// Invariants: (knownZero & knownOne) == 0, and bits at positions >= width are 0 in both masks.
+struct KnownBits
+{
+    uint64_t knownZero;
+    uint64_t knownOne;
+
+    // Fully-unknown lattice: no bit is known.
+    KnownBits()
+        : knownZero(0)
+        , knownOne(0)
+    {
+    }
+
+    // Lattice with explicit masks; the two masks must not overlap.
+    KnownBits(uint64_t z, uint64_t o)
+        : knownZero(z)
+        , knownOne(o)
+    {
+        assert((z & o) == 0);
+    }
+
+    // Mask covering the low "width" bits (width must be 32 or 64). Replaces APInt's implicit width.
+    static uint64_t WidthMask(unsigned width)
+    {
+        assert((width == 32) || (width == 64));
+        return (width == 64) ? UINT64_MAX : 0xFFFFFFFFull;
+    }
+
+    // Mask with the low "n" bits set, n in [0, 64]. Analog of APInt::getLowBitsSet.
+    static uint64_t LowMask(unsigned n)
+    {
+        assert(n <= 64);
+        // n == 0 is special-cased because a shift by 64 is not a valid shift amount.
+        return (n == 0) ? 0 : (UINT64_MAX >> (64 - n));
+    }
+
+    // Port of llvm::KnownBits::isUnknown.
+    bool IsUnknown() const
+    {
+        return (knownZero == 0) && (knownOne == 0);
+    }
+
+    // Port of llvm::KnownBits::isConstant.
+    bool IsConstant(unsigned width) const
+    {
+        const uint64_t mask = WidthMask(width);
+        return ((knownZero | knownOne) & mask) == mask;
+    }
+
+    // Port of llvm::KnownBits::getConstant.
+    uint64_t GetConstant(unsigned width) const
+    {
+        assert(IsConstant(width));
+        return knownOne & WidthMask(width);
+    }
+
+    // Port of llvm::KnownBits::trunc.
+    KnownBits Truncate(unsigned width) const
+    {
+        const uint64_t mask = WidthMask(width);
+        return KnownBits(knownZero & mask, knownOne & mask);
+    }
+
+    // Port of llvm::KnownBits::makeConstant.
+    static KnownBits FromConstant(uint64_t value, unsigned width)
+    {
+        const uint64_t mask = WidthMask(width);
+        value &= mask;
+        return KnownBits(~value & mask, value);
+    }
+
+    // KnownBits with all bits above the highest set bit of maxVal forced to 0. Used to fold an
+    // unsigned upper bound "value <= maxVal" into the lattice. Returns the fully-unknown lattice
+    // when maxVal does not constrain a "width"-bit value.
+    static KnownBits FromUnsignedUpperBound(uint64_t maxVal, unsigned width)
+    {
+        const uint64_t mask = WidthMask(width);
+        if (maxVal >= mask)
+        {
+            return KnownBits();
+        }
+        // bitLen = number of bits needed to represent maxVal (0 when maxVal == 0); LeadingZeroCount(0) == 64.
+        const unsigned bitLen = 64 - (unsigned)BitOperations::LeadingZeroCount(maxVal);
+        return KnownBits(~LowMask(bitLen) & mask, 0);
+    }
+
+    // Combine two facts about the *same* value (assertion refinement). Conflicting bits (one says
+    // 0, the other 1) imply a dead path and are dropped to "unknown" so we never assert a false
+    // fact. This is llvm::KnownBits::unionWith with the conflict-drop step folded in; the name
+    // describes the *intersection* of the two sets of possible values.
+    static KnownBits Intersect(const KnownBits& a, const KnownBits& b)
+    {
+        const uint64_t z        = a.knownZero | b.knownZero;
+        const uint64_t o        = a.knownOne | b.knownOne;
+        const uint64_t conflict = z & o;
+        return KnownBits(z & ~conflict, o & ~conflict);
+    }
+
+    // Merge facts across two possible values (e.g. phi inputs). Port of llvm::KnownBits::intersectWith.
+    static KnownBits Union(const KnownBits& a, const KnownBits& b)
+    {
+        return KnownBits(a.knownZero & b.knownZero, a.knownOne & b.knownOne);
+    }
+
+    // Sign-extend the low "width" bits of "value" to a 64-bit signed integer.
+    static int64_t SignExtend(uint64_t value, unsigned width)
+    {
+        if (width == 64)
+        {
+            return (int64_t)value;
+        }
+        const uint64_t mask    = WidthMask(width);
+        const uint64_t signBit = 1ull << (width - 1);
+        value &= mask;
+        return (int64_t)(((value & signBit) != 0) ? (value | ~mask) : value);
+    }
+
+    // Express as a signed [lo, hi] range. Succeeds only when the sign bit is known (otherwise the
+    // range would straddle 0). Combines llvm::KnownBits::getSignedMinValue / getSignedMaxValue,
+    // gated on a known sign bit so the caller gets a single contiguous interval.
+    // "*lo" and "*hi" are written only when the function returns true.
+    bool TryGetSignedRange(unsigned width, int64_t* lo, int64_t* hi) const
+    {
+        const uint64_t signBit = 1ull << (width - 1);
+        const uint64_t minBits = GetUMin(width); // unknown bits taken as 0
+        const uint64_t maxBits = GetUMax(width); // unknown bits taken as 1
+
+        if ((knownZero & signBit) != 0)
+        {
+            // Sign bit known 0 => value is non-negative.
+            *lo = (int64_t)minBits;
+            *hi = (int64_t)maxBits;
+            return true;
+        }
+        if ((knownOne & signBit) != 0)
+        {
+            // Sign bit known 1 => value is negative; sign-extend both bounds.
+            *lo = SignExtend(minBits, width);
+            *hi = SignExtend(maxBits, width);
+            return true;
+        }
+        return false;
+    }
+
+    // Port of llvm::KnownBits::getMinValue / getMaxValue (unsigned).
+    // The minimum takes every unknown bit as 0, the maximum takes every unknown bit as 1.
+    uint64_t GetUMin(unsigned width) const
+    {
+        return knownOne & WidthMask(width);
+    }
+    uint64_t GetUMax(unsigned width) const
+    {
+        return ~knownZero & WidthMask(width);
+    }
+
+    // Derive KnownBits of "num" from its VN structure and the incoming assertions. Bit-level
+    // analog of RangeCheck::GetRangeFromAssertions. Returns the fully-unknown lattice on
+    // unsupported types or when "budget" is exhausted. See knownbits.cpp.
+    static KnownBits Compute(Compiler* comp, ValueNum num, ASSERT_VALARG_TP assertions, int budget = 10);
+};
+
+// Transfer functions from operand KnownBits to result KnownBits. Each is a port of the matching
+// routine in llvm/lib/Support/KnownBits.cpp; the logic is unchanged, only APInt is replaced by
+// uint64_t + explicit masking.
+struct KnownBitsOps
+{
+    // Port of llvm::KnownBits::operator&=. Bit is 0 if either operand bit is 0; 1 only if both are 1.
+    static KnownBits And(const KnownBits& a, const KnownBits& b)
+    {
+        return KnownBits(a.knownZero | b.knownZero, a.knownOne & b.knownOne);
+    }
+
+    // Port of llvm::KnownBits::operator|=. Bit is 1 if either operand bit is 1; 0 only if both are 0.
+    static KnownBits Or(const KnownBits& a, const KnownBits& b)
+    {
+        return KnownBits(a.knownZero & b.knownZero, a.knownOne | b.knownOne);
+    }
+
+    // Port of llvm::KnownBits::udiv (we keep only the leading-zeros result and omit the
+    // exact-division low-bit refinement which doesn't trigger in measurements).
+    static KnownBits UDiv(const KnownBits& a, const KnownBits& b, unsigned width)
+    {
+        const uint64_t mask     = KnownBits::WidthMask(width);
+        const uint64_t maxNum   = a.GetUMax(width); // a.getMaxValue
+        const uint64_t minDenom = b.GetUMin(width); // b.getMinValue
+        if (maxNum == 0)
+        {
+            // 0 / x == 0. Matches LLVM's "if (LHS.isZero()) ... setAllZero".
+            return KnownBits::FromConstant(0, width);
+        }
+        // Largest possible result = maxNumerator / minDenominator; LLVM falls back to maxNum when
+        // minDenom == 0, we do the same.
+        const uint64_t maxRes = (minDenom == 0) ? maxNum : (maxNum / minDenom);
+        const unsigned bitLen = (maxRes == 0) ? 0 : (64 - (unsigned)BitOperations::LeadingZeroCount(maxRes));
+        if (bitLen >= width)
+        {
+            return KnownBits();
+        }
+        return KnownBits(mask & ~KnownBits::LowMask(bitLen), 0);
+    }
+
+    // KnownBits of a cast from "srcWidth" bits to "castToType". Combines llvm::KnownBits::trunc
+    // and zext/sext into a single helper, because in the JIT a GT_CAST does both at once: it
+    // narrows to the value-bits of castToType and then sign- or zero-extends to the destination's
+    // normalized integer width (TYP_INT/TYP_LONG).
+    static KnownBits Cast(const KnownBits& srcKB, unsigned srcWidth, var_types castToType, bool srcIsUnsigned)
+    {
+        const unsigned vb       = genTypeSize(castToType) * BITS_PER_BYTE; // value bits of dest type
+        const unsigned dstWidth = (vb <= 32) ? 32 : 64;                    // normalized to int/long
+        const unsigned passBits = min(vb, srcWidth);
+        const bool     isWiden  = (vb > srcWidth);
+
+        // Low "passBits" bits pass through (LLVM trunc).
+        const uint64_t lowMask = KnownBits::LowMask(passBits);
+        KnownBits      result(srcKB.knownZero & lowMask, srcKB.knownOne & lowMask);
+
+        // Higher bits: zero-extend if unsigned (LLVM zext), otherwise replicate the sign bit when
+        // it is known (LLVM sext).
+        if (passBits < dstWidth)
+        {
+            const uint64_t extMask = KnownBits::LowMask(dstWidth) & ~lowMask;
+            const uint64_t signBit = 1ull << (passBits - 1);
+            // A widening cast extends according to the source signedness; otherwise the
+            // signedness of the destination type decides.
+            const bool unsignd = isWiden ? srcIsUnsigned : varTypeIsUnsigned(castToType);
+            if (unsignd)
+            {
+                result.knownZero |= extMask; // zext
+            }
+            else if ((srcKB.knownOne & signBit) != 0)
+            {
+                result.knownOne |= extMask; // sext, sign known 1
+            }
+            else if ((srcKB.knownZero & signBit) != 0)
+            {
+                result.knownZero |= extMask; // sext, sign known 0
+            }
+        }
+        return result.Truncate(dstWidth);
+    }
+
+    // Port of llvm::KnownBits::eq/ne/ult/ule/ugt/uge/slt/sle/sgt/sge (all min/max based), fused
+    // into one switch. Returns 1 when the comparison is always true, 0 when always false, -1 when
+    // undetermined (LLVM returns a std::optional of bool; -1 corresponds to std::nullopt).
+    // Operators other than GT_EQ/GT_NE/GT_LT/GT_LE/GT_GT/GT_GE also yield -1.
+    static int EvalRelop(genTreeOps oper, bool isUnsigned, const KnownBits& a, const KnownBits& b, unsigned width)
+    {
+        const uint64_t mask = KnownBits::WidthMask(width);
+
+        // Equality (LLVM KnownBits::eq): a and b must differ if some bit is known 1 in one and
+        // known 0 in the other ("LHS.One.intersects(RHS.Zero) || RHS.One.intersects(LHS.Zero)").
+        if ((oper == GT_EQ) || (oper == GT_NE))
+        {
+            const bool mustDiffer =
+                ((a.knownOne & b.knownZero & mask) != 0) || ((a.knownZero & b.knownOne & mask) != 0);
+            if (mustDiffer)
+            {
+                return (oper == GT_EQ) ? 0 : 1;
+            }
+            if (a.IsConstant(width) && b.IsConstant(width))
+            {
+                return ((a.GetConstant(width) == b.GetConstant(width)) == (oper == GT_EQ)) ? 1 : 0;
+            }
+            return -1;
+        }
+
+        // Ordered comparisons reduce to min/max. Unsigned uses GetUMin/GetUMax directly; signed
+        // follows LLVM's getSignedMinValue/getSignedMaxValue which sign-extend after fixing the
+        // sign bit.
+        const uint64_t signBit = 1ull << (width - 1);
+        auto           minMax  = [=](const KnownBits& kb, uint64_t* lo, uint64_t* hi) {
+            uint64_t mn = kb.GetUMin(width);
+            uint64_t mx = kb.GetUMax(width);
+            if (!isUnsigned)
+            {
+                if ((kb.knownZero & signBit) == 0)
+                    mn |= signBit; // sign could be 1 => most-negative candidate sets it
+                if ((kb.knownOne & signBit) == 0)
+                    mx &= ~signBit; // sign could be 0 => most-positive candidate clears it
+                mn = (uint64_t)KnownBits::SignExtend(mn, width);
+                mx = (uint64_t)KnownBits::SignExtend(mx, width);
+            }
+            *lo = mn;
+            *hi = mx;
+        };
+
+        uint64_t aMin, aMax, bMin, bMax;
+        minMax(a, &aMin, &aMax);
+        minMax(b, &bMin, &bMax);
+
+        auto lt = [=](uint64_t x, uint64_t y) {
+            return isUnsigned ? (x < y) : ((int64_t)x < (int64_t)y);
+        };
+        auto le = [=](uint64_t x, uint64_t y) {
+            return isUnsigned ? (x <= y) : ((int64_t)x <= (int64_t)y);
+        };
+
+        // Same shape as LLVM's ult/ule/ugt/uge (and their signed counterparts): decidable iff one
+        // side's max is strictly/non-strictly below the other side's min.
+        switch (oper)
+        {
+            case GT_LT:
+                return lt(aMax, bMin) ? 1 : (le(bMax, aMin) ? 0 : -1);
+            case GT_LE:
+                return le(aMax, bMin) ? 1 : (lt(bMax, aMin) ? 0 : -1);
+            case GT_GT:
+                return lt(bMax, aMin) ? 1 : (le(aMax, bMin) ? 0 : -1);
+            case GT_GE:
+                return le(bMax, aMin) ? 1 : (lt(aMax, bMin) ? 0 : -1);
+            default:
+                return -1;
+        }
+    }
+};