[RISCV] Lower the paadd/pasub intrinsics to existing ISD nodes. NFC#203646
[RISCV] Lower the paadd/pasub intrinsics to existing ISD nodes. NFC#203646topperc wants to merge 1 commit into
Conversation
This avoids having 2 different isel patterns for the same operation.
|
@llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) ChangesThis avoids having 2 different isel patterns for the same operation. Full diff: https://github.com/llvm/llvm-project/pull/203646.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 57af056c2d3c0..4a937fe2c6462 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -11736,6 +11736,29 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? ISD::CLMULH : ISD::CLMULR;
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
}
+ case Intrinsic::riscv_paadd:
+ case Intrinsic::riscv_paaddu:
+ case Intrinsic::riscv_pasub:
+ case Intrinsic::riscv_pasubu: {
+ unsigned Opc;
+ switch (IntNo) {
+ case Intrinsic::riscv_paadd:
+ Opc = ISD::AVGFLOORS;
+ break;
+ case Intrinsic::riscv_paaddu:
+ Opc = ISD::AVGFLOORU;
+ break;
+ case Intrinsic::riscv_pasub:
+ Opc = RISCVISD::ASUB;
+ break;
+ case Intrinsic::riscv_pasubu:
+ Opc = RISCVISD::ASUBU;
+ break;
+ }
+
+ return DAG.getNode(Opc, DL, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2));
+ }
case Intrinsic::experimental_get_vector_length:
return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
case Intrinsic::riscv_vmv_x_s: {
@@ -15648,14 +15671,29 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
if (!Subtarget.is64Bit() || (VT != MVT::v4i8 && VT != MVT::v2i16))
return;
+ unsigned Opc;
+ switch (IntNo) {
+ case Intrinsic::riscv_paadd:
+ Opc = ISD::AVGFLOORS;
+ break;
+ case Intrinsic::riscv_paaddu:
+ Opc = ISD::AVGFLOORU;
+ break;
+ case Intrinsic::riscv_pasub:
+ Opc = RISCVISD::ASUB;
+ break;
+ case Intrinsic::riscv_pasubu:
+ Opc = RISCVISD::ASUBU;
+ break;
+ }
+
EVT WideVT = VT == MVT::v4i8 ? MVT::v8i8 : MVT::v4i16;
SDValue Undef = DAG.getUNDEF(VT);
SDValue Op0 =
DAG.getNode(ISD::CONCAT_VECTORS, DL, WideVT, N->getOperand(1), Undef);
SDValue Op1 =
DAG.getNode(ISD::CONCAT_VECTORS, DL, WideVT, N->getOperand(2), Undef);
- SDValue Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WideVT,
- N->getOperand(0), Op0, Op1);
+ SDValue Res = DAG.getNode(Opc, DL, WideVT, Op0, Op1);
Results.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
DAG.getVectorIdxConstant(0, DL)));
return;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index fe1e459967e92..79ff90545b68a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1990,22 +1990,12 @@ let Predicates = [HasStdExtP] in {
def : PatGprGpr<riscv_asub, PASUB_B, XLenVecI8VT>;
def : PatGprGpr<riscv_asubu, PASUBU_B, XLenVecI8VT>;
- def : PatGprGpr<int_riscv_paadd, PAADD_B, XLenVecI8VT>;
- def : PatGprGpr<int_riscv_paaddu, PAADDU_B, XLenVecI8VT>;
- def : PatGprGpr<int_riscv_pasub, PASUB_B, XLenVecI8VT>;
- def : PatGprGpr<int_riscv_pasubu, PASUBU_B, XLenVecI8VT>;
-
// 16-bit averaging patterns
def : PatGprGpr<avgfloors, PAADD_H, XLenVecI16VT>;
def : PatGprGpr<avgflooru, PAADDU_H, XLenVecI16VT>;
def : PatGprGpr<riscv_asub, PASUB_H, XLenVecI16VT>;
def : PatGprGpr<riscv_asubu, PASUBU_H, XLenVecI16VT>;
- def : PatGprGpr<int_riscv_paadd, PAADD_H, XLenVecI16VT>;
- def : PatGprGpr<int_riscv_paaddu, PAADDU_H, XLenVecI16VT>;
- def : PatGprGpr<int_riscv_pasub, PASUB_H, XLenVecI16VT>;
- def : PatGprGpr<int_riscv_pasubu, PASUBU_H, XLenVecI16VT>;
-
// 8-bit absolute difference patterns
def : Pat<(XLenVecI8VT (abs GPR:$rs1)), (PABD_B GPR:$rs1, (XLenVecI8VT X0))>;
def : PatGprGpr<abds, PABD_B, XLenVecI8VT>;
@@ -2280,19 +2270,6 @@ let append Predicates = [IsRV32] in {
def : PatGprPairGprPair<riscv_asub, PASUB_DW, v2i32>;
def : PatGprPairGprPair<riscv_asubu, PASUBU_DW, v2i32>;
- def : PatGprPairGprPair<int_riscv_paadd, PAADD_DB, v8i8>;
- def : PatGprPairGprPair<int_riscv_paaddu, PAADDU_DB, v8i8>;
- def : PatGprPairGprPair<int_riscv_pasub, PASUB_DB, v8i8>;
- def : PatGprPairGprPair<int_riscv_pasubu, PASUBU_DB, v8i8>;
- def : PatGprPairGprPair<int_riscv_paadd, PAADD_DH, v4i16>;
- def : PatGprPairGprPair<int_riscv_paaddu, PAADDU_DH, v4i16>;
- def : PatGprPairGprPair<int_riscv_pasub, PASUB_DH, v4i16>;
- def : PatGprPairGprPair<int_riscv_pasubu, PASUBU_DH, v4i16>;
- def : PatGprPairGprPair<int_riscv_paadd, PAADD_DW, v2i32>;
- def : PatGprPairGprPair<int_riscv_paaddu, PAADDU_DW, v2i32>;
- def : PatGprPairGprPair<int_riscv_pasub, PASUB_DW, v2i32>;
- def : PatGprPairGprPair<int_riscv_pasubu, PASUBU_DW, v2i32>;
-
// 8-bit absolute difference patterns
def : Pat<(v8i8 (abs GPRPair:$rs1)), (PABD_DB GPRPair:$rs1, (v8i8 X0_Pair))>;
def : PatGprPairGprPair<abds, PABD_DB, v8i8>;
@@ -2501,14 +2478,10 @@ let append Predicates = [IsRV64] in {
// 32-bit averaging patterns
def : PatGprGpr<avgfloors, PAADD_W, v2i32>;
def : PatGprGpr<avgflooru, PAADDU_W, v2i32>;
- def : PatGprGpr<int_riscv_paadd, PAADD_W, v2i32>;
- def : PatGprGpr<int_riscv_paaddu, PAADDU_W, v2i32>;
// 32-bit averaging-sub patterns
def : PatGprGpr<riscv_asub, PASUB_W, v2i32>;
def : PatGprGpr<riscv_asubu, PASUBU_W, v2i32>;
- def : PatGprGpr<int_riscv_pasub, PASUB_W, v2i32>;
- def : PatGprGpr<int_riscv_pasubu, PASUBU_W, v2i32>;
// 32-bit multiply high patterns
def : PatGprGpr<mulhs, PMULH_W, v2i32>;
|
|
I noticed DAGCombiner already folds the full-precision return __builtin_convertvector(
(__builtin_convertvector(a, i16x4) + __builtin_convertvector(b, i16x4)) >> 1,
i8x4);which produces %0 = sext <4 x i8> %a to <4 x i16>
%1 = sext <4 x i8> %b to <4 x i16>
%2 = add nsw <4 x i16> %1, %0
%3 = lshr <4 x i16> %2, splat (i16 1)
%4 = trunc <4 x i16> %3 to <4 x i8>and selects to the expected instructions. As a side benefit, the RV64 widening for the 32-bit vector types would not be needed. |
If the pattern gets broken by another transform, we won't produce the expected instruction for the intrinsic. That's not a good experience for intrinsic programmers. |
|
That makes sense, thanks — a fragile IR pattern isn't a guarantee we can hand intrinsic users, so keeping the explicit intrinsics is the right tradeoff. LGTM. |
This avoids having 2 different isel patterns for the same operation.