[RISCV] Lower the paadd/pasub intrinsics to existing ISD nodes. NFC by topperc · Pull Request #203646 · llvm/llvm-project

topperc · 2026-06-12T22:33:43Z

This avoids having 2 different isel patterns for the same operation.

llvmorg-github-actions · 2026-06-12T22:34:32Z

@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

Changes

This avoids having 2 different isel patterns for the same operation.

Full diff: https://github.com/llvm/llvm-project/pull/203646.diff

2 Files Affected:

(modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+40-2)
(modified) llvm/lib/Target/RISCV/RISCVInstrInfoP.td (-27)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 57af056c2d3c0..4a937fe2c6462 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -11736,6 +11736,29 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? ISD::CLMULH : ISD::CLMULR;
     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
   }
+  case Intrinsic::riscv_paadd:
+  case Intrinsic::riscv_paaddu:
+  case Intrinsic::riscv_pasub:
+  case Intrinsic::riscv_pasubu: {
+    unsigned Opc;
+    switch (IntNo) {
+    case Intrinsic::riscv_paadd:
+      Opc = ISD::AVGFLOORS;
+      break;
+    case Intrinsic::riscv_paaddu:
+      Opc = ISD::AVGFLOORU;
+      break;
+    case Intrinsic::riscv_pasub:
+      Opc = RISCVISD::ASUB;
+      break;
+    case Intrinsic::riscv_pasubu:
+      Opc = RISCVISD::ASUBU;
+      break;
+    }
+
+    return DAG.getNode(Opc, DL, Op.getValueType(), Op.getOperand(1),
+                       Op.getOperand(2));
+  }
   case Intrinsic::experimental_get_vector_length:
     return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
   case Intrinsic::riscv_vmv_x_s: {
@@ -15648,14 +15671,29 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
       if (!Subtarget.is64Bit() || (VT != MVT::v4i8 && VT != MVT::v2i16))
         return;
 
+      unsigned Opc;
+      switch (IntNo) {
+      case Intrinsic::riscv_paadd:
+        Opc = ISD::AVGFLOORS;
+        break;
+      case Intrinsic::riscv_paaddu:
+        Opc = ISD::AVGFLOORU;
+        break;
+      case Intrinsic::riscv_pasub:
+        Opc = RISCVISD::ASUB;
+        break;
+      case Intrinsic::riscv_pasubu:
+        Opc = RISCVISD::ASUBU;
+        break;
+      }
+
       EVT WideVT = VT == MVT::v4i8 ? MVT::v8i8 : MVT::v4i16;
       SDValue Undef = DAG.getUNDEF(VT);
       SDValue Op0 =
           DAG.getNode(ISD::CONCAT_VECTORS, DL, WideVT, N->getOperand(1), Undef);
       SDValue Op1 =
           DAG.getNode(ISD::CONCAT_VECTORS, DL, WideVT, N->getOperand(2), Undef);
-      SDValue Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WideVT,
-                                N->getOperand(0), Op0, Op1);
+      SDValue Res = DAG.getNode(Opc, DL, WideVT, Op0, Op1);
       Results.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
                                     DAG.getVectorIdxConstant(0, DL)));
       return;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index fe1e459967e92..79ff90545b68a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1990,22 +1990,12 @@ let Predicates = [HasStdExtP] in {
   def : PatGprGpr<riscv_asub, PASUB_B, XLenVecI8VT>;
   def : PatGprGpr<riscv_asubu, PASUBU_B, XLenVecI8VT>;
 
-  def : PatGprGpr<int_riscv_paadd, PAADD_B, XLenVecI8VT>;
-  def : PatGprGpr<int_riscv_paaddu, PAADDU_B, XLenVecI8VT>;
-  def : PatGprGpr<int_riscv_pasub, PASUB_B, XLenVecI8VT>;
-  def : PatGprGpr<int_riscv_pasubu, PASUBU_B, XLenVecI8VT>;
-
   // 16-bit averaging patterns
   def : PatGprGpr<avgfloors, PAADD_H, XLenVecI16VT>;
   def : PatGprGpr<avgflooru, PAADDU_H, XLenVecI16VT>;
   def : PatGprGpr<riscv_asub, PASUB_H, XLenVecI16VT>;
   def : PatGprGpr<riscv_asubu, PASUBU_H, XLenVecI16VT>;
 
-  def : PatGprGpr<int_riscv_paadd, PAADD_H, XLenVecI16VT>;
-  def : PatGprGpr<int_riscv_paaddu, PAADDU_H, XLenVecI16VT>;
-  def : PatGprGpr<int_riscv_pasub, PASUB_H, XLenVecI16VT>;
-  def : PatGprGpr<int_riscv_pasubu, PASUBU_H, XLenVecI16VT>;
-
   // 8-bit absolute difference patterns
   def : Pat<(XLenVecI8VT (abs GPR:$rs1)), (PABD_B GPR:$rs1, (XLenVecI8VT X0))>;
   def : PatGprGpr<abds, PABD_B, XLenVecI8VT>;
@@ -2280,19 +2270,6 @@ let append Predicates = [IsRV32] in {
   def : PatGprPairGprPair<riscv_asub, PASUB_DW, v2i32>;
   def : PatGprPairGprPair<riscv_asubu, PASUBU_DW, v2i32>;
 
-  def : PatGprPairGprPair<int_riscv_paadd, PAADD_DB, v8i8>;
-  def : PatGprPairGprPair<int_riscv_paaddu, PAADDU_DB, v8i8>;
-  def : PatGprPairGprPair<int_riscv_pasub, PASUB_DB, v8i8>;
-  def : PatGprPairGprPair<int_riscv_pasubu, PASUBU_DB, v8i8>;
-  def : PatGprPairGprPair<int_riscv_paadd, PAADD_DH, v4i16>;
-  def : PatGprPairGprPair<int_riscv_paaddu, PAADDU_DH, v4i16>;
-  def : PatGprPairGprPair<int_riscv_pasub, PASUB_DH, v4i16>;
-  def : PatGprPairGprPair<int_riscv_pasubu, PASUBU_DH, v4i16>;
-  def : PatGprPairGprPair<int_riscv_paadd, PAADD_DW, v2i32>;
-  def : PatGprPairGprPair<int_riscv_paaddu, PAADDU_DW, v2i32>;
-  def : PatGprPairGprPair<int_riscv_pasub, PASUB_DW, v2i32>;
-  def : PatGprPairGprPair<int_riscv_pasubu, PASUBU_DW, v2i32>;
-
   // 8-bit absolute difference patterns
   def : Pat<(v8i8 (abs GPRPair:$rs1)), (PABD_DB GPRPair:$rs1, (v8i8 X0_Pair))>;
   def : PatGprPairGprPair<abds, PABD_DB, v8i8>;
@@ -2501,14 +2478,10 @@ let append Predicates = [IsRV64] in {
   // 32-bit averaging patterns
   def : PatGprGpr<avgfloors, PAADD_W, v2i32>;
   def : PatGprGpr<avgflooru, PAADDU_W, v2i32>;
-  def : PatGprGpr<int_riscv_paadd, PAADD_W, v2i32>;
-  def : PatGprGpr<int_riscv_paaddu, PAADDU_W, v2i32>;
 
   // 32-bit averaging-sub patterns
   def : PatGprGpr<riscv_asub, PASUB_W, v2i32>;
   def : PatGprGpr<riscv_asubu, PASUBU_W, v2i32>;
-  def : PatGprGpr<int_riscv_pasub, PASUB_W, v2i32>;
-  def : PatGprGpr<int_riscv_pasubu, PASUBU_W, v2i32>;
 
   // 32-bit multiply high patterns
   def : PatGprGpr<mulhs, PMULH_W, v2i32>;

sihuan · 2026-06-13T02:28:19Z

I noticed DAGCombiner already folds the full-precision (a +/- b) >> 1 into ISD::AVGFLOORS/ISD::AVGFLOORU (for the add) and RISCVISD::ASUB/ASUBU (for the sub). Given that, should we maybe go a step further and drop the llvm ir intrinsics entirely? Clang could emit the plain (a +/- b) >> 1 directly, with no llvm.riscv.* in the IR.
I tried a header wrapper along these lines:

return __builtin_convertvector(
    (__builtin_convertvector(a, i16x4) + __builtin_convertvector(b, i16x4)) >> 1,
    i8x4);

which produces

%0 = sext <4 x i8> %a to <4 x i16>
%1 = sext <4 x i8> %b to <4 x i16>
%2 = add nsw <4 x i16> %1, %0
%3 = lshr <4 x i16> %2, splat (i16 1)
%4 = trunc <4 x i16> %3 to <4 x i8>

and selects to the expected instructions. As a side benefit, the RV64 widening for the 32-bit vector types would not be needed.

topperc · 2026-06-13T04:42:06Z

I noticed DAGCombiner already folds the full-precision (a +/- b) >> 1 into ISD::AVGFLOORS/ISD::AVGFLOORU (for the add) and RISCVISD::ASUB/ASUBU (for the sub). Given that, should we maybe go a step further and drop the llvm ir intrinsics entirely? Clang could emit the plain (a +/- b) >> 1 directly, with no llvm.riscv.* in the IR. I tried a header wrapper along these lines:
return __builtin_convertvector(
    (__builtin_convertvector(a, i16x4) + __builtin_convertvector(b, i16x4)) >> 1,
    i8x4);
which produces
%0 = sext <4 x i8> %a to <4 x i16>
%1 = sext <4 x i8> %b to <4 x i16>
%2 = add nsw <4 x i16> %1, %0
%3 = lshr <4 x i16> %2, splat (i16 1)
%4 = trunc <4 x i16> %3 to <4 x i8>
and selects to the expected instructions. As a side benefit, the RV64 widening for the 32-bit vector types would not be needed.

If the pattern gets broken by another transform, we won't produce the expected instruction for the intrinsic. That's not a good experience for intrinsic programmers.

sihuan · 2026-06-13T14:25:05Z

That makes sense, thanks — a fragile IR pattern isn't a guarantee we can hand intrinsic users, so keeping the explicit intrinsics is the right tradeoff. LGTM.

[RISCV] Lower the paadd/pasub intrinsics to existing ISD nodes. NFC

e884762

This avoids having 2 different isel patterns for the same operation.

topperc requested review from 4vtomat, lenary and sihuan June 12, 2026 22:33

llvmorg-github-actions Bot added the backend:RISC-V label Jun 12, 2026

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[RISCV] Lower the paadd/pasub intrinsics to existing ISD nodes. NFC#203646

[RISCV] Lower the paadd/pasub intrinsics to existing ISD nodes. NFC#203646
topperc wants to merge 1 commit into
llvm:mainfrom
topperc:pr/paadd

topperc commented Jun 12, 2026

Uh oh!

llvmorg-github-actions Bot commented Jun 12, 2026

Uh oh!

sihuan commented Jun 13, 2026

Uh oh!

topperc commented Jun 13, 2026

Uh oh!

sihuan commented Jun 13, 2026

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

Conversation

topperc commented Jun 12, 2026

Uh oh!

llvmorg-github-actions Bot commented Jun 12, 2026

Uh oh!

sihuan commented Jun 13, 2026

Uh oh!

topperc commented Jun 13, 2026

Uh oh!

sihuan commented Jun 13, 2026

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants