[AArch64]Fix specification of NEON FP16 scalar CVT to/from fixed point by CarolineConcatto · Pull Request #201425 · llvm/llvm-project

CarolineConcatto · 2026-06-03T18:18:54Z

This patch implements the changes proposed in[1] for SelectionDAG and GlobalISel

[1]ARM-software/acle#435

This patch implements the changes proposed in[1] for SelectionDAG and GlobalISel [1]ARM-software/acle#435

llvmorg-github-actions · 2026-06-03T18:19:37Z

@llvm/pr-subscribers-backend-aarch64

Author: CarolineConcatto

Changes

This patch implements the changes proposed in[1] for SelectionDAG and GlobalISel

[1]ARM-software/acle#435

Full diff: https://github.com/llvm/llvm-project/pull/201425.diff

4 Files Affected:

(modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+30-1)
(modified) llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp (+9)
(modified) llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp (+14-3)
(modified) llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll (+19-38)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 30b621bab41aa..e4e741f80ab2e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -9167,10 +9167,23 @@ def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
 def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
           (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
 
-// Patterns for FP16 Intrinsics - requires reg copy to/from as i16s not supported.
+def fixedpoint_scalar_xform : SDNodeXForm<timm, [{
+  (void)N;
+  return V;
+}]>;
+def gi_fixedpoint_scalar_xform
+    : GICustomOperandRenderer<"renderFixedPointScalarXForm">,
+      GISDNodeXFormEquiv<fixedpoint_scalar_xform>;
 
+// Patterns for FP16 Intrinsics - requires reg copy to/from as i16s not supported.
 def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
           (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
+let AddedComplexity = 1 in {
+def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 GPR32:$Rn), vecshiftR16:$imm)),
+          (SCVTFSWHri GPR32:$Rn, (fixedpoint_scalar_xform vecshiftR16:$imm))>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 GPR64:$Rn), vecshiftR16:$imm)),
+          (SCVTFSXHri GPR64:$Rn, (fixedpoint_scalar_xform vecshiftR16:$imm))>;
+}
 def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
           (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
 def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
@@ -9179,10 +9192,26 @@ def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
             (and FPR32:$Rn, (i32 65535)),
             vecshiftR16:$imm)),
           (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
+let AddedComplexity = 1 in {
+def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp GPR32:$Rn, vecshiftR16:$imm)),
+          (UCVTFSWHri GPR32:$Rn, (fixedpoint_scalar_xform vecshiftR16:$imm))>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 GPR64:$Rn), vecshiftR16:$imm)),
+          (UCVTFSXHri GPR64:$Rn, (fixedpoint_scalar_xform vecshiftR16:$imm))>;
+}
 def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
           (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
 def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
           (UCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;
+let AddedComplexity = 1 in {
+def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
+          (FCVTZSSWHri FPR16:$Rn, (fixedpoint_scalar_xform vecshiftR32:$imm))>;
+def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
+          (FCVTZSSXHri FPR16:$Rn, (fixedpoint_scalar_xform vecshiftR64:$imm))>;
+def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
+          (FCVTZUSWHri FPR16:$Rn, (fixedpoint_scalar_xform vecshiftR32:$imm))>;
+def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
+          (FCVTZUSXHri FPR16:$Rn, (fixedpoint_scalar_xform vecshiftR64:$imm))>;
+}
 def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
           (i32 (INSERT_SUBREG
             (i32 (IMPLICIT_DEF)),
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index cf650fd5c4e72..6ad1a061ddfef 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -488,6 +488,8 @@ class AArch64InstructionSelector : public InstructionSelector {
   ComplexRendererFns
   selectCVTFixedPointVecBase(const MachineOperand &Root,
                              bool isReciprocal = false) const;
+  void renderFixedPointScalarXForm(MachineInstrBuilder &MIB,
+                                   const MachineInstr &MI, int OpIdx) const;
   void renderFixedPointXForm(MachineInstrBuilder &MIB, const MachineInstr &MI,
                              int OpIdx = -1) const;
   void renderFixedPointRecipXForm(MachineInstrBuilder &MIB,
@@ -7941,6 +7943,13 @@ void AArch64InstructionSelector::renderFixedPointXForm(MachineInstrBuilder &MIB,
   (Renderer->front())(MIB);
 }
 
+void AArch64InstructionSelector::renderFixedPointScalarXForm(
+    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
+  assert(OpIdx == 3 && MI.getOperand(OpIdx).isImm() &&
+         "Expected fixed-point conversion intrinsic immediate");
+  MIB.addImm(MI.getOperand(OpIdx).getImm());
+}
+
 void AArch64InstructionSelector::renderFixedPointRecipXForm(
     MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
   InstructionSelector::ComplexRendererFns Renderer =
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index e814316b0f2ed..088107b5f8594 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -1444,17 +1444,28 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       break;
     }
     case Intrinsic::aarch64_neon_vcvtfxs2fp:
-    case Intrinsic::aarch64_neon_vcvtfxu2fp:
-    case Intrinsic::aarch64_neon_vcvtfp2fxs:
-    case Intrinsic::aarch64_neon_vcvtfp2fxu:
+    case Intrinsic::aarch64_neon_vcvtfxu2fp: {
       // Override these intrinsics, because they would have a partial
       // mapping. This is needed for 'half' types, which otherwise don't
       // get legalised correctly.
       OpRegBankIdx[0] = PMI_FirstFPR;
+      OpRegBankIdx[2] = MRI.getType(MI.getOperand(2).getReg()).isVector()
+                            ? PMI_FirstFPR
+                            : PMI_FirstGPR;
+      // OpRegBankIdx[1] is the intrinsic ID.
+      // OpRegBankIdx[3] is an integer immediate.
+      break;
+    }
+    case Intrinsic::aarch64_neon_vcvtfp2fxs:
+    case Intrinsic::aarch64_neon_vcvtfp2fxu: {
+      OpRegBankIdx[0] = MRI.getType(MI.getOperand(0).getReg()).isVector()
+                            ? PMI_FirstFPR
+                            : PMI_FirstGPR;
       OpRegBankIdx[2] = PMI_FirstFPR;
       // OpRegBankIdx[1] is the intrinsic ID.
       // OpRegBankIdx[3] is an integer immediate.
       break;
+    }
     default: {
       // Check if we know that the intrinsic has any constraints on its register
       // banks. If it does, then update the mapping accordingly.
diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
index da70599483a63..206d61e9e3b9a 100644
--- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
+++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
@@ -196,8 +196,7 @@ define dso_local half @test_vcvth_n_f16_s16_1(i16 %a) {
 ; CHECK-GI-LABEL: test_vcvth_n_f16_s16_1:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    sxth w8, w0
-; CHECK-GI-NEXT:    fmov s0, w8
-; CHECK-GI-NEXT:    scvtf h0, h0, #1
+; CHECK-GI-NEXT:    scvtf h0, w8, #1
 ; CHECK-GI-NEXT:    ret
 entry:
   %sext = sext i16 %a to i32
@@ -215,8 +214,7 @@ define dso_local half @test_vcvth_n_f16_s16_16(i16 %a) {
 ; CHECK-GI-LABEL: test_vcvth_n_f16_s16_16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    sxth w8, w0
-; CHECK-GI-NEXT:    fmov s0, w8
-; CHECK-GI-NEXT:    scvtf h0, h0, #16
+; CHECK-GI-NEXT:    scvtf h0, w8, #16
 ; CHECK-GI-NEXT:    ret
 entry:
   %sext = sext i16 %a to i32
@@ -227,8 +225,7 @@ entry:
 define dso_local half @test_vcvth_n_f16_s32_1(i32 %a) {
 ; CHECK-LABEL: test_vcvth_n_f16_s32_1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fmov s0, w0
-; CHECK-NEXT:    scvtf h0, h0, #1
+; CHECK-NEXT:    scvtf h0, w0, #1
 ; CHECK-NEXT:    ret
 entry:
   %vcvth_n_f16_s32 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 1)
@@ -238,8 +235,7 @@ entry:
 define dso_local half @test_vcvth_n_f16_s32_16(i32 %a) {
 ; CHECK-LABEL: test_vcvth_n_f16_s32_16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fmov s0, w0
-; CHECK-NEXT:    scvtf h0, h0, #16
+; CHECK-NEXT:    scvtf h0, w0, #16
 ; CHECK-NEXT:    ret
 entry:
   %vcvth_n_f16_s32 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 16)
@@ -249,8 +245,7 @@ entry:
 define dso_local i16 @test_vcvth_n_s16_f16_1(half %a) {
 ; CHECK-LABEL: test_vcvth_n_s16_f16_1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs h0, h0, #1
-; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    fcvtzs w0, h0, #1
 ; CHECK-NEXT:    ret
 entry:
   %fcvth_n = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 1)
@@ -261,8 +256,7 @@ entry:
 define dso_local i16 @test_vcvth_n_s16_f16_16(half %a) {
 ; CHECK-LABEL: test_vcvth_n_s16_f16_16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs h0, h0, #16
-; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    fcvtzs w0, h0, #16
 ; CHECK-NEXT:    ret
 entry:
   %fcvth_n = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 16)
@@ -273,8 +267,7 @@ entry:
 define dso_local i32 @test_vcvth_n_s32_f16_1(half %a) {
 ; CHECK-LABEL: test_vcvth_n_s32_f16_1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs h0, h0, #1
-; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    fcvtzs w0, h0, #1
 ; CHECK-NEXT:    ret
 entry:
   %vcvth_n_s32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 1)
@@ -284,8 +277,7 @@ entry:
 define dso_local i32 @test_vcvth_n_s32_f16_16(half %a) {
 ; CHECK-LABEL: test_vcvth_n_s32_f16_16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs h0, h0, #16
-; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    fcvtzs w0, h0, #16
 ; CHECK-NEXT:    ret
 entry:
   %vcvth_n_s32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 16)
@@ -295,8 +287,7 @@ entry:
 define dso_local i64 @test_vcvth_n_s64_f16_1(half %a) {
 ; CHECK-LABEL: test_vcvth_n_s64_f16_1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs h0, h0, #1
-; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    fcvtzs x0, h0, #1
 ; CHECK-NEXT:    ret
 entry:
   %vcvth_n_s64_f16 = tail call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half %a, i32 1)
@@ -306,8 +297,7 @@ entry:
 define dso_local i64 @test_vcvth_n_s64_f16_32(half %a) {
 ; CHECK-LABEL: test_vcvth_n_s64_f16_32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs h0, h0, #32
-; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    fcvtzs x0, h0, #32
 ; CHECK-NEXT:    ret
 entry:
   %vcvth_n_s64_f16 = tail call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half %a, i32 32)
@@ -324,8 +314,7 @@ define dso_local half @test_vcvth_n_f16_u16_1(i16 %a) {
 ; CHECK-GI-LABEL: test_vcvth_n_f16_u16_1:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    and w8, w0, #0xffff
-; CHECK-GI-NEXT:    fmov s0, w8
-; CHECK-GI-NEXT:    ucvtf h0, h0, #1
+; CHECK-GI-NEXT:    ucvtf h0, w8, #1
 ; CHECK-GI-NEXT:    ret
 entry:
   %0 = zext i16 %a to i32
@@ -343,8 +332,7 @@ define dso_local half @test_vcvth_n_f16_u16_16(i16 %a) {
 ; CHECK-GI-LABEL: test_vcvth_n_f16_u16_16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    and w8, w0, #0xffff
-; CHECK-GI-NEXT:    fmov s0, w8
-; CHECK-GI-NEXT:    ucvtf h0, h0, #16
+; CHECK-GI-NEXT:    ucvtf h0, w8, #16
 ; CHECK-GI-NEXT:    ret
 entry:
   %0 = zext i16 %a to i32
@@ -355,8 +343,7 @@ entry:
 define dso_local half @test_vcvth_n_f16_u32_1(i32 %a) {
 ; CHECK-LABEL: test_vcvth_n_f16_u32_1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fmov s0, w0
-; CHECK-NEXT:    ucvtf h0, h0, #1
+; CHECK-NEXT:    ucvtf h0, w0, #1
 ; CHECK-NEXT:    ret
 entry:
   %vcvth_n_f16_u32 = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 1)
@@ -366,8 +353,7 @@ entry:
 define dso_local half @test_vcvth_n_f16_u32_16(i32 %a) {
 ; CHECK-LABEL: test_vcvth_n_f16_u32_16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fmov s0, w0
-; CHECK-NEXT:    ucvtf h0, h0, #16
+; CHECK-NEXT:    ucvtf h0, w0, #16
 ; CHECK-NEXT:    ret
 entry:
   %vcvth_n_f16_u32 = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 16)
@@ -377,8 +363,7 @@ entry:
 define dso_local i16 @test_vcvth_n_u16_f16_1(half %a) {
 ; CHECK-LABEL: test_vcvth_n_u16_f16_1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu h0, h0, #1
-; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    fcvtzu w0, h0, #1
 ; CHECK-NEXT:    ret
 entry:
   %fcvth_n = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 1)
@@ -389,8 +374,7 @@ entry:
 define dso_local i16 @test_vcvth_n_u16_f16_16(half %a) {
 ; CHECK-LABEL: test_vcvth_n_u16_f16_16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu h0, h0, #16
-; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    fcvtzu w0, h0, #16
 ; CHECK-NEXT:    ret
 entry:
   %fcvth_n = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 16)
@@ -401,8 +385,7 @@ entry:
 define dso_local i32 @test_vcvth_n_u32_f16_1(half %a) {
 ; CHECK-LABEL: test_vcvth_n_u32_f16_1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu h0, h0, #1
-; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    fcvtzu w0, h0, #1
 ; CHECK-NEXT:    ret
 entry:
   %vcvth_n_u32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 1)
@@ -412,8 +395,7 @@ entry:
 define dso_local i32 @test_vcvth_n_u32_f16_16(half %a) {
 ; CHECK-LABEL: test_vcvth_n_u32_f16_16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu h0, h0, #16
-; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    fcvtzu w0, h0, #16
 ; CHECK-NEXT:    ret
 entry:
   %vcvth_n_u32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 16)
@@ -447,8 +429,7 @@ entry:
 define dso_local half @vcvth_n_f16_s64_test(i64 %a) {
 ; CHECK-LABEL: vcvth_n_f16_s64_test:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fmov d0, x0
-; CHECK-NEXT:    scvtf h0, h0, #16
+; CHECK-NEXT:    scvtf h0, x0, #16
 ; CHECK-NEXT:    ret
 entry:
   %vcvth_n_f16_s64 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 16)

CarolineConcatto · 2026-06-03T18:23:47Z

Hello,
I did the changes for ISEL and GlobalISel in the same patch. Is this ok?
I try to follow what was done in PR#178603, because I think it introduces less changes for GlobalISel.

Copilot

Pull request overview

Updates AArch64 codegen to match the revised ACLE specification for NEON FP16 scalar fixed-point conversions, ensuring SelectionDAG and GlobalISel select the correct scalar forms that convert directly between GPRs and FP registers.

Changes:

Update CodeGen checks to expect direct scalar scvtf/ucvtf and fcvtzs/fcvtzu forms (removing intermediate fmov sequences).
Refine GlobalISel register bank mappings for vcvtfx{u,s}2fp and vcvtfp2fx{u,s} so scalar integer operands/results use GPRs while FP operands remain in FPRs.
Add a GlobalISel custom operand renderer and TableGen xform plumbing to pass fixed-point immediates for scalar conversions.

Reviewed changes

Copilot reviewed 4 out of 4 changed files in this pull request and generated 1 comment.

File	Description
llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll	Updates expected assembly to validate direct scalar fixed-point convert instructions for both SDAG and GlobalISel.
llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp	Adjusts intrinsic operand bank assignments to correctly model scalar vs vector register bank requirements.
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp	Adds a dedicated renderer to emit scalar fixed-point immediates for the new patterns.
llvm/lib/Target/AArch64/AArch64InstrInfo.td	Introduces scalar fixed-point xform + patterns selecting direct GPR↔FPR scalar conversion opcodes for FP16.

+    : GICustomOperandRenderer<"renderFixedPointScalarXForm">,
+      GISDNodeXFormEquiv<fixedpoint_scalar_xform>;

+// Patterns for FP16 Intrinsics - requires reg copy to/from as i16s not supported.


davemgreen

It looks like vcvth_n_f16_s16 will also generate llvm.aarch64.neon.vcvtfxs2fp.f16.i32. Should that be changed to generate a llvm.aarch64.neon.vcvtfxs2fp.f16.i16 now? It might require some extra legalization to deal with the i16, maybe a bitcast to f16 in SDAG and being treated like a FPR in GISel.

davemgreen · 2026-06-05T07:57:07Z

Can this reuse the existing fixedpoint_vec_xform, maybe with a rename? AFAIU it is just to get the types correct.

I did try that, but I think the scalar intrinsics don't need to use selectCVTFixedPointVecBase as we have for fixedpoint_vec_xform. Having a different render function for the scalar makes the code a bit better.
But I could add an if statement in renderFixedPointXForm, if you think that is better than having another render function.

[AArch64]Fix specification of NEON FP16 scalar CVT to/from fixed point

4c3c68c

This patch implements the changes proposed in[1] for SelectionDAG and GlobalISel [1]ARM-software/acle#435

CarolineConcatto requested review from Lukacma, Copilot and davemgreen June 3, 2026 18:18

llvmorg-github-actions Bot added the backend:AArch64 label Jun 3, 2026

Copilot started reviewing on behalf of CarolineConcatto June 3, 2026 18:19 View session

Copilot AI reviewed Jun 3, 2026

View reviewed changes

Comment thread llvm/lib/Target/AArch64/AArch64InstrInfo.td

: GICustomOperandRenderer<"renderFixedPointScalarXForm">,

GISDNodeXFormEquiv<fixedpoint_scalar_xform>;

// Patterns for FP16 Intrinsics - requires reg copy to/from as i16s not supported.

Lukacma linked an issue Jun 4, 2026 that may be closed by this pull request

Miscompilation of vcvth_n intrinsics #197083

Open

davemgreen reviewed Jun 5, 2026

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[AArch64]Fix specification of NEON FP16 scalar CVT to/from fixed point#201425

[AArch64]Fix specification of NEON FP16 scalar CVT to/from fixed point#201425
CarolineConcatto wants to merge 1 commit into
llvm:mainfrom
CarolineConcatto:fp16_fix_neon

CarolineConcatto commented Jun 3, 2026

Uh oh!

llvmorg-github-actions Bot commented Jun 3, 2026

Uh oh!

CarolineConcatto commented Jun 3, 2026

Uh oh!

Copilot AI left a comment

Uh oh!

davemgreen left a comment

Uh oh!

davemgreen Jun 5, 2026

Uh oh!

CarolineConcatto Jun 5, 2026

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

Conversation

CarolineConcatto commented Jun 3, 2026

Uh oh!

llvmorg-github-actions Bot commented Jun 3, 2026

Uh oh!

CarolineConcatto commented Jun 3, 2026

Uh oh!

Copilot AI left a comment

Choose a reason for hiding this comment

Pull request overview

Reviewed changes

Uh oh!

davemgreen left a comment

Choose a reason for hiding this comment

Uh oh!

davemgreen Jun 5, 2026

Choose a reason for hiding this comment

Uh oh!

CarolineConcatto Jun 5, 2026

Choose a reason for hiding this comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants