AMDGPU: Fix V_FMA_F16 selection on GFX9

GFX9 should select the opsel version.

Differential Revision: https://reviews.llvm.org/D54545
llvm-svn: 347265
2018-11-19 21:10:16 +00:00 · 2018-11-19 21:10:16 +00:00 · 700b1ef54d
parent 70c4858892
commit 700b1ef54d
3 changed files with 17 additions and 11 deletions
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -438,13 +438,20 @@ def V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
  let Predicates = [Has16BitInsts, isGFX9];
 }

+def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma> {
+  let Predicates = [Has16BitInsts, isVIOnly];
+}
+def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, fma> {
+  let renamedInGFX9 = 1;
+  let Predicates = [Has16BitInsts, isGFX9];
+}
+
 let SubtargetPredicate = Has16BitInsts, isCommutable = 1 in {

 let renamedInGFX9 = 1 in {
 def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
 def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
 def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
-def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma>;
 def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>>;
 }

@@ -452,7 +459,6 @@ let SubtargetPredicate = isGFX9 in {
 def V_MAD_F16_gfx9   : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
 def V_MAD_U16_gfx9   : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
 def V_MAD_I16_gfx9   : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
-def V_FMA_F16_gfx9   : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
 def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
 } // End SubtargetPredicate = isGFX9

--- a/llvm/test/CodeGen/AMDGPU/fdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdot2.ll
@@ -8,16 +8,16 @@
 ; Tests to make sure fdot2 is not generated when vector elements of dot-product expressions
 ; are not converted from f16 to f32.
 ; GCN-LABEL: {{^}}dotproduct_f16
-; GFX900: v_fma_legacy_f16
-; GFX900: v_fma_legacy_f16
+; GFX900: v_fma_f16
+; GFX900: v_fma_f16

 ; GFX906: v_mul_f16_e32
 ; GFX906: v_mul_f16_e32

-; GFX906-UNSAFE:  v_fma_legacy_f16
+; GFX906-UNSAFE:  v_fma_f16

 ; GFX906-CONTRACT: v_mac_f16_e32
-; GFX906-DENORM-CONTRACT: v_fma_legacy_f16
+; GFX906-DENORM-CONTRACT: v_fma_f16
 define amdgpu_kernel void @dotproduct_f16(<2 x half> addrspace(1)* %src1,
                                          <2 x half> addrspace(1)* %src2,
                                          half addrspace(1)* nocapture %dst) {
--- a/llvm/test/CodeGen/AMDGPU/fpext-free.ll
+++ b/llvm/test/CodeGen/AMDGPU/fpext-free.ll
@@ -171,7 +171,7 @@ entry:

 ; GCN-LABEL: {{^}}fadd_fpext_fmuladd_f16_to_f32:
 ; GFX9: v_mul_f16
-; GFX9: v_fma_legacy_f16
+; GFX9: v_fma_f16
 ; GFX9: v_cvt_f32_f16
 ; GFX9: v_add_f32_e32
 define float @fadd_fpext_fmuladd_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
@@ -185,7 +185,7 @@ entry:

 ; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32:
 ; GFX9: v_mul_f16
-; GFX9: v_fma_legacy_f16
+; GFX9: v_fma_f16
 ; GFX9: v_cvt_f32_f16
 ; GFX9: v_add_f32_e32
 define float @fadd_fpext_fma_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
@@ -199,7 +199,7 @@ entry:

 ; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32_commute:
 ; GFX9: v_mul_f16
-; GFX9: v_fma_legacy_f16
+; GFX9: v_fma_f16
 ; GFX9: v_cvt_f32_f16
 ; GFX9: v_add_f32_e32
 define float @fadd_fpext_fma_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 {
@@ -322,7 +322,7 @@ entry:

 ; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32:
 ; GFX9: v_mul_f16
-; GFX9: v_fma_legacy_f16
+; GFX9: v_fma_f16
 ; GFX9: v_cvt_f32_f16
 ; GFX9: v_sub_f32
 ; GCN: s_setpc_b64
@@ -363,7 +363,7 @@ entry:
 ; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32_commute:
 ; GCN: s_waitcnt
 ; GFX9-NEXT: v_mul_f16_e32 v3, v3, v4
-; GFX9-NEXT: v_fma_legacy_f16 v1, v1, v2, v3
+; GFX9-NEXT: v_fma_f16 v1, v1, v2, v3
 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
 ; GFX9-NEXT: v_sub_f32_e32 v0, v0, v1
 ; GFX9-NEXT: s_setpc_b64