AMDGPU: Fix V_FMA_F16 selection on GFX9

On GFX9, instruction selection should pick the opsel version of the instruction (V_FMA_F16_gfx9) rather than the VI-only V_FMA_F16.

Differential Revision: https://reviews.llvm.org/D54545

llvm-svn: 347265
This commit is contained in:
Konstantin Zhuravlyov 2018-11-19 21:10:16 +00:00
parent 70c4858892
commit 700b1ef54d
3 changed files with 17 additions and 11 deletions

View File

@ -438,13 +438,20 @@ def V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
let Predicates = [Has16BitInsts, isGFX9];
}
// f16 fused multiply-add using the plain VOP3 profile (no VOP3_OPSEL feature
// argument). The `fma` node is attached as its selection pattern.
def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma> {
// Limit this definition to VI-only subtargets that have 16-bit instructions,
// so it does not compete with V_FMA_F16_gfx9 for the same `fma` pattern.
let Predicates = [Has16BitInsts, isVIOnly];
}
// GFX9 form of the f16 fused multiply-add, built on the VOP3_OPSEL profile.
// It carries the `fma` pattern node so that GFX9 selects this opsel version.
def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, fma> {
// NOTE(review): presumably marks the mnemonic as renamed on GFX9 -- confirm
// against the definition of the renamedInGFX9 field.
let renamedInGFX9 = 1;
// Only available on GFX9 subtargets that have 16-bit instructions.
let Predicates = [Has16BitInsts, isGFX9];
}
let SubtargetPredicate = Has16BitInsts, isCommutable = 1 in {
let renamedInGFX9 = 1 in {
def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma>;
def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>>;
}
@ -452,7 +459,6 @@ let SubtargetPredicate = isGFX9 in {
def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
} // End SubtargetPredicate = isGFX9

View File

@ -8,16 +8,16 @@
; Tests to make sure fdot2 is not generated when vector elements of dot-product expressions
; are not converted from f16 to f32.
; GCN-LABEL: {{^}}dotproduct_f16
; GFX900: v_fma_legacy_f16
; GFX900: v_fma_legacy_f16
; GFX900: v_fma_f16
; GFX900: v_fma_f16
; GFX906: v_mul_f16_e32
; GFX906: v_mul_f16_e32
; GFX906-UNSAFE: v_fma_legacy_f16
; GFX906-UNSAFE: v_fma_f16
; GFX906-CONTRACT: v_mac_f16_e32
; GFX906-DENORM-CONTRACT: v_fma_legacy_f16
; GFX906-DENORM-CONTRACT: v_fma_f16
define amdgpu_kernel void @dotproduct_f16(<2 x half> addrspace(1)* %src1,
<2 x half> addrspace(1)* %src2,
half addrspace(1)* nocapture %dst) {

View File

@ -171,7 +171,7 @@ entry:
; GCN-LABEL: {{^}}fadd_fpext_fmuladd_f16_to_f32:
; GFX9: v_mul_f16
; GFX9: v_fma_legacy_f16
; GFX9: v_fma_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_add_f32_e32
define float @fadd_fpext_fmuladd_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
@ -185,7 +185,7 @@ entry:
; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32:
; GFX9: v_mul_f16
; GFX9: v_fma_legacy_f16
; GFX9: v_fma_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_add_f32_e32
define float @fadd_fpext_fma_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
@ -199,7 +199,7 @@ entry:
; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32_commute:
; GFX9: v_mul_f16
; GFX9: v_fma_legacy_f16
; GFX9: v_fma_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_add_f32_e32
define float @fadd_fpext_fma_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 {
@ -322,7 +322,7 @@ entry:
; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32:
; GFX9: v_mul_f16
; GFX9: v_fma_legacy_f16
; GFX9: v_fma_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_sub_f32
; GCN: s_setpc_b64
@ -363,7 +363,7 @@ entry:
; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32_commute:
; GCN: s_waitcnt
; GFX9-NEXT: v_mul_f16_e32 v3, v3, v4
; GFX9-NEXT: v_fma_legacy_f16 v1, v1, v2, v3
; GFX9-NEXT: v_fma_f16 v1, v1, v2, v3
; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64