AMDGPU: Fix V_FMA_F16 selection on GFX9
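On GFX9, f16 fma should select the opsel variant of the instruction (V_FMA_F16_gfx9) instead of the VI-only V_FMA_F16 encoding, which GFX9 prints as v_fma_legacy_f16.
Differential Revision: https://reviews.llvm.org/D54545
llvm-svn: 347265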
This commit is contained in:
parent
70c4858892
commit
700b1ef54d
|
@ -438,13 +438,20 @@ def V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
|
|||
let Predicates = [Has16BitInsts, isGFX9];
|
||||
}
|
||||
|
||||
def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma> {
|
||||
let Predicates = [Has16BitInsts, isVIOnly];
|
||||
}
|
||||
def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, fma> {
|
||||
let renamedInGFX9 = 1;
|
||||
let Predicates = [Has16BitInsts, isGFX9];
|
||||
}
|
||||
|
||||
let SubtargetPredicate = Has16BitInsts, isCommutable = 1 in {
|
||||
|
||||
let renamedInGFX9 = 1 in {
|
||||
def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
|
||||
def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
|
||||
def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
|
||||
def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma>;
|
||||
def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>>;
|
||||
}
|
||||
|
||||
|
@ -452,7 +459,6 @@ let SubtargetPredicate = isGFX9 in {
|
|||
def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
|
||||
def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
|
||||
def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
|
||||
def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
|
||||
def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
|
||||
} // End SubtargetPredicate = isGFX9
|
||||
|
||||
|
|
|
@ -8,16 +8,16 @@
|
|||
; Tests to make sure fdot2 is not generated when vector elements of dot-product expressions
|
||||
; are not converted from f16 to f32.
|
||||
; GCN-LABEL: {{^}}dotproduct_f16
|
||||
; GFX900: v_fma_legacy_f16
|
||||
; GFX900: v_fma_legacy_f16
|
||||
; GFX900: v_fma_f16
|
||||
; GFX900: v_fma_f16
|
||||
|
||||
; GFX906: v_mul_f16_e32
|
||||
; GFX906: v_mul_f16_e32
|
||||
|
||||
; GFX906-UNSAFE: v_fma_legacy_f16
|
||||
; GFX906-UNSAFE: v_fma_f16
|
||||
|
||||
; GFX906-CONTRACT: v_mac_f16_e32
|
||||
; GFX906-DENORM-CONTRACT: v_fma_legacy_f16
|
||||
; GFX906-DENORM-CONTRACT: v_fma_f16
|
||||
define amdgpu_kernel void @dotproduct_f16(<2 x half> addrspace(1)* %src1,
|
||||
<2 x half> addrspace(1)* %src2,
|
||||
half addrspace(1)* nocapture %dst) {
|
||||
|
|
|
@ -171,7 +171,7 @@ entry:
|
|||
|
||||
; GCN-LABEL: {{^}}fadd_fpext_fmuladd_f16_to_f32:
|
||||
; GFX9: v_mul_f16
|
||||
; GFX9: v_fma_legacy_f16
|
||||
; GFX9: v_fma_f16
|
||||
; GFX9: v_cvt_f32_f16
|
||||
; GFX9: v_add_f32_e32
|
||||
define float @fadd_fpext_fmuladd_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
|
||||
|
@ -185,7 +185,7 @@ entry:
|
|||
|
||||
; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32:
|
||||
; GFX9: v_mul_f16
|
||||
; GFX9: v_fma_legacy_f16
|
||||
; GFX9: v_fma_f16
|
||||
; GFX9: v_cvt_f32_f16
|
||||
; GFX9: v_add_f32_e32
|
||||
define float @fadd_fpext_fma_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
|
||||
|
@ -199,7 +199,7 @@ entry:
|
|||
|
||||
; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32_commute:
|
||||
; GFX9: v_mul_f16
|
||||
; GFX9: v_fma_legacy_f16
|
||||
; GFX9: v_fma_f16
|
||||
; GFX9: v_cvt_f32_f16
|
||||
; GFX9: v_add_f32_e32
|
||||
define float @fadd_fpext_fma_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 {
|
||||
|
@ -322,7 +322,7 @@ entry:
|
|||
|
||||
; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32:
|
||||
; GFX9: v_mul_f16
|
||||
; GFX9: v_fma_legacy_f16
|
||||
; GFX9: v_fma_f16
|
||||
; GFX9: v_cvt_f32_f16
|
||||
; GFX9: v_sub_f32
|
||||
; GCN: s_setpc_b64
|
||||
|
@ -363,7 +363,7 @@ entry:
|
|||
; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32_commute:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: v_mul_f16_e32 v3, v3, v4
|
||||
; GFX9-NEXT: v_fma_legacy_f16 v1, v1, v2, v3
|
||||
; GFX9-NEXT: v_fma_f16 v1, v1, v2, v3
|
||||
; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
|
||||
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v1
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
|
|
Loading…
Reference in New Issue