AMDGPU: Keep track of modifiers when converting v_mac to v_mad
Since v_max_f32_e64/v_max_f16_e64 can be folded if the target instruction supports the clamp bit, we also need to maintain modifiers when converting v_mac to v_mad. This fixes a rendering issue with Dirt Rally because a v_mac instruction with the clamp bit set was converted to a v_mad but that bit was lost during the conversion. Fixes: e184e01dd79 ("AMDGPU: Fold FP clamp as modifier bit") Patch by Samuel Pitoiset <samuel.pitoiset@gmail.com> llvm-svn: 297556
This commit is contained in:
parent
2b38ed7b15
commit
3cb9ff8863
|
@ -1766,20 +1766,26 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
|
|||
|
||||
const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
|
||||
const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
|
||||
const MachineOperand *Src0Mods =
|
||||
getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
|
||||
const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
|
||||
const MachineOperand *Src1Mods =
|
||||
getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
|
||||
const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
|
||||
const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
|
||||
const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
|
||||
|
||||
return BuildMI(*MBB, MI, MI.getDebugLoc(),
|
||||
get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32))
|
||||
.add(*Dst)
|
||||
.addImm(0) // Src0 mods
|
||||
.addImm(Src0Mods ? Src0Mods->getImm() : 0)
|
||||
.add(*Src0)
|
||||
.addImm(0) // Src1 mods
|
||||
.addImm(Src1Mods ? Src1Mods->getImm() : 0)
|
||||
.add(*Src1)
|
||||
.addImm(0) // Src mods
|
||||
.add(*Src2)
|
||||
.addImm(0) // clamp
|
||||
.addImm(0); // omod
|
||||
.addImm(Clamp ? Clamp->getImm() : 0)
|
||||
.addImm(Omod ? Omod->getImm() : 0);
|
||||
}
|
||||
|
||||
// It's not generally safe to move VALU instructions across these since it will
|
||||
|
|
|
@ -168,6 +168,23 @@ define amdgpu_kernel void @v_clamp_add_src_f64(double addrspace(1)* %out, double
|
|||
ret void
|
||||
}
|
||||
|
||||
; Test: a multiply-add (%a * %a + %b) is clamped to [0.0, 1.0] through
; maxnum/minnum, and the addend %b is used again after the clamp. The check
; expects one v_mad_f32 that carries the clamp modifier.
; Every register operand is matched with [0-9]+ so that multi-digit register
; numbers are accepted as well.
; GCN-LABEL: {{^}}v_clamp_mac_to_mad:
|
||||
; GCN: v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
|
||||
define amdgpu_kernel void @v_clamp_mac_to_mad(float addrspace(1)* %out, float addrspace(1)* %aptr, float %a) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%b = load float, float addrspace(1)* %gep0
|
||||
|
||||
; Multiply-add: %a * %a + %b.
%mul = fmul float %a, %a
|
||||
%add = fadd float %mul, %b
|
||||
; Clamp pattern: max(x, 0.0) followed by min(x, 1.0).
%max = call float @llvm.maxnum.f32(float %add, float 0.0)
|
||||
%clamp = call float @llvm.minnum.f32(float %max, float 1.0)
|
||||
; Second use of %b, after the clamped multiply-add.
%res = fadd float %clamp, %b
|
||||
store float %res, float addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
declare float @llvm.fabs.f32(float) #1
|
||||
declare float @llvm.floor.f32(float) #1
|
||||
|
|
|
@ -250,6 +250,17 @@ define amdgpu_ps void @v_omod_div2_f16_no_denormals(half %a) #3 {
|
|||
ret void
|
||||
}
|
||||
|
||||
; Test: a multiply-add (%a * %a + %b) is multiplied by 2.0, and the addend %b
; is used again afterwards. The check expects one v_mad_f32 that carries the
; mul:2 output modifier.
; Every register operand is matched with [0-9]+ so that multi-digit register
; numbers are accepted as well.
; GCN-LABEL: {{^}}v_omod_mac_to_mad:
|
||||
; GCN: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} mul:2{{$}}
|
||||
define amdgpu_ps void @v_omod_mac_to_mad(float %b, float %a) #0 {
|
||||
; Multiply-add: %a * %a + %b.
%mul = fmul float %a, %a
|
||||
%add = fadd float %mul, %b
|
||||
; Multiply by 2.0 -- the operation the check expects to appear as mul:2.
%mad = fmul float %add, 2.0
|
||||
; Second use of %b, after the scaled multiply-add.
%res = fmul float %mad, %b
|
||||
store float %res, float addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
declare float @llvm.fabs.f32(float) #1
|
||||
declare float @llvm.floor.f32(float) #1
|
||||
|
|
Loading…
Reference in New Issue