[AMDGPU] Eliminate barrier if workgroup size is not greater than wavefront size
If the workgroup size is known to be not greater than the wavefront size, the s_barrier instruction is not needed, since all threads are guaranteed to reach the same point at the same time. Differential Revision: https://reviews.llvm.org/D31731 llvm-svn: 299659
This commit is contained in:
parent
3fc1225c18
commit
ea57c38521
|
@ -3159,6 +3159,17 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
||||||
SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src);
|
SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src);
|
||||||
return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast);
|
return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast);
|
||||||
}
|
}
|
||||||
|
case Intrinsic::amdgcn_s_barrier: {
|
||||||
|
if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
|
||||||
|
const MachineFunction &MF = DAG.getMachineFunction();
|
||||||
|
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||||
|
unsigned WGSize = ST.getFlatWorkGroupSizes(*MF.getFunction()).second;
|
||||||
|
if (WGSize <= ST.getWavefrontSize())
|
||||||
|
return SDValue(DAG.getMachineNode(AMDGPU::WAVE_BARRIER, DL, MVT::Other,
|
||||||
|
Op.getOperand(0)), 0);
|
||||||
|
}
|
||||||
|
return SDValue();
|
||||||
|
};
|
||||||
default:
|
default:
|
||||||
return Op;
|
return Op;
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
; RUN: llc -march=amdgcn < %s | FileCheck %s
|
||||||
|
|
||||||
|
; CHECK-LABEL: {{^}}unknown_wgs:
|
||||||
|
; CHECK: s_barrier
|
||||||
|
define amdgpu_kernel void @unknown_wgs() {
|
||||||
|
tail call void @llvm.amdgcn.s.barrier() #0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: {{^}}flat_wgs_attr_32_128:
|
||||||
|
; CHECK: s_barrier
|
||||||
|
define amdgpu_kernel void @flat_wgs_attr_32_128() #1 {
|
||||||
|
tail call void @llvm.amdgcn.s.barrier() #0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: {{^}}flat_wgs_attr_32_64:
|
||||||
|
; CHECK: :
|
||||||
|
; CHECK-NEXT: ; wave barrier
|
||||||
|
; CHECK-NEXT: s_endpgm
|
||||||
|
define amdgpu_kernel void @flat_wgs_attr_32_64() #2 {
|
||||||
|
tail call void @llvm.amdgcn.s.barrier() #0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @llvm.amdgcn.s.barrier() #0
|
||||||
|
|
||||||
|
attributes #0 = { convergent nounwind }
|
||||||
|
attributes #1 = { nounwind "amdgpu-flat-work-group-size"="32,128" }
|
||||||
|
attributes #2 = { nounwind "amdgpu-flat-work-group-size"="32,64" }
|
|
@ -121,4 +121,4 @@ define amdgpu_kernel void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* n
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { convergent nounwind }
|
attributes #0 = { convergent nounwind }
|
||||||
attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="64,64" }
|
attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="64,128" }
|
||||||
|
|
Loading…
Reference in New Issue