AMDGPU: Remove IntrReadMem from memtime/memrealtime intrinsics
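With IntrReadMem set, EarlyCSE with MemorySSA treated these intrinsics as ordinary memory reads and merged multiple calls that had no intervening store, so a later timer read could be replaced by an earlier one. llvm-svn: 354814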
This commit is contained in:
parent
84b3288853
commit
f97ace5639
|
@ -1118,7 +1118,7 @@ def int_amdgcn_s_dcache_inv :
|
||||||
|
|
||||||
def int_amdgcn_s_memtime :
|
def int_amdgcn_s_memtime :
|
||||||
GCCBuiltin<"__builtin_amdgcn_s_memtime">,
|
GCCBuiltin<"__builtin_amdgcn_s_memtime">,
|
||||||
Intrinsic<[llvm_i64_ty], [], [IntrReadMem]>;
|
Intrinsic<[llvm_i64_ty], []>;
|
||||||
|
|
||||||
def int_amdgcn_s_sleep :
|
def int_amdgcn_s_sleep :
|
||||||
GCCBuiltin<"__builtin_amdgcn_s_sleep">,
|
GCCBuiltin<"__builtin_amdgcn_s_sleep">,
|
||||||
|
@ -1391,7 +1391,7 @@ def int_amdgcn_s_dcache_wb_vol :
|
||||||
|
|
||||||
def int_amdgcn_s_memrealtime :
|
def int_amdgcn_s_memrealtime :
|
||||||
GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
|
GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
|
||||||
Intrinsic<[llvm_i64_ty], [], [IntrReadMem]>;
|
Intrinsic<[llvm_i64_ty]>;
|
||||||
|
|
||||||
// llvm.amdgcn.ds.permute <index> <src>
|
// llvm.amdgcn.ds.permute <index> <src>
|
||||||
def int_amdgcn_ds_permute :
|
def int_amdgcn_ds_permute :
|
||||||
|
|
|
@ -152,11 +152,19 @@ multiclass SM_Pseudo_Discards<string opName> {
|
||||||
def _SGPR : SM_Discard_Pseudo <opName, (ins SReg_64:$sbase, SReg_32:$offset), 0>;
|
def _SGPR : SM_Discard_Pseudo <opName, (ins SReg_64:$sbase, SReg_32:$offset), 0>;
|
||||||
}
|
}
|
||||||
|
|
||||||
class SM_Time_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
|
class SM_Time_Pseudo<string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
|
||||||
opName, (outs SReg_64_XEXEC:$sdst), (ins),
|
opName, (outs SReg_64_XEXEC:$sdst), (ins),
|
||||||
" $sdst", [(set i64:$sdst, (node))]> {
|
" $sdst", [(set i64:$sdst, (node))]> {
|
||||||
let hasSideEffects = 1;
|
let hasSideEffects = 1;
|
||||||
let mayStore = 0;
|
|
||||||
|
// FIXME: This should be definitively mayStore = 0. TableGen
|
||||||
|
// brokenly tries to infer these based on the intrinsic properties
|
||||||
|
// corresponding to the IR attributes. The target intrinsics are
|
||||||
|
// considered as writing to memory for IR dependency purposes, but
|
||||||
|
// those can be modeled with hasSideEffects here. These also end up
|
||||||
|
// inferring differently for llvm.readcyclecounter and the amdgcn
|
||||||
|
// intrinsics.
|
||||||
|
let mayStore = ?;
|
||||||
let mayLoad = 1;
|
let mayLoad = 1;
|
||||||
let has_sbase = 0;
|
let has_sbase = 0;
|
||||||
let has_offset = 0;
|
let has_offset = 0;
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
config.suffixes = ['.ll']
|
||||||
|
|
||||||
|
targets = set(config.root.targets_to_build.split())
|
||||||
|
if not 'AMDGPU' in targets:
|
||||||
|
config.unsupported = True
|
|
@ -0,0 +1,43 @@
|
||||||
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -early-cse-memssa < %s | FileCheck %s
|
||||||
|
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
|
||||||
|
|
||||||
|
; CHECK-LABEL: @memrealtime(
|
||||||
|
; CHECK: call i64 @llvm.amdgcn.s.memrealtime()
|
||||||
|
; CHECK: call i64 @llvm.amdgcn.s.memrealtime()
|
||||||
|
define amdgpu_kernel void @memrealtime(i64 %cycles) #0 {
|
||||||
|
entry:
|
||||||
|
%0 = tail call i64 @llvm.amdgcn.s.memrealtime()
|
||||||
|
%cmp3 = icmp sgt i64 %cycles, 0
|
||||||
|
br i1 %cmp3, label %while.body, label %while.end
|
||||||
|
|
||||||
|
while.body:
|
||||||
|
%1 = tail call i64 @llvm.amdgcn.s.memrealtime()
|
||||||
|
%sub = sub nsw i64 %1, %0
|
||||||
|
%cmp = icmp slt i64 %sub, %cycles
|
||||||
|
br i1 %cmp, label %while.body, label %while.end
|
||||||
|
|
||||||
|
while.end:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @memtime(
|
||||||
|
; CHECK: call i64 @llvm.amdgcn.s.memtime()
|
||||||
|
; CHECK: call i64 @llvm.amdgcn.s.memtime()
|
||||||
|
define amdgpu_kernel void @memtime(i64 %cycles) #0 {
|
||||||
|
entry:
|
||||||
|
%0 = tail call i64 @llvm.amdgcn.s.memtime()
|
||||||
|
%cmp3 = icmp sgt i64 %cycles, 0
|
||||||
|
br i1 %cmp3, label %while.body, label %while.end
|
||||||
|
|
||||||
|
while.body:
|
||||||
|
%1 = tail call i64 @llvm.amdgcn.s.memtime()
|
||||||
|
%sub = sub nsw i64 %1, %0
|
||||||
|
%cmp = icmp slt i64 %sub, %cycles
|
||||||
|
br i1 %cmp, label %while.body, label %while.end
|
||||||
|
|
||||||
|
while.end:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare i64 @llvm.amdgcn.s.memrealtime()
|
||||||
|
declare i64 @llvm.amdgcn.s.memtime()
|
Loading…
Reference in New Issue