llvm.memory.barrier, and impl for x86 and alpha

llvm-svn: 47204
Andrew Lenharth 2008-02-16 01:24:58 +00:00
parent 27055194b7
commit 9b254eed32
17 changed files with 256 additions and 1 deletion


@ -204,6 +204,11 @@
<li><a href="#int_it">'<tt>llvm.init.trampoline</tt>' Intrinsic</a></li>
</ol>
</li>
<li><a href="#int_atomics">Atomic intrinsics</a>
<ol>
<li><a href="#int_memory_barrier"><tt>llvm.memory_barrier</tt></li>
</ol>
</li>
<li><a href="#int_general">General intrinsics</a>
<ol>
<li><a href="#int_var_annotation">
@ -5232,6 +5237,107 @@ declare i8* @llvm.init.trampoline(i8* &lt;tramp&gt;, i8* &lt;func&gt;, i8* &lt;n
</p>
</div>
<!-- ======================================================================= -->
<div class="doc_subsection">
<a name="int_atomics">Atomic Operations and Synchronization Intrinsics</a>
</div>
<div class="doc_text">
<p>
These intrinsic functions expand the "universal IR" of LLVM to represent
hardware constructs for atomic operations and memory synchronization. They
provide an interface to the hardware, not an interface to the programmer, and
are aimed at a low enough level that any programming model or API which needs
atomic behavior can map cleanly onto them. They are also modeled primarily on
hardware behavior. Just as LLVM provides a "universal IR" for source
languages, these intrinsics are intended as a starting point for developing a
"universal" atomic operation and synchronization IR.
</p>
<p>
These intrinsics do <em>not</em> form an API such as the high-level threading
libraries, software transactional memory systems, atomic primitives, and
intrinsic functions found in BSD, GNU libc, atomic_ops, APR, and other system
and application libraries. The hardware interface provided by LLVM should
allow a clean implementation of all of these APIs and parallel programming
models. No one model or paradigm should be selected above the others unless
the hardware itself ubiquitously does so.
</p>
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="int_memory_barrier">'<tt>llvm.memory.barrier</tt>' Intrinsic</a>
</div>
<div class="doc_text">
<h5>Syntax:</h5>
<pre>
declare void @llvm.memory.barrier( i1 &lt;ll&gt;, i1 &lt;ls&gt;, i1 &lt;sl&gt;, i1 &lt;ss&gt;,
i1 &lt;device&gt; )
</pre>
<h5>Overview:</h5>
<p>
The <tt>llvm.memory.barrier</tt> intrinsic guarantees ordering between
specific pairs of memory access types.
</p>
<h5>Arguments:</h5>
<p>
The <tt>llvm.memory.barrier</tt> intrinsic requires five boolean arguments.
The first four arguments each enable a specific kind of barrier, as listed
below. The fifth argument specifies whether the barrier also applies to I/O,
device, or uncached memory.
</p>
<ul>
<li><tt>ll</tt>: load-load barrier</li>
<li><tt>ls</tt>: load-store barrier</li>
<li><tt>sl</tt>: store-load barrier</li>
<li><tt>ss</tt>: store-store barrier</li>
<li><tt>device</tt>: the barrier also applies to device and uncached memory.</li>
</ul>
<h5>Semantics:</h5>
<p>
This intrinsic causes the system to enforce some ordering constraints upon
the loads and stores of the program. The barrier does not indicate
<em>when</em> any events will occur; it only enforces an <em>order</em> in
which they occur. For each of the specified pairs of load and store operations
(for example, load-load or store-load), all of the first operations preceding
the barrier will complete before any of the second operations succeeding the
barrier begin. Specifically, the semantics for each pairing are as follows:
</p>
<ul>
<li><tt>ll</tt>: All loads before the barrier must complete before any load
after the barrier begins.</li>
<li><tt>ls</tt>: All loads before the barrier must complete before any
store after the barrier begins.</li>
<li><tt>ss</tt>: All stores before the barrier must complete before any
store after the barrier begins.</li>
<li><tt>sl</tt>: All stores before the barrier must complete before any
load after the barrier begins.</li>
</ul>
<p>
These semantics are combined with a logical "and" when more than one barrier
type is enabled in a single memory barrier intrinsic.
</p>
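<p>
For example, a minimal sketch of a single call that enables both the
<tt>ll</tt> and <tt>ss</tt> barriers: it orders earlier loads before later
loads <em>and</em> earlier stores before later stores.
</p>
<pre>
call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 true, i1 false )
</pre>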
<p>
A backend may implement a stronger barrier than the one requested when it does
not support a barrier as fine-grained as the one requested. Some architectures
do not need all types of barriers; on such architectures these intrinsics
become no-ops.
</p>
<h5>Example:</h5>
<pre>
%ptr = malloc i32
store i32 4, i32* %ptr
%result1 = load i32* %ptr <i>; yields {i32}:result1 = 4</i>
call void @llvm.memory.barrier( i1 false, i1 true, i1 false, i1 false, i1 false )
<i>; guarantee the load above finishes</i>
store i32 8, i32* %ptr <i>; before this store begins</i>
</pre>
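<p>
As an illustration of how a backend may strengthen a requested barrier, the
X86 patterns added in this patch lower barrier calls on an SSE2-capable target
roughly as sketched below (based on the patterns and tests in this commit; the
exact lowering is a backend decision):
</p>
<pre>
call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true )  <i>; ss + device: sfence</i>
call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 false, i1 true )  <i>; ll + device: lfence</i>
call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true, i1 true )     <i>; other device barriers: mfence</i>
call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true, i1 false )    <i>; no device memory: no-op</i>
</pre>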
</div>
<!-- ======================================================================= -->
<div class="doc_subsection">
<a name="int_general">General Intrinsics</a>


@ -589,6 +589,14 @@ namespace ISD {
// TRAP - Trapping instruction
TRAP,
// OUTCHAIN = MEMBARRIER(INCHAIN, load-load, load-store, store-load,
// store-store, device)
// This corresponds to the llvm.memory.barrier intrinsic.
// It takes an input chain and five operands: four that specify the type of
// barrier and one that specifies whether the barrier applies to device and
// uncached memory.  It produces an output chain.
MEMBARRIER,
// BUILTIN_OP_END - This must be the last enum value in this list.
BUILTIN_OP_END
};


@ -262,6 +262,11 @@ def int_init_trampoline : Intrinsic<[llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty,
llvm_ptr_ty], []>,
GCCBuiltin<"__builtin_init_trampoline">;
//===------------------------- Atomic Intrinsics --------------------------===//
//
def int_memory_barrier : Intrinsic<[llvm_void_ty, llvm_i1_ty, llvm_i1_ty,
llvm_i1_ty, llvm_i1_ty, llvm_i1_ty], []>;
//===-------------------------- Other Intrinsics --------------------------===//
//
def int_flt_rounds : Intrinsic<[llvm_i32_ty]>,


@ -1133,6 +1133,16 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
}
break;
case ISD::MEMBARRIER: {
assert(Node->getNumOperands() == 6 && "Invalid MemBarrier node!");
SDOperand Ops[6];
Ops[0] = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
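// Promote the five i1 barrier flag operands to a legal type.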
for (int x = 1; x < 6; ++x)
Ops[x] = PromoteOp(Node->getOperand(x));
Result = DAG.UpdateNodeOperands(Result, &Ops[0], 6);
break;
}
case ISD::Constant: {
ConstantSDNode *CN = cast<ConstantSDNode>(Node);
unsigned opAction =


@ -3792,6 +3792,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
return "<<Unknown Target Node>>";
}
case ISD::MEMBARRIER: return "MemBarrier";
case ISD::PCMARKER: return "PCMarker";
case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
case ISD::SRCVALUE: return "SrcValue";


@ -3046,6 +3046,15 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
DAG.setRoot(DAG.getNode(ISD::TRAP, MVT::Other, getRoot()));
return 0;
}
case Intrinsic::memory_barrier: {
SDOperand Ops[6];
Ops[0] = getRoot();
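// Call operands 1-5 are the five i1 barrier flags.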
for (int x = 1; x < 6; ++x)
Ops[x] = getValue(I.getOperand(x));
DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, MVT::Other, &Ops[0], 6));
return 0;
}
}
}


@ -62,6 +62,14 @@ class MfcForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
let Inst{20-16} = 0;
let Inst{15-0} = fc;
}
class MfcPForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
: InstAlpha<opcode, asmstr, itin> {
let OutOperandList = (ops);
let InOperandList = (ops);
let Inst{25-21} = 0;
let Inst{20-16} = 0;
let Inst{15-0} = fc;
}
class MbrForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, InstrItinClass itin>
: InstAlpha<opcode, asmstr, itin> {


@ -568,8 +568,14 @@ def LDQl : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!literal",
def : Pat<(Alpha_rellit texternalsym:$ext, GPRC:$RB),
(LDQl texternalsym:$ext, GPRC:$RB)>;
def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle counter
def MB : MfcPForm<0x18, 0x4000, "mb", s_imisc>; //memory barrier
def WMB : MfcPForm<0x18, 0x4400, "wmb", s_imisc>; //write memory barrier
def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 1), (i64 imm:$dev)),
(WMB)>;
def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 imm:$ss), (i64 imm:$dev)),
(MB)>;
//Basic Floating point ops
@ -959,6 +965,7 @@ def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP),
//S_floating : IEEE Single
//T_floating : IEEE Double
//Unused instructions
//Mnemonic Format Opcode Description
//CALL_PAL Pcd 00 Trap to PALcode


@ -185,6 +185,11 @@ def SDTVecInsert : SDTypeProfile<1, 3, [ // vector insert
SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>, SDTCisPtrTy<3>
]>;
def STDMemBarrier : SDTypeProfile<0, 5, [
SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisSameAs<0,4>,
SDTCisInt<0>
]>;
class SDCallSeqStart<list<SDTypeConstraint> constraints> :
SDTypeProfile<0, 1, constraints>;
class SDCallSeqEnd<list<SDTypeConstraint> constraints> :
@ -329,6 +334,8 @@ def br : SDNode<"ISD::BR" , SDTBr, [SDNPHasChain]>;
def ret : SDNode<"ISD::RET" , SDTNone, [SDNPHasChain]>;
def trap : SDNode<"ISD::TRAP" , SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
def membarrier : SDNode<"ISD::MEMBARRIER" , STDMemBarrier,
[SDNPHasChain, SDNPSideEffect]>;
// Do not use ld, st directly. Use load, extload, sextload, zextload, store,
// and truncst (see below).


@ -1509,3 +1509,10 @@ void test(double *P) {
}
//===---------------------------------------------------------------------===//
Handling llvm.memory.barrier on pre-SSE2 CPUs
should generate:
lock ; mov %esp, %esp
//===---------------------------------------------------------------------===//


@ -2149,6 +2149,14 @@ def LFENCE : I<0xAE, MRM5m, (outs), (ins),
def MFENCE : I<0xAE, MRM6m, (outs), (ins),
"mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
//TODO: custom lower this so as to never even generate the noop
def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
(i8 0)), (NOOP)>;
def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
(i8 1)), (MFENCE)>;
// Alias instructions that map zero vector to pxor / xorp* for sse.
let isReMaterializable = 1 in
def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins),


@ -0,0 +1,8 @@
; RUN: llvm-as < %s | llc -march=alpha | grep mb
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
define void @test() {
call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true , i1 true)
ret void
}


@ -0,0 +1,8 @@
; RUN: llvm-as < %s | llc -march=alpha | grep wmb
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
define void @test() {
call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true , i1 true)
ret void
}


@ -0,0 +1,8 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep lfence
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
define void @test() {
call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 false, i1 true)
ret void
}


@ -0,0 +1,20 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep sfence
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep lfence
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mfence
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
define void @test() {
call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 true)
call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 false, i1 true)
call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 true, i1 true)
call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 false, i1 true)
call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 true, i1 true)
call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 true, i1 true)
call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true , i1 true)
call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 false , i1 true)
ret void
}


@ -0,0 +1,27 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep fence
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
define void @test() {
call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 false, i1 false)
call void @llvm.memory.barrier( i1 false, i1 true, i1 false, i1 false, i1 false)
call void @llvm.memory.barrier( i1 false, i1 false, i1 true, i1 false, i1 false)
call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false)
call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 false)
call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 false, i1 false)
call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 true, i1 false)
call void @llvm.memory.barrier( i1 false, i1 true, i1 true, i1 false, i1 false)
call void @llvm.memory.barrier( i1 false, i1 true, i1 false, i1 true, i1 false)
call void @llvm.memory.barrier( i1 false, i1 false, i1 true, i1 true, i1 false)
call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 false, i1 false)
call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 true, i1 false)
call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 true, i1 false)
call void @llvm.memory.barrier( i1 false, i1 true, i1 true, i1 true, i1 false)
call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true , i1 false)
call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 false , i1 false)
ret void
}


@ -0,0 +1,8 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep sfence
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
define void @test() {
call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true)
ret void
}