diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 0c4ffd93dbf7..5b0640c7237a 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -688,6 +688,37 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
   UnwindDest->removePredecessor(InvokeBB);
 }
 
+/// When inlining a call site that has !llvm.mem.parallel_loop_access metadata,
+/// that metadata should be propagated to all memory-accessing cloned
+/// instructions. A cloned instruction that already carries such metadata gets
+/// the concatenation of its own metadata and the call site's.
+static void PropagateParallelLoopAccessMetadata(CallSite CS,
+                                                ValueToValueMapTy &VMap) {
+  MDNode *M =
+    CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
+  if (!M)
+    return;
+
+  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
+       VMI != VMIE; ++VMI) {
+    if (!VMI->second)
+      continue;
+
+    Instruction *NI = dyn_cast<Instruction>(VMI->second);
+    if (!NI)
+      continue;
+
+    if (MDNode *PM = NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) {
+      // Build the merged node per instruction and leave M untouched: M must
+      // remain the call site's own metadata for every later iteration.
+      NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access,
+                      MDNode::concatenate(PM, M));
+    } else if (NI->mayReadOrWriteMemory()) {
+      NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
+    }
+  }
+}
+
 /// When inlining a function that contains noalias scope metadata,
 /// this metadata needs to be cloned so that the inlined blocks
 /// have different "unqiue scopes" at every call site. Were this not done, then
@@ -1574,6 +1605,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
     // Add noalias metadata if necessary.
     AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR);
 
+    // Propagate llvm.mem.parallel_loop_access if necessary.
+    PropagateParallelLoopAccessMetadata(CS, VMap);
+
     // FIXME: We could register any cloned assumptions instead of clearing the
     // whole function's cache.
     if (IFI.ACT)
diff --git a/llvm/test/Transforms/Inline/parallel-loop-md.ll b/llvm/test/Transforms/Inline/parallel-loop-md.ll
new file mode 100644
index 000000000000..43a44feb247e
--- /dev/null
+++ b/llvm/test/Transforms/Inline/parallel-loop-md.ll
@@ -0,0 +1,57 @@
+; RUN: opt -S -inline < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: norecurse nounwind uwtable
+define void @Body(i32* nocapture %res, i32* nocapture readnone %c, i32* nocapture readonly %d, i32* nocapture readonly %p, i32 %i) #0 {
+entry:
+  %idxprom = sext i32 %i to i64
+  %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %cmp = icmp eq i32 %0, 0
+  %arrayidx2 = getelementptr inbounds i32, i32* %res, i64 %idxprom
+  %1 = load i32, i32* %arrayidx2, align 4
+  br i1 %cmp, label %cond.end, label %cond.false
+
+cond.false:                                       ; preds = %entry
+  %arrayidx6 = getelementptr inbounds i32, i32* %d, i64 %idxprom
+  %2 = load i32, i32* %arrayidx6, align 4
+  %add = add nsw i32 %2, %1
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.false
+  %cond = phi i32 [ %add, %cond.false ], [ %1, %entry ]
+  store i32 %cond, i32* %arrayidx2, align 4
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define void @Test(i32* %res, i32* %c, i32* %d, i32* %p, i32 %n) #1 {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, 1600
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  call void @Body(i32* %res, i32* undef, i32* %d, i32* %p, i32 %i.0), !llvm.mem.parallel_loop_access !0
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond, !llvm.loop !0
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; CHECK-LABEL: @Test
+; CHECK: load i32,{{.*}}, !llvm.mem.parallel_loop_access !0
+; CHECK: load i32,{{.*}}, !llvm.mem.parallel_loop_access !0
+; CHECK: load i32,{{.*}}, !llvm.mem.parallel_loop_access !0
+; CHECK: store i32{{.*}}, !llvm.mem.parallel_loop_access !0
+; CHECK: br label %for.cond, !llvm.loop !0
+
+attributes #0 = { norecurse nounwind uwtable }
+
+!0 = distinct !{!0}
+