[OPENMP] Initial codegen for 'omp task' directive.

The task region is emitted in several steps:

Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
    TaskFunction(gtid, tt->part_id, tt->shareds);
      return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to the destructors function to the destructors field of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560

llvm-svn: 231762
This commit is contained in:
Alexey Bataev 2015-03-10 07:28:44 +00:00
parent 58364dc4da
commit 62b63b197d
5 changed files with 498 additions and 19 deletions

View File

@ -42,7 +42,8 @@ public:
virtual const VarDecl *getThreadIDVariable() const = 0; virtual const VarDecl *getThreadIDVariable() const = 0;
/// \brief Get an LValue for the current ThreadID variable. /// \brief Get an LValue for the current ThreadID variable.
LValue getThreadIDVariableLValue(CodeGenFunction &CGF); /// \return LValue for thread id variable. This LValue always has type int32*.
virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
/// \brief Emit the captured statement body. /// \brief Emit the captured statement body.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
@ -77,6 +78,41 @@ private:
const VarDecl *ThreadIDVar; const VarDecl *ThreadIDVar;
}; };
/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
public:
CGOpenMPTaskOutlinedRegionInfo(const OMPExecutableDirective &D,
const CapturedStmt &CS,
const VarDecl *ThreadIDVar,
const VarDecl *PartIDVar)
: CGOpenMPRegionInfo(D, CS), ThreadIDVar(ThreadIDVar),
PartIDVar(PartIDVar) {
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
}
/// \brief Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
virtual const VarDecl *getThreadIDVariable() const override {
return ThreadIDVar;
}
/// \brief Get an LValue for the current ThreadID variable.
virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
/// \brief Emit the captured statement body.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
/// \brief Get the name of the capture helper.
StringRef getHelperName() const override { return ".omp_outlined."; }
private:
/// \brief A variable or parameter storing global thread id for OpenMP
/// constructs.
const VarDecl *ThreadIDVar;
/// \brief A variable or parameter storing part id for OpenMP tasking
/// constructs.
const VarDecl *PartIDVar;
};
/// \brief API for inlined captured statement code generation in OpenMP /// \brief API for inlined captured statement code generation in OpenMP
/// constructs. /// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
@ -110,6 +146,7 @@ public:
return OuterRegionInfo->getThreadIDVariable(); return OuterRegionInfo->getThreadIDVariable();
return nullptr; return nullptr;
} }
/// \brief Get the name of the capture helper. /// \brief Get the name of the capture helper.
virtual StringRef getHelperName() const override { virtual StringRef getHelperName() const override {
llvm_unreachable("No helper name for inlined OpenMP construct"); llvm_unreachable("No helper name for inlined OpenMP construct");
@ -126,8 +163,13 @@ private:
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
return CGF.MakeNaturalAlignAddrLValue( return CGF.MakeNaturalAlignAddrLValue(
CGF.GetAddrOfLocalVar(getThreadIDVariable()), CGF.Builder.CreateAlignedLoad(
CGF.getContext().getPointerType(getThreadIDVariable()->getType())); CGF.GetAddrOfLocalVar(getThreadIDVariable()),
CGF.PointerAlignInBytes),
getThreadIDVariable()
->getType()
->castAs<PointerType>()
->getPointeeType());
} }
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) { void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
@ -141,8 +183,23 @@ void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
CGCapturedStmtInfo::EmitBody(CGF, S); CGCapturedStmtInfo::EmitBody(CGF, S);
} }
/// \brief Build an LValue for the thread id variable of a task region.
/// The LValue is formed directly from the address of the local variable and
/// is typed with the variable's own declared type.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  const VarDecl *ThreadIDDecl = getThreadIDVariable();
  return CGF.MakeNaturalAlignAddrLValue(CGF.GetAddrOfLocalVar(ThreadIDDecl),
                                        ThreadIDDecl->getType());
}
/// \brief Emit the body of the captured statement of a task region.
/// \param CGF Code generation state of the function being emitted.
/// \param S Statement to emit as the body.
void CGOpenMPTaskOutlinedRegionInfo::EmitBody(CodeGenFunction &CGF,
const Stmt *S) {
// Placeholder for part-id handling: nothing is emitted here yet, so the
// presence of PartIDVar currently has no effect on the generated body.
if (PartIDVar) {
// TODO: emit code for untied tasks.
}
// The CGCapturedStmtInfo implementation is called directly to emit the
// statement itself.
CGCapturedStmtInfo::EmitBody(CGF, S);
}
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
: CGM(CGM), DefaultOpenMPPSource(nullptr) { : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
IdentTy = llvm::StructType::create( IdentTy = llvm::StructType::create(
"ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
@ -157,6 +214,8 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
llvm::Value * llvm::Value *
CGOpenMPRuntime::emitOutlinedFunction(const OMPExecutableDirective &D, CGOpenMPRuntime::emitOutlinedFunction(const OMPExecutableDirective &D,
const VarDecl *ThreadIDVar) { const VarDecl *ThreadIDVar) {
assert(ThreadIDVar->getType()->isPointerType() &&
"thread id variable must be of type kmp_int32 *");
const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
CodeGenFunction CGF(CGM, true); CodeGenFunction CGF(CGM, true);
CGOpenMPOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar); CGOpenMPOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar);
@ -164,6 +223,19 @@ CGOpenMPRuntime::emitOutlinedFunction(const OMPExecutableDirective &D,
return CGF.GenerateCapturedStmtFunction(*CS); return CGF.GenerateCapturedStmtFunction(*CS);
} }
/// \brief Generate the outlined function for the captured statement of the
/// task directive \a D.
/// \param D Directive whose associated statement is a CapturedStmt.
/// \param ThreadIDVar Thread id variable; for tasks it must not have pointer
/// type.
/// \param PartIDVar Part id variable forwarded to the region info.
/// \return The function produced for the captured statement.
llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar) {
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const auto &Captured = *cast<CapturedStmt>(D.getAssociatedStmt());
  // A separate CodeGenFunction is used for the outlined function; the region
  // info must stay alive until generation has finished.
  CodeGenFunction OutlinedCGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo RegionInfo(D, Captured, ThreadIDVar,
                                            PartIDVar);
  OutlinedCGF.CapturedStmtInfo = &RegionInfo;
  return OutlinedCGF.GenerateCapturedStmtFunction(Captured);
}
llvm::Value * llvm::Value *
CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
@ -266,12 +338,9 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
} }
if (auto OMPRegionInfo = if (auto OMPRegionInfo =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
if (auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable()) { if (OMPRegionInfo->getThreadIDVariable()) {
// Check if this an outlined function with thread id passed as argument. // Check if this an outlined function with thread id passed as argument.
auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
auto RVal = CGF.EmitLoadOfLValue(LVal, Loc);
LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(),
ThreadIDVar->getType());
ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
// If value loaded in entry block, cache it and use it everywhere in // If value loaded in entry block, cache it and use it everywhere in
// function. // function.
@ -564,6 +633,30 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
break; break;
} }
case OMPRTL__kmpc_omp_task_alloc: {
// Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
// kmp_routine_entry_t *task_entry);
assert(KmpRoutineEntryPtrTy != nullptr &&
"Type kmp_routine_entry_t must be created.");
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
// Return void * and then cast to particular kmp_task_t type.
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
break;
}
case OMPRTL__kmpc_omp_task: {
// Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
// *new_task);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.VoidPtrTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
break;
}
} }
return RTLFn; return RTLFn;
} }
@ -767,8 +860,7 @@ llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
if (auto OMPRegionInfo = if (auto OMPRegionInfo =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
if (OMPRegionInfo->getThreadIDVariable()) if (OMPRegionInfo->getThreadIDVariable())
return CGF.EmitLoadOfLValue(OMPRegionInfo->getThreadIDVariableLValue(CGF), return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
Loc).getScalarVal();
auto ThreadID = getThreadID(CGF, Loc); auto ThreadID = getThreadID(CGF, Loc);
auto Int32Ty = auto Int32Ty =
@ -1044,6 +1136,200 @@ void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
emitUpdateLocation(CGF, Loc)); emitUpdateLocation(CGF, Loc));
} }
namespace {
/// \brief Positions of the fields inside the kmp_task_t record. The values are
/// used as struct GEP indexes, so they have to follow the order in which
/// createKmpTaskTRecordDecl() adds the fields.
enum KmpTaskTFields {
  /// \brief Pointer to the list of shared variables.
  KmpTaskTShareds = 0,
  /// \brief Task routine.
  KmpTaskTRoutine = 1,
  /// \brief Partition id used by untied tasks.
  KmpTaskTPartId = 2,
  /// \brief Function that calls the destructors of the private variables.
  KmpTaskTDestructors = 3,
};
} // namespace
/// \brief Create and cache the type
/// typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);
/// Both the AST type (KmpRoutineEntryPtrQTy) and the converted LLVM type
/// (KmpRoutineEntryPtrTy) are stored. Nothing is done if the LLVM type has
/// already been built.
/// \param KmpInt32Ty AST type used for kmp_int32.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (KmpRoutineEntryPtrTy)
    return;
  ASTContext &Ctx = CGM.getContext();
  QualType ParamTys[] = {KmpInt32Ty, Ctx.VoidPtrTy};
  FunctionProtoType::ExtProtoInfo EPI;
  QualType RoutineFnTy = Ctx.getFunctionType(KmpInt32Ty, ParamTys, EPI);
  KmpRoutineEntryPtrQTy = Ctx.getPointerType(RoutineFnTy);
  KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
}
/// \brief Append an unnamed public field of type \a FieldTy to \a DC.
/// \param C AST context used to allocate the field.
/// \param DC Declaration context that receives the field.
/// \param FieldTy Type of the new field.
static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                 QualType FieldTy) {
  // The field is synthesized, so every source location is left invalid.
  const SourceLocation NoLoc;
  auto *TInfo = C.getTrivialTypeSourceInfo(FieldTy, NoLoc);
  auto *NewField =
      FieldDecl::Create(C, DC, NoLoc, NoLoc, /*Id=*/nullptr, FieldTy, TInfo,
                        /*BW=*/nullptr, /*Mutable=*/false,
                        /*InitStyle=*/ICIS_NoInit);
  NewField->setAccess(AS_public);
  DC->addDecl(NewField);
}
/// \brief Build an implicit record
/// \code
/// struct kmp_task_t {
///   void *shareds;
///   kmp_routine_entry_t routine;
///   kmp_int32 part_id;
///   kmp_routine_entry_t destructors;
///   /* private vars */
/// };
/// \endcode
/// and return its type.
/// \param KmpInt32Ty AST type used for kmp_int32.
/// \param KmpRoutineEntryPointerQTy AST type used for kmp_routine_entry_t.
static QualType createKmpTaskTRecordDecl(CodeGenModule &CGM,
                                         QualType KmpInt32Ty,
                                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &Ctx = CGM.getContext();
  // Field types in declaration order.
  const QualType FieldTys[] = {
      Ctx.VoidPtrTy,             // shareds
      KmpRoutineEntryPointerQTy, // routine
      KmpInt32Ty,                // part_id
      KmpRoutineEntryPointerQTy, // destructors
  };
  auto *TaskRD = Ctx.buildImplicitRecord("kmp_task_t");
  TaskRD->startDefinition();
  for (QualType FieldTy : FieldTys)
    addFieldToRecordDecl(Ctx, TaskRD, FieldTy);
  // TODO: add private fields.
  TaskRD->completeDefinition();
  return Ctx.getRecordType(TaskRD);
}
/// \brief Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
/// TaskFunction(gtid, tt->part_id, tt->shareds);
/// return 0;
/// }
/// \endcode
/// \param CGM Module the proxy function is added to.
/// \param Loc Source location used for the implicit parameters and the loads.
/// \param KmpInt32Ty AST type used for kmp_int32.
/// \param KmpTaskTPtrQTy AST type of a pointer to the kmp_task_t record.
/// \param SharedsPtrTy AST type the loaded shareds pointer is cast to before
/// it is passed to \a TaskFunction.
/// \param TaskFunction Function called from the proxy.
/// \return The created proxy function.
static llvm::Value *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
QualType KmpInt32Ty, QualType KmpTaskTPtrQTy,
QualType SharedsPtrTy, llvm::Value *TaskFunction) {
auto &C = CGM.getContext();
// Declare the two unnamed parameters: (kmp_int32 gtid, kmp_task_t *tt).
FunctionArgList Args;
ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
/*Id=*/nullptr, KmpTaskTPtrQTy);
Args.push_back(&GtidArg);
Args.push_back(&TaskTypeArg);
FunctionType::ExtInfo Info;
auto &TaskEntryFnInfo =
CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
/*isVariadic=*/false);
auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
// The proxy gets internal linkage and the name ".omp_task_entry.".
auto *TaskEntry =
llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
".omp_task_entry.", &CGM.getModule());
CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry);
CodeGenFunction CGF(CGM);
// Debug info is switched off for this function.
CGF.disableDebugInfo();
CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
// TaskFunction(gtid, tt->part_id, tt->shareds);
// Load the gtid argument.
auto *GtidParam = CGF.EmitLoadOfScalar(
CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false,
C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
// Load the kmp_task_t pointer argument.
auto TaskTypeArgAddr = CGF.EmitLoadOfScalar(
CGF.GetAddrOfLocalVar(&TaskTypeArg), /*Volatile=*/false,
CGM.PointerAlignInBytes, KmpTaskTPtrQTy, Loc);
// Load tt->part_id.
auto *PartidPtr = CGF.Builder.CreateStructGEP(TaskTypeArgAddr,
/*Idx=*/KmpTaskTPartId);
auto *PartidParam = CGF.EmitLoadOfScalar(
PartidPtr, /*Volatile=*/false,
C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
// Load tt->shareds as a void pointer.
auto *SharedsPtr = CGF.Builder.CreateStructGEP(TaskTypeArgAddr,
/*Idx=*/KmpTaskTShareds);
auto *SharedsParam =
CGF.EmitLoadOfScalar(SharedsPtr, /*Volatile=*/false,
CGM.PointerAlignInBytes, C.VoidPtrTy, Loc);
// The shareds pointer is cast to the memory type of SharedsPtrTy before the
// call.
llvm::Value *CallArgs[] = {
GtidParam, PartidParam,
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
SharedsParam, CGF.ConvertTypeForMem(SharedsPtrTy))};
CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
// return 0; implemented as a store of 0 into the return value slot.
CGF.EmitStoreThroughLValue(
RValue::get(CGF.Builder.getInt32(/*C=*/0)),
CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty));
CGF.FinishFunction();
return TaskEntry;
}
/// \brief Emit the runtime calls that allocate and schedule a task.
/// \param CGF Code generation state of the function that contains the task
/// directive.
/// \param Loc Source location used for the emitted calls.
/// \param Tied true for a tied task, false for an untied one.
/// \param Final Either a runtime value (pointer part) or a constant (int
/// part) for the final clause.
/// \param TaskFunction Function called from the generated proxy function.
/// \param SharedsTy Type of the record holding the shared variables.
/// \param Shareds Source of the aggregate copy into the task's shareds
/// storage.
void CGOpenMPRuntime::emitTaskCall(
CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds) {
auto &C = CGM.getContext();
// kmp_int32 is modelled as a signed 32-bit integer type.
auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
// Build type kmp_routine_entry_t (if not built yet).
emitKmpRoutineEntryT(KmpInt32Ty);
// Build particular struct kmp_task_t for the given task.
auto KmpTaskQTy =
createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy);
QualType KmpTaskTPtrQTy = C.getPointerType(KmpTaskQTy);
auto KmpTaskTPtrTy = CGF.ConvertType(KmpTaskQTy)->getPointerTo();
auto KmpTaskTySize = CGM.getSize(C.getTypeSizeInChars(KmpTaskQTy));
QualType SharedsPtrTy = C.getPointerType(SharedsTy);
// Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
// kmp_task_t *tt);
auto *TaskEntry = emitProxyTaskFunction(CGM, Loc, KmpInt32Ty, KmpTaskTPtrQTy,
SharedsPtrTy, TaskFunction);
// Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
// kmp_routine_entry_t *task_entry);
// Task flags. Format is taken from
// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
// description of kmp_tasking_flags struct.
const unsigned TiedFlag = 0x1;
const unsigned FinalFlag = 0x2;
unsigned Flags = Tied ? TiedFlag : 0;
// If the final clause has a runtime condition, the final bit is chosen with
// a select; otherwise it is folded into a constant.
auto *TaskFlags =
Final.getPointer()
? CGF.Builder.CreateSelect(Final.getPointer(),
CGF.Builder.getInt32(FinalFlag),
CGF.Builder.getInt32(/*C=*/0))
: CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
// Combine the final bit with the tied bit.
TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
auto SharedsSize = C.getTypeSizeInChars(SharedsTy);
llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
getThreadID(CGF, Loc), TaskFlags, KmpTaskTySize,
CGM.getSize(SharedsSize),
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
TaskEntry, KmpRoutineEntryPtrTy)};
auto *NewTask = CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
// The result of the allocation call is cast to a pointer to the kmp_task_t
// type built above so that its fields can be addressed.
auto *NewTaskNewTaskTTy =
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(NewTask, KmpTaskTPtrTy);
// Fill the data in the resulting kmp_task_t record.
// Copy shareds if there are any.
// NOTE(review): the result of getAsStructureType() is dereferenced without a
// null check - assumes SharedsTy is always a structure type; confirm against
// callers.
if (!SharedsTy->getAsStructureType()->getDecl()->field_empty())
// Destination is the pointer loaded from the shareds field of the new task;
// source is Shareds.
CGF.EmitAggregateCopy(
CGF.EmitLoadOfScalar(
CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy,
/*Idx=*/KmpTaskTShareds),
/*Volatile=*/false, CGM.PointerAlignInBytes, SharedsPtrTy, Loc),
Shareds, SharedsTy);
// TODO: generate function with destructors for privates.
// Provide pointer to function with destructors for privates.
// For now a null pointer is stored into the destructors field.
CGF.Builder.CreateAlignedStore(
llvm::ConstantPointerNull::get(
cast<llvm::PointerType>(KmpRoutineEntryPtrTy)),
CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy,
/*Idx=*/KmpTaskTDestructors),
CGM.PointerAlignInBytes);
// NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
// libcall.
// Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
// *new_task);
// The un-cast pointer returned by the allocation call is passed here.
llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc),
getThreadID(CGF, Loc), NewTask};
// TODO: add check for untied tasks.
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
}
InlinedOpenMPRegionRAII::InlinedOpenMPRegionRAII( InlinedOpenMPRegionRAII::InlinedOpenMPRegionRAII(
CodeGenFunction &CGF, const OMPExecutableDirective &D) CodeGenFunction &CGF, const OMPExecutableDirective &D)
: CGF(CGF) { : CGF(CGF) {

View File

@ -14,6 +14,7 @@
#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
#include "clang/AST/Type.h"
#include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMap.h"
@ -92,6 +93,13 @@ class CGOpenMPRuntime {
OMPRTL__kmpc_single, OMPRTL__kmpc_single,
// Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
OMPRTL__kmpc_end_single, OMPRTL__kmpc_end_single,
// Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
// kmp_routine_entry_t *task_entry);
OMPRTL__kmpc_omp_task_alloc,
// Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
// new_task);
OMPRTL__kmpc_omp_task,
}; };
/// \brief Values for bit flags used in the ident_t to describe the fields. /// \brief Values for bit flags used in the ident_t to describe the fields.
@ -190,6 +198,12 @@ class CGOpenMPRuntime {
/// variables. /// variables.
llvm::StringMap<llvm::AssertingVH<llvm::Constant>, llvm::BumpPtrAllocator> llvm::StringMap<llvm::AssertingVH<llvm::Constant>, llvm::BumpPtrAllocator>
InternalVars; InternalVars;
/// \brief Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);
llvm::Type *KmpRoutineEntryPtrTy;
QualType KmpRoutineEntryPtrQTy;
/// \brief Build type kmp_routine_entry_t (if not built yet).
void emitKmpRoutineEntryT(QualType KmpInt32Ty);
/// \brief Emits object of ident_t type with info for source location. /// \brief Emits object of ident_t type with info for source location.
/// \param Flags Flags for OpenMP location. /// \param Flags Flags for OpenMP location.
@ -257,16 +271,26 @@ public:
explicit CGOpenMPRuntime(CodeGenModule &CGM); explicit CGOpenMPRuntime(CodeGenModule &CGM);
virtual ~CGOpenMPRuntime() {} virtual ~CGOpenMPRuntime() {}
/// \brief Emits outlined function for the specified OpenMP directive \a D /// \brief Emits outlined function for the specified OpenMP directive \a D.
/// (required for parallel and task directives). This outlined function has /// This outlined function has type void(*)(kmp_int32 *ThreadID, kmp_int32
/// type void(*)(kmp_int32 /*ThreadID*/, kmp_int32 /*BoundID*/, struct /// BoundID, struct context_vars*).
/// context_vars*).
/// \param D OpenMP directive. /// \param D OpenMP directive.
/// \param ThreadIDVar Variable for thread id in the current OpenMP region. /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
/// ///
virtual llvm::Value *emitOutlinedFunction(const OMPExecutableDirective &D, virtual llvm::Value *emitOutlinedFunction(const OMPExecutableDirective &D,
const VarDecl *ThreadIDVar); const VarDecl *ThreadIDVar);
/// \brief Emits outlined function for the OpenMP task directive \a D. This
/// outlined function has type void(*)(kmp_int32 ThreadID, kmp_int32
/// PartID, struct context_vars*).
/// \param D OpenMP directive.
/// \param ThreadIDVar Variable for thread id in the current OpenMP region.
/// \param PartIDVar If not nullptr - variable used for part id in tasks.
///
virtual llvm::Value *emitTaskOutlinedFunction(const OMPExecutableDirective &D,
const VarDecl *ThreadIDVar,
const VarDecl *PartIDVar);
/// \brief Cleans up references to the objects in finished function. /// \brief Cleans up references to the objects in finished function.
/// ///
void functionFinished(CodeGenFunction &CGF); void functionFinished(CodeGenFunction &CGF);
@ -274,7 +298,7 @@ public:
/// \brief Emits code for parallel call of the \a OutlinedFn with variables /// \brief Emits code for parallel call of the \a OutlinedFn with variables
/// captured in a record which address is stored in \a CapturedStruct. /// captured in a record which address is stored in \a CapturedStruct.
/// \param OutlinedFn Outlined function to be run in parallel threads. Type of /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
/// this function is void(*)(kmp_int32, kmp_int32, struct context_vars*). /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
/// \param CapturedStruct A pointer to the record with the references to /// \param CapturedStruct A pointer to the record with the references to
/// variables used in \a OutlinedFn function. /// variables used in \a OutlinedFn function.
/// ///
@ -412,6 +436,39 @@ public:
/// \param Vars List of variables to flush. /// \param Vars List of variables to flush.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars, virtual void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars,
SourceLocation Loc); SourceLocation Loc);
/// \brief Emit task region for the task directive. The task region is
/// emitted in several steps:
/// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32
/// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
/// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the
/// function:
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
/// TaskFunction(gtid, tt->part_id, tt->shareds);
/// return 0;
/// }
/// 2. Copy a list of shared variables to field shareds of the resulting
/// structure kmp_task_t returned by the previous call (if any).
/// 3. Copy a pointer to the destructors function to field destructors of the
/// resulting structure kmp_task_t.
/// 4. Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid,
/// kmp_task_t *new_task), where new_task is a resulting structure from
/// previous items.
/// \param Tied true if the task is tied (the task is tied to the thread that
/// can suspend its task region), false - untied (the task is not tied to any
/// thread).
/// \param Final Contains either constant bool value, or llvm::Value * of i1
/// type for final clause. If the value is true, the task forces all of its
/// child tasks to become final and included tasks.
/// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
/// /*part_id*/, captured_struct */*__context*/);
/// \param SharedsTy A type which contains references to the shared variables.
/// \param Shareds Context with the list of shared variables from the \a
/// TaskFunction.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
llvm::Value *TaskFunction, QualType SharedsTy,
llvm::Value *Shareds);
}; };
/// \brief RAII for emitting code of CapturedStmt without function outlining. /// \brief RAII for emitting code of CapturedStmt without function outlining.

View File

@ -717,8 +717,35 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective(
llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet."); llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet.");
} }
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &) { void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
llvm_unreachable("CodeGen for 'omp task' is not supported yet."); // Emit outlined function for task construct.
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
auto *I = CS->getCapturedDecl()->param_begin();
// The first function argument for tasks is a thread id, the second one is a
// part id (0 for tied tasks, >=0 for untied task).
auto OutlinedFn =
CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, *std::next(I));
// Check if we should emit tied or untied task.
bool Tied = !S.getSingleClause(OMPC_untied);
// Check if the task is final
llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
if (auto *Clause = S.getSingleClause(OMPC_final)) {
// If the condition constant folds and can be elided, try to avoid emitting
// the condition and the dead arm of the if/else.
auto *Cond = cast<OMPFinalClause>(Clause)->getCondition();
bool CondConstant;
if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
Final.setInt(CondConstant);
else
Final.setPointer(EvaluateExprAsBool(Cond));
} else {
// By default the task is not final.
Final.setInt(/*IntVal=*/false);
}
auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final,
OutlinedFn, SharedsTy, CapturedStruct);
} }
void CodeGenFunction::EmitOMPTaskyieldDirective( void CodeGenFunction::EmitOMPTaskyieldDirective(

View File

@ -308,8 +308,7 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(StackTy::reverse_iterator Iter,
// bound to the current team is shared. // bound to the current team is shared.
if (DVar.DKind == OMPD_task) { if (DVar.DKind == OMPD_task) {
DSAVarData DVarTemp; DSAVarData DVarTemp;
for (StackTy::reverse_iterator I = std::next(Iter), for (StackTy::reverse_iterator I = std::next(Iter), EE = Stack.rend();
EE = std::prev(Stack.rend());
I != EE; ++I) { I != EE; ++I) {
// OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables
// Referenced // Referenced
@ -1122,11 +1121,19 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
break; break;
} }
case OMPD_task: { case OMPD_task: {
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
Sema::CapturedParamNameType Params[] = { Sema::CapturedParamNameType Params[] = {
std::make_pair(".global_tid.", KmpInt32Ty),
std::make_pair(".part_id.", KmpInt32Ty),
std::make_pair(StringRef(), QualType()) // __context with shared vars std::make_pair(StringRef(), QualType()) // __context with shared vars
}; };
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
Params); Params);
// Mark this captured region as inlined, because we don't use outlined
// function directly.
getCurCapturedRegion()->TheCapturedDecl->addAttr(
AlwaysInlineAttr::CreateImplicit(
Context, AlwaysInlineAttr::Keyword_forceinline, SourceRange()));
break; break;
} }
case OMPD_ordered: { case OMPD_ordered: {

View File

@ -0,0 +1,102 @@
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp=libiomp5 -x c++ -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* }
// CHECK-DAG: [[STRUCT_SHAREDS:%.+]] = type { i8*, [[STRUCT_S:%.+]]* }
// CHECK-DAG: [[KMP_TASK_T:%.+]] = type { i8*, i32 (i32, i8*)*, i32, i32 (i32, i8*)* }
// Class type with a user-provided default constructor, copy constructor and
// destructor. An object of this type is named in the shared clause of the
// first task in main().
struct S {
int a;
S() : a(0) {}
S(const S &s) : a(s.a) {}
~S() {}
};
int a;
// CHECK-LABEL: @main
// Test driver: five task constructs, each followed by the IR pattern for its
// allocation call, destructors store and task call.
int main() {
// CHECK: [[B:%.+]] = alloca i8
// CHECK: [[S:%.+]] = alloca [[STRUCT_S]]
char b;
S s;
// Task 1: tied task (flags value 1) with b and s captured; the shareds record
// holds two pointers (size 16) and is copied into the task with memcpy.
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T]]* @{{.+}})
// CHECK: [[B_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]], [[STRUCT_SHAREDS]]* [[CAPTURES:%.+]], i32 0, i32 0
// CHECK: store i8* [[B]], i8** [[B_REF]]
// CHECK: [[S_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]], [[STRUCT_SHAREDS]]* [[CAPTURES]], i32 0, i32 1
// CHECK: store [[STRUCT_S]]* [[S]], [[STRUCT_S]]** [[S_REF]]
// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 1, i64 32, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY1:@.+]] to i32 (i32, i8*)*))
// CHECK: [[SHAREDS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR:%.+]], i32 0, i32 0
// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_PTR]]
// CHECK: [[BITCAST:%.+]] = bitcast [[STRUCT_SHAREDS]]* [[CAPTURES]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[BITCAST]], i64 16, i32 8, i1 false)
// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3
// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
#pragma omp task shared(a, b, s)
{
a = 15;
b = a;
s.a = 10;
}
// Task 2: untied task (flags value 0); the shareds size passed is 1.
// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.+}}, i32 0, i32 3
// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
#pragma omp task untied
{
a = 1;
}
// Task 3: final(true) on a tied task gives the constant flags value 3.
// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 3, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*))
// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.+}}, i32 0, i32 3
// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
#pragma omp task final(true)
{
a = 2;
}
// Task 4: final(flag) with a constant false flag gives the constant flags
// value 1.
// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.*}}, i32 0, i32 3
// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
const bool flag = false;
#pragma omp task final(flag)
{
a = 3;
}
// Task 5: final(b) is a runtime condition; the flags value is computed with a
// select on b and then or-ed with 1.
// CHECK: [[B_VAL:%.+]] = load i8, i8* [[B]]
// CHECK: [[CMP:%.+]] = icmp ne i8 [[B_VAL]], 0
// CHECK: [[FINAL:%.+]] = select i1 [[CMP]], i32 2, i32 0
// CHECK: [[FLAGS:%.+]] = or i32 [[FINAL]], 1
// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 [[FLAGS]], i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY5:@.+]] to i32 (i32, i8*)*))
// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.+}}, i32 0, i32 3
// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
#pragma omp task final(b)
{
a = 4;
}
return a;
}
// CHECK: define internal i32 [[TASK_ENTRY1]](i32, [[KMP_TASK_T]]{{.*}}*)
// CHECK: store i32 15, i32* [[A_PTR:@.+]]
// CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_PTR]]
// CHECK: [[A_VAL_I8:%.+]] = trunc i32 [[A_VAL]] to i8
// CHECK: store i8 [[A_VAL_I8]], i8* %{{.+}}
// CHECK: store i32 10, i32* %{{.+}}
// CHECK: define internal i32 [[TASK_ENTRY2]](i32, [[KMP_TASK_T]]{{.*}}*)
// CHECK: store i32 1, i32* [[A_PTR:@.+]]
// CHECK: define internal i32 [[TASK_ENTRY3]](i32, [[KMP_TASK_T]]{{.*}}*)
// CHECK: store i32 2, i32* [[A_PTR:@.+]]
// CHECK: define internal i32 [[TASK_ENTRY4]](i32, [[KMP_TASK_T]]{{.*}}*)
// CHECK: store i32 3, i32* [[A_PTR:@.+]]
// CHECK: define internal i32 [[TASK_ENTRY5]](i32, [[KMP_TASK_T]]{{.*}}*)
// CHECK: store i32 4, i32* [[A_PTR:@.+]]
#endif