[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps: 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the function: kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { TaskFunction(gtid, tt->part_id, tt->shareds); return 0; } 2. Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any). 3. Copy a pointer to destructors function to field destructors of the resulting structure kmp_task_t. 4. Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items. Differential Revision: http://reviews.llvm.org/D7560 llvm-svn: 231762
This commit is contained in:
parent
58364dc4da
commit
62b63b197d
|
@ -42,7 +42,8 @@ public:
|
|||
virtual const VarDecl *getThreadIDVariable() const = 0;
|
||||
|
||||
/// \brief Get an LValue for the current ThreadID variable.
|
||||
LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
|
||||
/// \return LValue for thread id variable. This LValue always has type int32*.
|
||||
virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
|
||||
|
||||
/// \brief Emit the captured statement body.
|
||||
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
|
||||
|
@ -77,6 +78,41 @@ private:
|
|||
const VarDecl *ThreadIDVar;
|
||||
};
|
||||
|
||||
/// \brief API for captured statement code generation in OpenMP constructs.
|
||||
class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
|
||||
public:
|
||||
CGOpenMPTaskOutlinedRegionInfo(const OMPExecutableDirective &D,
|
||||
const CapturedStmt &CS,
|
||||
const VarDecl *ThreadIDVar,
|
||||
const VarDecl *PartIDVar)
|
||||
: CGOpenMPRegionInfo(D, CS), ThreadIDVar(ThreadIDVar),
|
||||
PartIDVar(PartIDVar) {
|
||||
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
|
||||
}
|
||||
/// \brief Get a variable or parameter for storing global thread id
|
||||
/// inside OpenMP construct.
|
||||
virtual const VarDecl *getThreadIDVariable() const override {
|
||||
return ThreadIDVar;
|
||||
}
|
||||
|
||||
/// \brief Get an LValue for the current ThreadID variable.
|
||||
virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
|
||||
|
||||
/// \brief Emit the captured statement body.
|
||||
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
|
||||
|
||||
/// \brief Get the name of the capture helper.
|
||||
StringRef getHelperName() const override { return ".omp_outlined."; }
|
||||
|
||||
private:
|
||||
/// \brief A variable or parameter storing global thread id for OpenMP
|
||||
/// constructs.
|
||||
const VarDecl *ThreadIDVar;
|
||||
/// \brief A variable or parameter storing part id for OpenMP tasking
|
||||
/// constructs.
|
||||
const VarDecl *PartIDVar;
|
||||
};
|
||||
|
||||
/// \brief API for inlined captured statement code generation in OpenMP
|
||||
/// constructs.
|
||||
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
|
||||
|
@ -110,6 +146,7 @@ public:
|
|||
return OuterRegionInfo->getThreadIDVariable();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// \brief Get the name of the capture helper.
|
||||
virtual StringRef getHelperName() const override {
|
||||
llvm_unreachable("No helper name for inlined OpenMP construct");
|
||||
|
@ -126,8 +163,13 @@ private:
|
|||
|
||||
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
|
||||
return CGF.MakeNaturalAlignAddrLValue(
|
||||
CGF.GetAddrOfLocalVar(getThreadIDVariable()),
|
||||
CGF.getContext().getPointerType(getThreadIDVariable()->getType()));
|
||||
CGF.Builder.CreateAlignedLoad(
|
||||
CGF.GetAddrOfLocalVar(getThreadIDVariable()),
|
||||
CGF.PointerAlignInBytes),
|
||||
getThreadIDVariable()
|
||||
->getType()
|
||||
->castAs<PointerType>()
|
||||
->getPointeeType());
|
||||
}
|
||||
|
||||
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
|
||||
|
@ -141,8 +183,23 @@ void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
|
|||
CGCapturedStmtInfo::EmitBody(CGF, S);
|
||||
}
|
||||
|
||||
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
|
||||
CodeGenFunction &CGF) {
|
||||
return CGF.MakeNaturalAlignAddrLValue(
|
||||
CGF.GetAddrOfLocalVar(getThreadIDVariable()),
|
||||
getThreadIDVariable()->getType());
|
||||
}
|
||||
|
||||
void CGOpenMPTaskOutlinedRegionInfo::EmitBody(CodeGenFunction &CGF,
|
||||
const Stmt *S) {
|
||||
if (PartIDVar) {
|
||||
// TODO: emit code for untied tasks.
|
||||
}
|
||||
CGCapturedStmtInfo::EmitBody(CGF, S);
|
||||
}
|
||||
|
||||
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
|
||||
: CGM(CGM), DefaultOpenMPPSource(nullptr) {
|
||||
: CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
|
||||
IdentTy = llvm::StructType::create(
|
||||
"ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
|
||||
CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
|
||||
|
@ -157,6 +214,8 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
|
|||
llvm::Value *
|
||||
CGOpenMPRuntime::emitOutlinedFunction(const OMPExecutableDirective &D,
|
||||
const VarDecl *ThreadIDVar) {
|
||||
assert(ThreadIDVar->getType()->isPointerType() &&
|
||||
"thread id variable must be of type kmp_int32 *");
|
||||
const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
|
||||
CodeGenFunction CGF(CGM, true);
|
||||
CGOpenMPOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar);
|
||||
|
@ -164,6 +223,19 @@ CGOpenMPRuntime::emitOutlinedFunction(const OMPExecutableDirective &D,
|
|||
return CGF.GenerateCapturedStmtFunction(*CS);
|
||||
}
|
||||
|
||||
llvm::Value *
|
||||
CGOpenMPRuntime::emitTaskOutlinedFunction(const OMPExecutableDirective &D,
|
||||
const VarDecl *ThreadIDVar,
|
||||
const VarDecl *PartIDVar) {
|
||||
assert(!ThreadIDVar->getType()->isPointerType() &&
|
||||
"thread id variable must be of type kmp_int32 for tasks");
|
||||
auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
|
||||
CodeGenFunction CGF(CGM, true);
|
||||
CGOpenMPTaskOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar, PartIDVar);
|
||||
CGF.CapturedStmtInfo = &CGInfo;
|
||||
return CGF.GenerateCapturedStmtFunction(*CS);
|
||||
}
|
||||
|
||||
llvm::Value *
|
||||
CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
|
||||
llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
|
||||
|
@ -266,12 +338,9 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
|
|||
}
|
||||
if (auto OMPRegionInfo =
|
||||
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
|
||||
if (auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable()) {
|
||||
if (OMPRegionInfo->getThreadIDVariable()) {
|
||||
// Check if this is an outlined function with thread id passed as argument.
|
||||
auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
|
||||
auto RVal = CGF.EmitLoadOfLValue(LVal, Loc);
|
||||
LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(),
|
||||
ThreadIDVar->getType());
|
||||
ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
|
||||
// If value loaded in entry block, cache it and use it everywhere in
|
||||
// function.
|
||||
|
@ -564,6 +633,30 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
|
|||
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
|
||||
break;
|
||||
}
|
||||
case OMPRTL__kmpc_omp_task_alloc: {
|
||||
// Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
|
||||
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
|
||||
// kmp_routine_entry_t *task_entry);
|
||||
assert(KmpRoutineEntryPtrTy != nullptr &&
|
||||
"Type kmp_routine_entry_t must be created.");
|
||||
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
|
||||
CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
|
||||
// Return void * and then cast to particular kmp_task_t type.
|
||||
llvm::FunctionType *FnTy =
|
||||
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
|
||||
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
|
||||
break;
|
||||
}
|
||||
case OMPRTL__kmpc_omp_task: {
|
||||
// Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
|
||||
// *new_task);
|
||||
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
||||
CGM.VoidPtrTy};
|
||||
llvm::FunctionType *FnTy =
|
||||
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
|
||||
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
|
||||
break;
|
||||
}
|
||||
}
|
||||
return RTLFn;
|
||||
}
|
||||
|
@ -767,8 +860,7 @@ llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
|
|||
if (auto OMPRegionInfo =
|
||||
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
|
||||
if (OMPRegionInfo->getThreadIDVariable())
|
||||
return CGF.EmitLoadOfLValue(OMPRegionInfo->getThreadIDVariableLValue(CGF),
|
||||
Loc).getScalarVal();
|
||||
return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
|
||||
|
||||
auto ThreadID = getThreadID(CGF, Loc);
|
||||
auto Int32Ty =
|
||||
|
@ -1044,6 +1136,200 @@ void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
|
|||
emitUpdateLocation(CGF, Loc));
|
||||
}
|
||||
|
||||
namespace {
|
||||
/// \brief Indexes of fields for type kmp_task_t.
|
||||
enum KmpTaskTFields {
|
||||
/// \brief List of shared variables.
|
||||
KmpTaskTShareds,
|
||||
/// \brief Task routine.
|
||||
KmpTaskTRoutine,
|
||||
/// \brief Partition id for the untied tasks.
|
||||
KmpTaskTPartId,
|
||||
/// \brief Function with call of destructors for private variables.
|
||||
KmpTaskTDestructors,
|
||||
};
|
||||
} // namespace
|
||||
|
||||
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
|
||||
if (!KmpRoutineEntryPtrTy) {
|
||||
// Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
|
||||
auto &C = CGM.getContext();
|
||||
QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
|
||||
FunctionProtoType::ExtProtoInfo EPI;
|
||||
KmpRoutineEntryPtrQTy = C.getPointerType(
|
||||
C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
|
||||
KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
|
||||
}
|
||||
}
|
||||
|
||||
static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
|
||||
QualType FieldTy) {
|
||||
auto *Field = FieldDecl::Create(
|
||||
C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
|
||||
C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
|
||||
/*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
|
||||
Field->setAccess(AS_public);
|
||||
DC->addDecl(Field);
|
||||
}
|
||||
|
||||
static QualType createKmpTaskTRecordDecl(CodeGenModule &CGM,
|
||||
QualType KmpInt32Ty,
|
||||
QualType KmpRoutineEntryPointerQTy) {
|
||||
auto &C = CGM.getContext();
|
||||
// Build struct kmp_task_t {
|
||||
// void * shareds;
|
||||
// kmp_routine_entry_t routine;
|
||||
// kmp_int32 part_id;
|
||||
// kmp_routine_entry_t destructors;
|
||||
// /* private vars */
|
||||
// };
|
||||
auto *RD = C.buildImplicitRecord("kmp_task_t");
|
||||
RD->startDefinition();
|
||||
addFieldToRecordDecl(C, RD, C.VoidPtrTy);
|
||||
addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
|
||||
addFieldToRecordDecl(C, RD, KmpInt32Ty);
|
||||
addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
|
||||
// TODO: add private fields.
|
||||
RD->completeDefinition();
|
||||
return C.getRecordType(RD);
|
||||
}
|
||||
|
||||
/// \brief Emit a proxy function which accepts kmp_task_t as the second
|
||||
/// argument.
|
||||
/// \code
|
||||
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
|
||||
/// TaskFunction(gtid, tt->part_id, tt->shareds);
|
||||
/// return 0;
|
||||
/// }
|
||||
/// \endcode
|
||||
static llvm::Value *
|
||||
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
|
||||
QualType KmpInt32Ty, QualType KmpTaskTPtrQTy,
|
||||
QualType SharedsPtrTy, llvm::Value *TaskFunction) {
|
||||
auto &C = CGM.getContext();
|
||||
FunctionArgList Args;
|
||||
ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
|
||||
ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
|
||||
/*Id=*/nullptr, KmpTaskTPtrQTy);
|
||||
Args.push_back(&GtidArg);
|
||||
Args.push_back(&TaskTypeArg);
|
||||
FunctionType::ExtInfo Info;
|
||||
auto &TaskEntryFnInfo =
|
||||
CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
|
||||
/*isVariadic=*/false);
|
||||
auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
|
||||
auto *TaskEntry =
|
||||
llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
|
||||
".omp_task_entry.", &CGM.getModule());
|
||||
CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry);
|
||||
CodeGenFunction CGF(CGM);
|
||||
CGF.disableDebugInfo();
|
||||
CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
|
||||
|
||||
// TaskFunction(gtid, tt->part_id, tt->shareds);
|
||||
auto *GtidParam = CGF.EmitLoadOfScalar(
|
||||
CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false,
|
||||
C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
|
||||
auto TaskTypeArgAddr = CGF.EmitLoadOfScalar(
|
||||
CGF.GetAddrOfLocalVar(&TaskTypeArg), /*Volatile=*/false,
|
||||
CGM.PointerAlignInBytes, KmpTaskTPtrQTy, Loc);
|
||||
auto *PartidPtr = CGF.Builder.CreateStructGEP(TaskTypeArgAddr,
|
||||
/*Idx=*/KmpTaskTPartId);
|
||||
auto *PartidParam = CGF.EmitLoadOfScalar(
|
||||
PartidPtr, /*Volatile=*/false,
|
||||
C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
|
||||
auto *SharedsPtr = CGF.Builder.CreateStructGEP(TaskTypeArgAddr,
|
||||
/*Idx=*/KmpTaskTShareds);
|
||||
auto *SharedsParam =
|
||||
CGF.EmitLoadOfScalar(SharedsPtr, /*Volatile=*/false,
|
||||
CGM.PointerAlignInBytes, C.VoidPtrTy, Loc);
|
||||
llvm::Value *CallArgs[] = {
|
||||
GtidParam, PartidParam,
|
||||
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
||||
SharedsParam, CGF.ConvertTypeForMem(SharedsPtrTy))};
|
||||
CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
|
||||
CGF.EmitStoreThroughLValue(
|
||||
RValue::get(CGF.Builder.getInt32(/*C=*/0)),
|
||||
CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty));
|
||||
CGF.FinishFunction();
|
||||
return TaskEntry;
|
||||
}
|
||||
|
||||
void CGOpenMPRuntime::emitTaskCall(
|
||||
CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
|
||||
llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
|
||||
llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds) {
|
||||
auto &C = CGM.getContext();
|
||||
auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
|
||||
// Build type kmp_routine_entry_t (if not built yet).
|
||||
emitKmpRoutineEntryT(KmpInt32Ty);
|
||||
// Build particular struct kmp_task_t for the given task.
|
||||
auto KmpTaskQTy =
|
||||
createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy);
|
||||
QualType KmpTaskTPtrQTy = C.getPointerType(KmpTaskQTy);
|
||||
auto KmpTaskTPtrTy = CGF.ConvertType(KmpTaskQTy)->getPointerTo();
|
||||
auto KmpTaskTySize = CGM.getSize(C.getTypeSizeInChars(KmpTaskQTy));
|
||||
QualType SharedsPtrTy = C.getPointerType(SharedsTy);
|
||||
|
||||
// Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
|
||||
// kmp_task_t *tt);
|
||||
auto *TaskEntry = emitProxyTaskFunction(CGM, Loc, KmpInt32Ty, KmpTaskTPtrQTy,
|
||||
SharedsPtrTy, TaskFunction);
|
||||
|
||||
// Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
|
||||
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
|
||||
// kmp_routine_entry_t *task_entry);
|
||||
// Task flags. Format is taken from
|
||||
// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
|
||||
// description of kmp_tasking_flags struct.
|
||||
const unsigned TiedFlag = 0x1;
|
||||
const unsigned FinalFlag = 0x2;
|
||||
unsigned Flags = Tied ? TiedFlag : 0;
|
||||
auto *TaskFlags =
|
||||
Final.getPointer()
|
||||
? CGF.Builder.CreateSelect(Final.getPointer(),
|
||||
CGF.Builder.getInt32(FinalFlag),
|
||||
CGF.Builder.getInt32(/*C=*/0))
|
||||
: CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
|
||||
TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
|
||||
auto SharedsSize = C.getTypeSizeInChars(SharedsTy);
|
||||
llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
|
||||
getThreadID(CGF, Loc), TaskFlags, KmpTaskTySize,
|
||||
CGM.getSize(SharedsSize),
|
||||
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
||||
TaskEntry, KmpRoutineEntryPtrTy)};
|
||||
auto *NewTask = CGF.EmitRuntimeCall(
|
||||
createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
|
||||
auto *NewTaskNewTaskTTy =
|
||||
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(NewTask, KmpTaskTPtrTy);
|
||||
// Fill the data in the resulting kmp_task_t record.
|
||||
// Copy shareds if there are any.
|
||||
if (!SharedsTy->getAsStructureType()->getDecl()->field_empty())
|
||||
CGF.EmitAggregateCopy(
|
||||
CGF.EmitLoadOfScalar(
|
||||
CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy,
|
||||
/*Idx=*/KmpTaskTShareds),
|
||||
/*Volatile=*/false, CGM.PointerAlignInBytes, SharedsPtrTy, Loc),
|
||||
Shareds, SharedsTy);
|
||||
// TODO: generate function with destructors for privates.
|
||||
// Provide pointer to function with destructors for privates.
|
||||
CGF.Builder.CreateAlignedStore(
|
||||
llvm::ConstantPointerNull::get(
|
||||
cast<llvm::PointerType>(KmpRoutineEntryPtrTy)),
|
||||
CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy,
|
||||
/*Idx=*/KmpTaskTDestructors),
|
||||
CGM.PointerAlignInBytes);
|
||||
|
||||
// NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
|
||||
// libcall.
|
||||
// Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
|
||||
// *new_task);
|
||||
llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc),
|
||||
getThreadID(CGF, Loc), NewTask};
|
||||
// TODO: add check for untied tasks.
|
||||
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
|
||||
}
|
||||
|
||||
InlinedOpenMPRegionRAII::InlinedOpenMPRegionRAII(
|
||||
CodeGenFunction &CGF, const OMPExecutableDirective &D)
|
||||
: CGF(CGF) {
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
|
||||
#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
|
||||
|
||||
#include "clang/AST/Type.h"
|
||||
#include "clang/Basic/OpenMPKinds.h"
|
||||
#include "clang/Basic/SourceLocation.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
|
@ -92,6 +93,13 @@ class CGOpenMPRuntime {
|
|||
OMPRTL__kmpc_single,
|
||||
// Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
|
||||
OMPRTL__kmpc_end_single,
|
||||
// Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
|
||||
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
|
||||
// kmp_routine_entry_t *task_entry);
|
||||
OMPRTL__kmpc_omp_task_alloc,
|
||||
// Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
|
||||
// new_task);
|
||||
OMPRTL__kmpc_omp_task,
|
||||
};
|
||||
|
||||
/// \brief Values for bit flags used in the ident_t to describe the fields.
|
||||
|
@ -190,6 +198,12 @@ class CGOpenMPRuntime {
|
|||
/// variables.
|
||||
llvm::StringMap<llvm::AssertingVH<llvm::Constant>, llvm::BumpPtrAllocator>
|
||||
InternalVars;
|
||||
/// \brief Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);
|
||||
llvm::Type *KmpRoutineEntryPtrTy;
|
||||
QualType KmpRoutineEntryPtrQTy;
|
||||
|
||||
/// \brief Build type kmp_routine_entry_t (if not built yet).
|
||||
void emitKmpRoutineEntryT(QualType KmpInt32Ty);
|
||||
|
||||
/// \brief Emits object of ident_t type with info for source location.
|
||||
/// \param Flags Flags for OpenMP location.
|
||||
|
@ -257,16 +271,26 @@ public:
|
|||
explicit CGOpenMPRuntime(CodeGenModule &CGM);
|
||||
virtual ~CGOpenMPRuntime() {}
|
||||
|
||||
/// \brief Emits outlined function for the specified OpenMP directive \a D
|
||||
/// (required for parallel and task directives). This outlined function has
|
||||
/// type void(*)(kmp_int32 /*ThreadID*/, kmp_int32 /*BoundID*/, struct
|
||||
/// context_vars*).
|
||||
/// \brief Emits outlined function for the specified OpenMP directive \a D.
|
||||
/// This outlined function has type void(*)(kmp_int32 *ThreadID, kmp_int32
|
||||
/// BoundID, struct context_vars*).
|
||||
/// \param D OpenMP directive.
|
||||
/// \param ThreadIDVar Variable for thread id in the current OpenMP region.
|
||||
///
|
||||
virtual llvm::Value *emitOutlinedFunction(const OMPExecutableDirective &D,
|
||||
const VarDecl *ThreadIDVar);
|
||||
|
||||
/// \brief Emits outlined function for the OpenMP task directive \a D. This
|
||||
/// outlined function has type void(*)(kmp_int32 ThreadID, kmp_int32
|
||||
/// PartID, struct context_vars*).
|
||||
/// \param D OpenMP directive.
|
||||
/// \param ThreadIDVar Variable for thread id in the current OpenMP region.
|
||||
/// \param PartIDVar If not nullptr - variable used for part id in tasks.
|
||||
///
|
||||
virtual llvm::Value *emitTaskOutlinedFunction(const OMPExecutableDirective &D,
|
||||
const VarDecl *ThreadIDVar,
|
||||
const VarDecl *PartIDVar);
|
||||
|
||||
/// \brief Cleans up references to the objects in finished function.
|
||||
///
|
||||
void functionFinished(CodeGenFunction &CGF);
|
||||
|
@ -274,7 +298,7 @@ public:
|
|||
/// \brief Emits code for parallel call of the \a OutlinedFn with variables
|
||||
/// captured in a record which address is stored in \a CapturedStruct.
|
||||
/// \param OutlinedFn Outlined function to be run in parallel threads. Type of
|
||||
/// this function is void(*)(kmp_int32, kmp_int32, struct context_vars*).
|
||||
/// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
|
||||
/// \param CapturedStruct A pointer to the record with the references to
|
||||
/// variables used in \a OutlinedFn function.
|
||||
///
|
||||
|
@ -412,6 +436,39 @@ public:
|
|||
/// \param Vars List of variables to flush.
|
||||
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars,
|
||||
SourceLocation Loc);
|
||||
|
||||
/// \brief Emit task region for the task directive. The task region is
|
||||
/// emitted in several steps:
|
||||
/// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32
|
||||
/// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
|
||||
/// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the
|
||||
/// function:
|
||||
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
|
||||
/// TaskFunction(gtid, tt->part_id, tt->shareds);
|
||||
/// return 0;
|
||||
/// }
|
||||
/// 2. Copy a list of shared variables to field shareds of the resulting
|
||||
/// structure kmp_task_t returned by the previous call (if any).
|
||||
/// 3. Copy a pointer to destructors function to field destructors of the
|
||||
/// resulting structure kmp_task_t.
|
||||
/// 4. Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid,
|
||||
/// kmp_task_t *new_task), where new_task is a resulting structure from
|
||||
/// previous items.
|
||||
/// \param Tied true if the task is tied (the task is tied to the thread that
|
||||
/// can suspend its task region), false - untied (the task is not tied to any
|
||||
/// thread).
|
||||
/// \param Final Contains either constant bool value, or llvm::Value * of i1
|
||||
/// type for final clause. If the value is true, the task forces all of its
|
||||
/// child tasks to become final and included tasks.
|
||||
/// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
|
||||
/// /*part_id*/, captured_struct */*__context*/);
|
||||
/// \param SharedsTy A type which contains references to the shared variables.
|
||||
/// \param Shareds Context with the list of shared variables from the \a
|
||||
/// TaskFunction.
|
||||
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
|
||||
llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
|
||||
llvm::Value *TaskFunction, QualType SharedsTy,
|
||||
llvm::Value *Shareds);
|
||||
};
|
||||
|
||||
/// \brief RAII for emitting code of CapturedStmt without function outlining.
|
||||
|
|
|
@ -717,8 +717,35 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective(
|
|||
llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet.");
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &) {
|
||||
llvm_unreachable("CodeGen for 'omp task' is not supported yet.");
|
||||
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
|
||||
// Emit outlined function for task construct.
|
||||
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
|
||||
auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
|
||||
auto *I = CS->getCapturedDecl()->param_begin();
|
||||
// The first function argument for tasks is a thread id, the second one is a
|
||||
// part id (0 for tied tasks, >=0 for untied task).
|
||||
auto OutlinedFn =
|
||||
CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, *std::next(I));
|
||||
// Check if we should emit tied or untied task.
|
||||
bool Tied = !S.getSingleClause(OMPC_untied);
|
||||
// Check if the task is final
|
||||
llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
|
||||
if (auto *Clause = S.getSingleClause(OMPC_final)) {
|
||||
// If the condition constant folds and can be elided, try to avoid emitting
|
||||
// the condition and the dead arm of the if/else.
|
||||
auto *Cond = cast<OMPFinalClause>(Clause)->getCondition();
|
||||
bool CondConstant;
|
||||
if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
|
||||
Final.setInt(CondConstant);
|
||||
else
|
||||
Final.setPointer(EvaluateExprAsBool(Cond));
|
||||
} else {
|
||||
// By default the task is not final.
|
||||
Final.setInt(/*IntVal=*/false);
|
||||
}
|
||||
auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
|
||||
CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final,
|
||||
OutlinedFn, SharedsTy, CapturedStruct);
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPTaskyieldDirective(
|
||||
|
|
|
@ -308,8 +308,7 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(StackTy::reverse_iterator Iter,
|
|||
// bound to the current team is shared.
|
||||
if (DVar.DKind == OMPD_task) {
|
||||
DSAVarData DVarTemp;
|
||||
for (StackTy::reverse_iterator I = std::next(Iter),
|
||||
EE = std::prev(Stack.rend());
|
||||
for (StackTy::reverse_iterator I = std::next(Iter), EE = Stack.rend();
|
||||
I != EE; ++I) {
|
||||
// OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables
|
||||
// Referenced
|
||||
|
@ -1122,11 +1121,19 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
|
|||
break;
|
||||
}
|
||||
case OMPD_task: {
|
||||
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
|
||||
Sema::CapturedParamNameType Params[] = {
|
||||
std::make_pair(".global_tid.", KmpInt32Ty),
|
||||
std::make_pair(".part_id.", KmpInt32Ty),
|
||||
std::make_pair(StringRef(), QualType()) // __context with shared vars
|
||||
};
|
||||
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
|
||||
Params);
|
||||
// Mark this captured region as inlined, because we don't use outlined
|
||||
// function directly.
|
||||
getCurCapturedRegion()->TheCapturedDecl->addAttr(
|
||||
AlwaysInlineAttr::CreateImplicit(
|
||||
Context, AlwaysInlineAttr::Keyword_forceinline, SourceRange()));
|
||||
break;
|
||||
}
|
||||
case OMPD_ordered: {
|
||||
|
|
|
@ -0,0 +1,102 @@
|
|||
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp=libiomp5 -x c++ -emit-llvm %s -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
// CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* }
|
||||
// CHECK-DAG: [[STRUCT_SHAREDS:%.+]] = type { i8*, [[STRUCT_S:%.+]]* }
|
||||
// CHECK-DAG: [[KMP_TASK_T:%.+]] = type { i8*, i32 (i32, i8*)*, i32, i32 (i32, i8*)* }
|
||||
struct S {
|
||||
int a;
|
||||
S() : a(0) {}
|
||||
S(const S &s) : a(s.a) {}
|
||||
~S() {}
|
||||
};
|
||||
int a;
|
||||
// CHECK-LABEL : @main
|
||||
int main() {
|
||||
// CHECK: [[B:%.+]] = alloca i8
|
||||
// CHECK: [[S:%.+]] = alloca [[STRUCT_S]]
|
||||
char b;
|
||||
S s;
|
||||
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T]]* @{{.+}})
|
||||
// CHECK: [[B_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]], [[STRUCT_SHAREDS]]* [[CAPTURES:%.+]], i32 0, i32 0
|
||||
// CHECK: store i8* [[B]], i8** [[B_REF]]
|
||||
// CHECK: [[S_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]], [[STRUCT_SHAREDS]]* [[CAPTURES]], i32 0, i32 1
|
||||
// CHECK: store [[STRUCT_S]]* [[S]], [[STRUCT_S]]** [[S_REF]]
|
||||
// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 1, i64 32, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY1:@.+]] to i32 (i32, i8*)*))
|
||||
// CHECK: [[SHAREDS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR:%.+]], i32 0, i32 0
|
||||
// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_PTR]]
|
||||
// CHECK: [[BITCAST:%.+]] = bitcast [[STRUCT_SHAREDS]]* [[CAPTURES]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[BITCAST]], i64 16, i32 8, i1 false)
|
||||
// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3
|
||||
// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
|
||||
// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
|
||||
#pragma omp task shared(a, b, s)
|
||||
{
|
||||
a = 15;
|
||||
b = a;
|
||||
s.a = 10;
|
||||
}
|
||||
// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
|
||||
// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.+}}, i32 0, i32 3
|
||||
// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
|
||||
// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
|
||||
#pragma omp task untied
|
||||
{
|
||||
a = 1;
|
||||
}
|
||||
// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 3, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*))
|
||||
// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.+}}, i32 0, i32 3
|
||||
// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
|
||||
// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
|
||||
#pragma omp task final(true)
|
||||
{
|
||||
a = 2;
|
||||
}
|
||||
// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
|
||||
// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.*}}, i32 0, i32 3
|
||||
// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
|
||||
// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
|
||||
const bool flag = false;
|
||||
#pragma omp task final(flag)
|
||||
{
|
||||
a = 3;
|
||||
}
|
||||
// CHECK: [[B_VAL:%.+]] = load i8, i8* [[B]]
|
||||
// CHECK: [[CMP:%.+]] = icmp ne i8 [[B_VAL]], 0
|
||||
// CHECK: [[FINAL:%.+]] = select i1 [[CMP]], i32 2, i32 0
|
||||
// CHECK: [[FLAGS:%.+]] = or i32 [[FINAL]], 1
|
||||
// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 [[FLAGS]], i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY5:@.+]] to i32 (i32, i8*)*))
|
||||
// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.+}}, i32 0, i32 3
|
||||
// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
|
||||
// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
|
||||
#pragma omp task final(b)
|
||||
{
|
||||
a = 4;
|
||||
}
|
||||
return a;
|
||||
}
|
||||
// CHECK: define internal i32 [[TASK_ENTRY1]](i32, [[KMP_TASK_T]]{{.*}}*)
|
||||
// CHECK: store i32 15, i32* [[A_PTR:@.+]]
|
||||
// CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_PTR]]
|
||||
// CHECK: [[A_VAL_I8:%.+]] = trunc i32 [[A_VAL]] to i8
|
||||
// CHECK: store i8 [[A_VAL_I8]], i8* %{{.+}}
|
||||
// CHECK: store i32 10, i32* %{{.+}}
|
||||
|
||||
// CHECK: define internal i32 [[TASK_ENTRY2]](i32, [[KMP_TASK_T]]{{.*}}*)
|
||||
// CHECK: store i32 1, i32* [[A_PTR:@.+]]
|
||||
|
||||
// CHECK: define internal i32 [[TASK_ENTRY3]](i32, [[KMP_TASK_T]]{{.*}}*)
|
||||
// CHECK: store i32 2, i32* [[A_PTR:@.+]]
|
||||
|
||||
// CHECK: define internal i32 [[TASK_ENTRY4]](i32, [[KMP_TASK_T]]{{.*}}*)
|
||||
// CHECK: store i32 3, i32* [[A_PTR:@.+]]
|
||||
|
||||
// CHECK: define internal i32 [[TASK_ENTRY5]](i32, [[KMP_TASK_T]]{{.*}}*)
|
||||
// CHECK: store i32 4, i32* [[A_PTR:@.+]]
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue