[Coroutines] Offering llvm.coro.align intrinsic

It is a known problem that we can't align the switch-based coroutine
frame when the required alignment exceeds that of std::max_align_t (usually 16).
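For illustration, here is a minimal C++20 sketch of the situation; the task type is a hypothetical stand-in, not part of this patch. An over-aligned local that is live across a suspend point has to be stored in the coroutine frame, so the frame itself then needs alignment beyond std::max_align_t.

#include <coroutine>

// Hypothetical minimal coroutine type, only so the example is self-contained.
struct task {
  struct promise_type {
    task get_return_object() { return {}; }
    std::suspend_always initial_suspend() { return {}; }
    std::suspend_always final_suspend() noexcept { return {}; }
    void return_void() {}
    void unhandled_exception() {}
  };
};

task f() {
  // 64-byte alignment exceeds alignof(std::max_align_t), and the buffer is
  // live across the co_await, so it must be spilled into the coroutine frame.
  alignas(64) char buffer[64] = {};
  co_await std::suspend_always{};
  buffer[0] = 1;  // use after resumption keeps the buffer live across the suspend
}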

We could solve the problem either in the middle end, by transforming the
allocation to over-allocate and realign the frame dynamically, or in the
frontend, by emitting a call to an aligned allocation function.

If we solve it in the frontend, the middle end at least needs to offer an
intrinsic that reports the frame's alignment. This patch adds such an
intrinsic, llvm.coro.align.
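To make the frontend approach concrete: once the ramp function knows both llvm.coro.size and llvm.coro.align, it could route over-aligned frames through C++17 aligned allocation. The helper below is only a hedged sketch of that dispatch; its name and shape are assumptions for illustration, not what Clang actually emits.

#include <cstddef>
#include <new>

// Illustrative only: allocate a coroutine frame of the given size and
// alignment, using the aligned operator new overload when needed.
void *allocate_coro_frame(std::size_t frame_size, std::size_t frame_align) {
  if (frame_align <= __STDCPP_DEFAULT_NEW_ALIGNMENT__)
    return ::operator new(frame_size);            // default path, as today
  return ::operator new(frame_size,               // over-aligned path
                        std::align_val_t(frame_align));
}

Because llvm.coro.align is lowered to a constant during CoroSplit, a branch like the one above would be trivially foldable by later optimizations.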

Differential Revision: https://reviews.llvm.org/D117542
Chuanqi Xu 2022-01-18 15:19:50 +08:00
parent 76b74236c7
commit c8ecf12bc3
12 changed files with 315 additions and 2 deletions


@@ -948,6 +948,32 @@ Semantics:

The `coro.size` intrinsic is lowered to a constant representing the size of
the coroutine frame.

.. _coro.align:

'llvm.coro.align' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

::

  declare i32 @llvm.coro.align.i32()
  declare i64 @llvm.coro.align.i64()

Overview:
"""""""""

The '``llvm.coro.align``' intrinsic returns the alignment of a `coroutine frame`_.
This is only supported for switched-resume coroutines.

Arguments:
""""""""""

None

Semantics:
""""""""""

The `coro.align` intrinsic is lowered to a constant representing the alignment of
the coroutine frame.

.. _coro.begin:

'llvm.coro.begin' Intrinsic


@@ -633,6 +633,7 @@ public:
case Intrinsic::coro_end:
case Intrinsic::coro_frame:
case Intrinsic::coro_size:
case Intrinsic::coro_align:
case Intrinsic::coro_suspend:
case Intrinsic::coro_subfn_addr:
// These intrinsics don't actually represent code after lowering.


@@ -1272,6 +1272,7 @@ def int_coro_end_async
def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
def int_coro_noop : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
def int_coro_align : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>;
def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>;
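Since int_coro_align is declared with llvm_anyint_ty, the intrinsic is overloaded on its integer result type, which is where the llvm.coro.align.i32 and llvm.coro.align.i64 names in the documentation above come from. A hedged sketch of emitting such a call with IRBuilder, purely for illustration (this helper is not part of the patch):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Emit "%align = call i64 @llvm.coro.align.i64()" at the builder's current
// insertion point; the overload is selected by the type we pass in.
static Value *emitCoroAlign(IRBuilder<> &Builder, Module &M) {
  Function *CoroAlign =
      Intrinsic::getDeclaration(&M, Intrinsic::coro_align, {Builder.getInt64Ty()});
  return Builder.CreateCall(CoroAlign);
}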


@@ -599,6 +599,18 @@ public:
}
};
/// This represents the llvm.coro.align instruction.
class LLVM_LIBRARY_VISIBILITY CoroAlignInst : public IntrinsicInst {
public:
// Methods to support type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_align;
}
static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
class LLVM_LIBRARY_VISIBILITY AnyCoroEndInst : public IntrinsicInst {
enum { FrameArg, UnwindArg };
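The classof hooks above are what let the coroutine passes pick out llvm.coro.align calls with the usual isa/dyn_cast machinery. The real collection happens in coro::Shape::buildFrom (shown further down); the standalone sketch below is illustrative only, and CoroInstr.h is a private header under lib/Transforms/Coroutines.

#include "CoroInstr.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"

using namespace llvm;

// Walk every instruction in F and keep the llvm.coro.align calls.
static void collectCoroAligns(Function &F,
                              SmallVectorImpl<CoroAlignInst *> &Out) {
  for (Instruction &I : instructions(F))
    if (auto *CA = dyn_cast<CoroAlignInst>(&I))
      Out.push_back(CA);
}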


@@ -104,6 +104,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
CoroBeginInst *CoroBegin;
SmallVector<AnyCoroEndInst *, 4> CoroEnds;
SmallVector<CoroSizeInst *, 2> CoroSizes;
SmallVector<CoroAlignInst *, 2> CoroAligns;
SmallVector<AnyCoroSuspendInst *, 4> CoroSuspends;
SmallVector<CallInst*, 2> SwiftErrorOps;


@@ -1083,10 +1083,16 @@ static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
}
static void replaceFrameSize(coro::Shape &Shape) {
static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
if (Shape.ABI == coro::ABI::Async)
updateAsyncFuncPointerContextSize(Shape);
for (CoroAlignInst *CA : Shape.CoroAligns) {
CA->replaceAllUsesWith(
ConstantInt::get(CA->getType(), Shape.FrameAlign.value()));
CA->eraseFromParent();
}
if (Shape.CoroSizes.empty())
return;
@@ -1884,7 +1890,7 @@ static coro::Shape splitCoroutine(Function &F,
simplifySuspendPoints(Shape);
buildCoroutineFrame(F, Shape);
replaceFrameSize(Shape);
replaceFrameSizeAndAlignment(Shape);
// If there are no suspend points, no split required, just remove
// the allocation and deallocation blocks, they are not needed.


@@ -123,6 +123,7 @@ Value *coro::LowererBase::makeSubFnCall(Value *Arg, int Index,
static bool isCoroutineIntrinsicName(StringRef Name) {
// NOTE: Must be sorted!
static const char *const CoroIntrinsics[] = {
"llvm.coro.align",
"llvm.coro.alloc",
"llvm.coro.async.context.alloc",
"llvm.coro.async.context.dealloc",
@@ -268,6 +269,9 @@ void coro::Shape::buildFrom(Function &F)
case Intrinsic::coro_size:
CoroSizes.push_back(cast<CoroSizeInst>(II));
break;
case Intrinsic::coro_align:
CoroAligns.push_back(cast<CoroAlignInst>(II));
break;
case Intrinsic::coro_frame:
CoroFrames.push_back(cast<CoroFrameInst>(II));
break;


@@ -0,0 +1,54 @@
; Check that the coro.align intrinsic is lowered to the correct alignment.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
define i8* @f() "coroutine.presplit"="1" {
entry:
%x = alloca i64
%y = alloca i64
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%align = call i32 @llvm.coro.align.i32()
%alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
%sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %sp1, label %suspend [i8 0, label %resume
i8 1, label %cleanup]
resume:
%x.alias = bitcast i64* %x to i32*
call void @capture_call(i32* %x.alias)
%y.alias = bitcast i64* %y to i32*
call void @nocapture_call(i32* %y.alias)
br label %cleanup
cleanup:
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
call i1 @llvm.coro.end(i8* %hdl, i1 0)
ret i8* %hdl
}
; %x is spilled to the frame because it escapes; %y stays local because it does not
; escape. No alloca is over-aligned, so the frame keeps the default 8-byte alignment.
; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i1 }
; CHECK-LABEL: define i8* @f()
; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 8, i32 32)
; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])
declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i32 @llvm.coro.align.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i1 @llvm.coro.alloc(token)
declare i8* @llvm.coro.begin(token, i8*)
declare i1 @llvm.coro.end(i8*, i1)
declare void @capture_call(i32*)
declare void @nocapture_call(i32* nocapture)
declare noalias i8* @aligned_alloc(i32, i32)
declare void @free(i8*)


@@ -0,0 +1,46 @@
; Check that the coro.align intrinsic is lowered to the correct alignment.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
define i8* @f() "coroutine.presplit"="1" {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%align = call i32 @llvm.coro.align.i32()
%alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
%sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %sp1, label %suspend [i8 0, label %resume
i8 1, label %cleanup]
resume:
br label %cleanup
cleanup:
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
call i1 @llvm.coro.end(i8* %hdl, i1 0)
ret i8* %hdl
}
; No values live across the suspend point, so the frame holds only the resume/destroy
; pointers and the suspend index and keeps the default 8-byte alignment.
; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1 }
; CHECK-LABEL: define i8* @f()
; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 8, i32 24)
; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])
declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i32 @llvm.coro.align.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i1 @llvm.coro.alloc(token)
declare i8* @llvm.coro.begin(token, i8*)
declare i1 @llvm.coro.end(i8*, i1)
declare noalias i8* @aligned_alloc(i32, i32)
declare void @free(i8*)


@@ -0,0 +1,54 @@
; Check that the coro.align intrinsic is lowered to the correct alignment.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
define i8* @f() "coroutine.presplit"="1" {
entry:
%x = alloca i64, align 16
%y = alloca i64
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%align = call i32 @llvm.coro.align.i32()
%alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
%sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %sp1, label %suspend [i8 0, label %resume
i8 1, label %cleanup]
resume:
%x.alias = bitcast i64* %x to i32*
call void @capture_call(i32* %x.alias)
%y.alias = bitcast i64* %y to i32*
call void @capture_call(i32* %y.alias)
br label %cleanup
cleanup:
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
call i1 @llvm.coro.end(i8* %hdl, i1 0)
ret i8* %hdl
}
; Both %x and %y escape via @capture_call, so both are spilled to the frame;
; %x's 16-byte alignment raises the frame alignment to 16.
; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i64, i1 }
; CHECK-LABEL: define i8* @f()
; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 16, i32 40)
; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])
declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i32 @llvm.coro.align.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i1 @llvm.coro.alloc(token)
declare i8* @llvm.coro.begin(token, i8*)
declare i1 @llvm.coro.end(i8*, i1)
declare void @capture_call(i32*)
declare void @nocapture_call(i32* nocapture)
declare noalias i8* @aligned_alloc(i32, i32)
declare void @free(i8*)


@@ -0,0 +1,54 @@
; Check that the coro.align intrinsic is lowered to the correct alignment.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
define i8* @f() "coroutine.presplit"="1" {
entry:
%x = alloca i1, align 64
%y = alloca i64
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%align = call i32 @llvm.coro.align.i32()
%alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
%sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %sp1, label %suspend [i8 0, label %resume
i8 1, label %cleanup]
resume:
%x.alias = bitcast i1* %x to i32*
call void @capture_call(i32* %x.alias)
%y.alias = bitcast i64* %y to i32*
call void @capture_call(i32* %y.alias)
br label %cleanup
cleanup:
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
call i1 @llvm.coro.end(i8* %hdl, i1 0)
ret i8* %hdl
}
; Both %x and %y escape via @capture_call, so both are spilled to the frame;
; %x's 64-byte alignment raises the frame alignment to 64.
; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i1, [39 x i8], i1 }
; CHECK-LABEL: define i8* @f()
; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 64, i32 72)
; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])
declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i32 @llvm.coro.align.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i1 @llvm.coro.alloc(token)
declare i8* @llvm.coro.begin(token, i8*)
declare i1 @llvm.coro.end(i8*, i1)
declare void @capture_call(i32*)
declare void @nocapture_call(i32* nocapture)
declare noalias i8* @aligned_alloc(i32, i32)
declare void @free(i8*)


@@ -0,0 +1,54 @@
; Check that the coro.align intrinsic is lowered to the correct alignment.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
define i8* @f() "coroutine.presplit"="1" {
entry:
%x = alloca i1, align 64
%y = alloca i64, align 32
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%align = call i32 @llvm.coro.align.i32()
%alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
%sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %sp1, label %suspend [i8 0, label %resume
i8 1, label %cleanup]
resume:
%x.alias = bitcast i1* %x to i32*
call void @capture_call(i32* %x.alias)
%y.alias = bitcast i64* %y to i32*
call void @capture_call(i32* %y.alias)
br label %cleanup
cleanup:
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
call i1 @llvm.coro.end(i8* %hdl, i1 0)
ret i8* %hdl
}
; Both %x and %y escape via @capture_call, so both are spilled to the frame;
; the frame alignment becomes the largest alloca alignment (64).
; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, [15 x i8], i64, [24 x i8], i1 }
; CHECK-LABEL: define i8* @f()
; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 64, i32 72)
; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])
declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i32 @llvm.coro.align.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i1 @llvm.coro.alloc(token)
declare i8* @llvm.coro.begin(token, i8*)
declare i1 @llvm.coro.end(i8*, i1)
declare void @capture_call(i32*)
declare void @nocapture_call(i32* nocapture)
declare noalias i8* @aligned_alloc(i32, i32)
declare void @free(i8*)