Add an assembly __kmp_invoke_microtask for ppc64[le]

Clang no longer restricts itself to generating microtasks with a small number
of arguments, and so an assembly implementation is required to prevent hitting
the parameter limit present in the C implementation. This adds an
implementation for ppc64[le].

llvm-svn: 270821
This commit is contained in:
Hal Finkel 2016-05-26 04:48:14 +00:00
parent ec6f56eb39
commit 91e19a3de4
2 changed files with 221 additions and 1 deletions

View File

@ -1555,6 +1555,226 @@ KMP_LABEL(kmp_1):
#endif /* KMP_OS_LINUX && KMP_ARCH_AARCH64 */
#if KMP_ARCH_PPC64
//------------------------------------------------------------------------
//
// typedef void (*microtask_t)( int *gtid, int *tid, ... );
//
// int
// __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[] ) {
//    (*pkfn)( & gtid, & tid, p_argv[0], ... );
//    return 1;
// }
//
// Target: 64-bit PowerPC, ELFv1 (big-endian) or ELFv2 (KMP_ARCH_PPC64_LE).
//
// parameters:
//      r3:     pkfn
//      r4:     gtid
//      r5:     tid
//      r6:     argc
//      r7:     p_argv
//      r8:     &exit_frame (only read when OMPT_SUPPORT is enabled)
//
// return:      r3      (always 1/TRUE)
//
// register roles inside the function:
//      r31:    copy of the caller's r1; base for our save slots and the
//              gtid/tid spill slots, and the value r1 is restored from
//      r30:    copy of r8 (&exit_frame) kept across the call (OMPT only)
//      r12:    scratch: frame size, then argc, then the store cursor for
//              stack-passed arguments, finally the microtask entry address
//      r0:     scratch
//
// slots addressed relative to r31 (i.e. just below the caller's frame):
//       -8:    saved r31
//      -16:    saved r30 (OMPT only)
//      -20:    gtid (32-bit), passed by reference to the microtask
//      -24:    tid  (32-bit), passed by reference to the microtask
//
        .text
# if KMP_ARCH_PPC64_LE
        .abiversion 2
# endif
        .globl  __kmp_invoke_microtask
# if KMP_ARCH_PPC64_LE
        .p2align        4
# else
        .p2align        2
# endif
        .type   __kmp_invoke_microtask,@function
# if KMP_ARCH_PPC64_LE
__kmp_invoke_microtask:
.Lfunc_begin0:
// ELFv2 global entry point: derive the TOC pointer (r2) from the entry
// address that the caller left in r12.
.Lfunc_gep0:
        addis 2, 12, .TOC.-.Lfunc_gep0@ha
        addi 2, 2, .TOC.-.Lfunc_gep0@l
.Lfunc_lep0:
        .localentry     __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0
# else
// ELFv1: the public symbol names a function descriptor in .opd
// (entry address, TOC base, environment); the code itself lives in .text.
        .section        .opd,"aw",@progbits
__kmp_invoke_microtask:
        .p2align        3
        .quad   .Lfunc_begin0
        .quad   .TOC.@tocbase
        .quad   0
        .text
.Lfunc_begin0:
# endif
// -- Begin __kmp_invoke_microtask
// mark_begin;
// We need to allocate a stack frame large enough to hold all of the parameters
// on the stack for the microtask plus what this function needs. That's 48
// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the
// parameters to the microtask, plus 8 bytes to store the values of r4 and r5,
// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes
// to save r30 to hold a copy of r8.
        .cfi_startproc
        mflr 0
        std 31, -8(1)
        std 0, 16(1)
// This is unusual because normally we'd set r31 equal to r1 after the stack
// frame is established. In this case, however, we need to dynamically compute
// the stack frame size, and so we keep a direct copy of r1 to access our
// register save areas and restore the r1 value before returning.
        mr 31, 1
        .cfi_def_cfa_register r31
        .cfi_offset r31, -8
        .cfi_offset lr, 16
// Compute the size necessary for the local stack frame:
//   fixed part (72 for ELFv2 = 32 + 8*2 + 8 + 8 + 8,
//               88 for ELFv1 = 48 + 8*2 + 8 + 8 + 8) + 8*argc,
// negated so that it can be used directly as the stdux displacement.
# if KMP_ARCH_PPC64_LE
        li 12, 72
# else
        li 12, 88
# endif
        sldi 0, 6, 3
        add 12, 0, 12
        neg 12, 12
// We need to make sure that the stack frame stays aligned (to 16 bytes, except
// under the BG/Q CNK, where it must be to 32 bytes). Masking the negative
// size rounds the allocation up in magnitude.
# if KMP_OS_CNK
        li 0, -32
# else
        li 0, -16
# endif
        and 12, 0, 12
// Establish the local stack frame (stores the back chain and updates r1).
        stdux 1, 1, 12
# if OMPT_SUPPORT
        .cfi_offset r30, -16
        std 30, -16(31)
// Record the address of the frame we just established in *exit_frame for the
// duration of the microtask call; it is reset to 0 after the call returns.
        std 1, 0(8)
// r8 is needed below as an argument register, so keep &exit_frame in r30.
        mr 30, 8
# endif
// Store gtid and tid to the stack because they're passed by reference to the microtask.
        stw 4, -20(31)
        stw 5, -24(31)
// r12 = argc, r4 = p_argv. r5..r10 are filled from p_argv[0..5] as long as
// argc says those entries exist; r3/r4 are set up just before the call.
        mr 12, 6
        mr 4, 7
        cmpwi 0, 12, 1
        blt      0, .Lcall
        ld 5, 0(4)
        cmpwi 0, 12, 2
        blt      0, .Lcall
        ld 6, 8(4)
        cmpwi 0, 12, 3
        blt      0, .Lcall
        ld 7, 16(4)
        cmpwi 0, 12, 4
        blt      0, .Lcall
        ld 8, 24(4)
        cmpwi 0, 12, 5
        blt      0, .Lcall
        ld 9, 32(4)
        cmpwi 0, 12, 6
        blt      0, .Lcall
        ld 10, 40(4)
        cmpwi 0, 12, 7
        blt      0, .Lcall
// There are more than 6 microtask parameters, so we need to store the
// remainder to the stack. CTR = argc - 6 = number of entries left to copy.
        addi 12, 12, -6
        mtctr 12
// These are set to 8 bytes before the first desired store address (we're using
// pre-increment loads and stores in the loop below). The parameter save area
// for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and
// 32 + 8*8 == 96 bytes above r1 for ELFv2.
        addi 4, 4, 40
# if KMP_ARCH_PPC64_LE
        addi 12, 1, 88
# else
        addi 12, 1, 104
# endif
// Copy p_argv[6..argc-1] into the outgoing parameter save area.
.Lnext:
        ldu 0, 8(4)
        stdu 0, 8(12)
        bdnz .Lnext
.Lcall:
// Save our TOC pointer in the ABI-defined TOC save slot of the new frame and
// obtain the microtask's entry address in r12.
# if KMP_ARCH_PPC64_LE
        std 2, 24(1)
        mr 12, 3
# else
        std 2, 40(1)
// For ELFv1, we need to load the actual function address from the function
// descriptor, together with the callee's TOC pointer (r2) and the third
// descriptor doubleword (r11).
        ld 12, 0(3)
        ld 2, 8(3)
        ld 11, 16(3)
# endif
// First two microtask arguments: &gtid and &tid (the spill slots above).
        addi 3, 31, -20
        addi 4, 31, -24
        mtctr 12
        bctrl
// Restore our own TOC pointer after the indirect call.
# if KMP_ARCH_PPC64_LE
        ld 2, 24(1)
# else
        ld 2, 40(1)
# endif
# if OMPT_SUPPORT
// The microtask has returned: clear *exit_frame.
        li 3, 0
        std 3, 0(30)
# endif
// Return value: always 1.
        li 3, 1
# if OMPT_SUPPORT
        ld 30, -16(31)
# endif
// Tear down the dynamically sized frame by restoring r1 from the copy in r31,
// then reload LR and r31 from their save slots.
        mr 1, 31
        ld 0, 16(1)
        ld 31, -8(1)
        mtlr 0
        blr
// NOTE(review): 12 zero bytes following the code; presumably the
// traceback-table placeholder that compilers emit after a function -- confirm.
        .long   0
        .quad   0
.Lfunc_end0:
        .size   __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0
        .cfi_endproc
// -- End __kmp_invoke_microtask
#endif /* KMP_ARCH_PPC64 */
#if KMP_ARCH_ARM
.data
.comm .gomp_critical_user_,32,8

View File

@ -2575,7 +2575,7 @@ __kmp_get_load_balance( int max )
#endif // USE_LOAD_BALANCE
#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || (KMP_OS_LINUX && KMP_ARCH_AARCH64))
#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || (KMP_OS_LINUX && KMP_ARCH_AARCH64) || KMP_ARCH_PPC64)
// we really only need the case with 1 argument, because CLANG always builds
// a struct of pointers to shared variables referenced in the outlined function