Add an assembly __kmp_invoke_microtask for ppc64[le]
Clang no longer restricts itself to generating microtasks with a small number of arguments, and so an assembly implementation is required to prevent hitting the parameter limit present in the C implementation. This adds an implementation for ppc64[le]. llvm-svn: 270821
This commit is contained in:
parent
ec6f56eb39
commit
91e19a3de4
|
@ -1555,6 +1555,226 @@ KMP_LABEL(kmp_1):
|
|||
|
||||
#endif /* KMP_OS_LINUX && KMP_ARCH_AARCH64 */
|
||||
|
||||
#if KMP_ARCH_PPC64
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// typedef void (*microtask_t)( int *gtid, int *tid, ... );
|
||||
//
|
||||
// int
|
||||
// __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...),
|
||||
// int gtid, int tid,
|
||||
// int argc, void *p_argv[] ) {
|
||||
// (*pkfn)( & gtid, & tid, p_argv[0], ... );
|
||||
// return 1;
|
||||
// }
|
||||
//
|
||||
// parameters:
|
||||
// r3: pkfn
|
||||
||||
// r4: gtid
|
||||
||||
// r5: tid
|
||||
||||
// r6: argc
|
||||
||||
// r7: p_argv
|
||||
||||
// r8: &exit_frame
// (r8 is only read when OMPT_SUPPORT is enabled; a zero doubleword is
// stored through it after the microtask returns)
|
||||
//
|
||||
// return: r3 (always 1/TRUE)
|
||||
//
|
||||
.text
|
||||
# if KMP_ARCH_PPC64_LE
|
||||
.abiversion 2
|
||||
# endif
|
||||
.globl __kmp_invoke_microtask
|
||||
|
||||
# if KMP_ARCH_PPC64_LE
|
||||
.p2align 4
|
||||
# else
|
||||
.p2align 2
|
||||
# endif
|
||||
|
||||
.type __kmp_invoke_microtask,@function
|
||||
|
||||
// ELFv2 (little-endian build): the symbol is the global entry point, which
// derives r2 (TOC) from r12 before reaching the local entry point.
// Otherwise (ELFv1): the symbol names a three-doubleword function descriptor
// in .opd (entry address, TOC base, 0) and the code itself starts at
// .Lfunc_begin0 in .text.
# if KMP_ARCH_PPC64_LE
|
||||
__kmp_invoke_microtask:
|
||||
.Lfunc_begin0:
|
||||
.Lfunc_gep0:
|
||||
addis 2, 12, .TOC.-.Lfunc_gep0@ha
|
||||
addi 2, 2, .TOC.-.Lfunc_gep0@l
|
||||
.Lfunc_lep0:
|
||||
.localentry __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0
|
||||
# else
|
||||
.section .opd,"aw",@progbits
|
||||
__kmp_invoke_microtask:
|
||||
.p2align 3
|
||||
.quad .Lfunc_begin0
|
||||
.quad .TOC.@tocbase
|
||||
.quad 0
|
||||
.text
|
||||
.Lfunc_begin0:
|
||||
# endif
|
||||
|
||||
// -- Begin __kmp_invoke_microtask
|
||||
// mark_begin;
|
||||
|
||||
// We need to allocate a stack frame large enough to hold all of the parameters
|
||||
// on the stack for the microtask plus what this function needs. That's 48
|
||||
// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the
|
||||
// parameters to the microtask, plus 8 bytes to store the values of r4 and r5,
|
||||
// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes
|
||||
// to save r30 to hold a copy of r8.
//
// Local save area, addressed relative to r31 (the value of r1 on entry):
//    -8(r31)  saved r31
//   -16(r31)  saved r30 (only written when OMPT_SUPPORT is enabled; the
//             size constants below include this slot unconditionally)
//   -20(r31)  gtid (32-bit word)
//   -24(r31)  tid  (32-bit word)
//
// NOTE(review): the ELFv1 ABI is understood to require a parameter save area
// of at least 8 doublewords; for argc < 6 the size computed below reserves
// fewer. Confirm this is acceptable for the microtasks called from here.
|
||||
|
||||
.cfi_startproc
|
||||
mflr 0
|
||||
std 31, -8(1)
|
||||
std 0, 16(1)
|
||||
|
||||
// This is unusual because normally we'd set r31 equal to r1 after the stack
|
||||
// frame is established. In this case, however, we need to dynamically compute
|
||||
// the stack frame size, and so we keep a direct copy of r1 to access our
|
||||
// register save areas and restore the r1 value before returning.
|
||||
mr 31, 1
|
||||
.cfi_def_cfa_register r31
|
||||
.cfi_offset r31, -8
|
||||
.cfi_offset lr, 16
|
||||
|
||||
// Compute the size necessary for the local stack frame.
// The constant is the fixed part (ELFv2: 32 + 16 + 24 = 72, ELFv1:
// 48 + 16 + 24 = 88); r0 = 8 * argc is added and the sum is negated so that
// it can be used as the (downward) displacement for stdux below.
|
||||
# if KMP_ARCH_PPC64_LE
|
||||
li 12, 72
|
||||
# else
|
||||
li 12, 88
|
||||
# endif
|
||||
sldi 0, 6, 3
|
||||
add 12, 0, 12
|
||||
neg 12, 12
|
||||
|
||||
// We need to make sure that the stack frame stays aligned (to 16 bytes, except
|
||||
// under the BG/Q CNK, where it must be to 32 bytes).
// Masking the negative size with -16 (or -32) clears its low bits, which
// moves it further from zero, i.e. rounds the frame size up.
|
||||
# if KMP_OS_CNK
|
||||
li 0, -32
|
||||
# else
|
||||
li 0, -16
|
||||
# endif
|
||||
and 12, 0, 12
|
||||
|
||||
// Establish the local stack frame.
// stdux stores the old r1 at the new stack top (the back chain) and updates
// r1 in the same instruction.
|
||||
stdux 1, 1, 12
|
||||
|
||||
// With OMPT, save r30 and keep the exit_frame pointer (r8) in it, since r8 is
// reloaded with a microtask argument below.
# if OMPT_SUPPORT
|
||||
.cfi_offset r30, -16
|
||||
std 30, -16(31)
|
||||
mr 30, 8
|
||||
# endif
|
||||
|
||||
// Store gtid and tid to the stack because they're passed by reference to the microtask.
|
||||
stw 4, -20(31)
|
||||
stw 5, -24(31)
|
||||
|
||||
// Move argc to r12 and p_argv to r4 so that r5-r10 can receive the microtask
// arguments.
mr 12, 6
|
||||
mr 4, 7
|
||||
|
||||
// Load up to six entries of p_argv into r5-r10, branching to the call as soon
// as argc is exhausted.
cmpwi 0, 12, 1
|
||||
blt 0, .Lcall
|
||||
|
||||
ld 5, 0(4)
|
||||
|
||||
cmpwi 0, 12, 2
|
||||
blt 0, .Lcall
|
||||
|
||||
ld 6, 8(4)
|
||||
|
||||
cmpwi 0, 12, 3
|
||||
blt 0, .Lcall
|
||||
|
||||
ld 7, 16(4)
|
||||
|
||||
cmpwi 0, 12, 4
|
||||
blt 0, .Lcall
|
||||
|
||||
ld 8, 24(4)
|
||||
|
||||
cmpwi 0, 12, 5
|
||||
blt 0, .Lcall
|
||||
|
||||
ld 9, 32(4)
|
||||
|
||||
cmpwi 0, 12, 6
|
||||
blt 0, .Lcall
|
||||
|
||||
ld 10, 40(4)
|
||||
|
||||
cmpwi 0, 12, 7
|
||||
blt 0, .Lcall
|
||||
|
||||
// There are more than 6 entries in p_argv, so we need to store the
|
||||
// remainder to the stack. CTR is set to the number of remaining entries
// (argc - 6).
|
||||
addi 12, 12, -6
|
||||
mtctr 12
|
||||
|
||||
// These are set to 8 bytes before the first desired load/store address (we're
|
||||
// using pre-increment loads and stores in the loop below). The first
|
||||
// stack-passed parameter slot for the microtask (after the 8 doublewords that
// correspond to r3-r10) is 48 + 8*8 == 112 bytes above r1 for ELFv1 and
|
||||
// 32 + 8*8 == 96 bytes above r1 for ELFv2. r4 is advanced to &p_argv[5].
|
||||
addi 4, 4, 40
|
||||
# if KMP_ARCH_PPC64_LE
|
||||
addi 12, 1, 88
|
||||
# else
|
||||
addi 12, 1, 104
|
||||
# endif
|
||||
|
||||
// Copy one p_argv entry per iteration into the outgoing parameter area.
.Lnext:
|
||||
ldu 0, 8(4)
|
||||
stdu 0, 8(12)
|
||||
bdnz .Lnext
|
||||
|
||||
// Save our TOC pointer (r2) in the new frame; it is reloaded right after the
// call. For ELFv2 the entry address is passed in r12, which is the register
// a global entry point uses to derive its TOC (as in our own prologue above).
.Lcall:
|
||||
# if KMP_ARCH_PPC64_LE
|
||||
std 2, 24(1)
|
||||
mr 12, 3
|
||||
#else
|
||||
std 2, 40(1)
|
||||
// For ELFv1, we need to load the actual function address from the function descriptor.
// The descriptor doublewords at 0/8/16 are loaded into r12 (entry address),
// r2 (TOC pointer) and r11 respectively.
|
||||
ld 12, 0(3)
|
||||
ld 2, 8(3)
|
||||
ld 11, 16(3)
|
||||
#endif
|
||||
|
||||
// First two microtask arguments: r3 = &gtid, r4 = &tid (the words stored in
// the local save area above).
addi 3, 31, -20
|
||||
addi 4, 31, -24
|
||||
|
||||
// Indirect call through CTR, then restore our TOC pointer.
mtctr 12
|
||||
bctrl
|
||||
# if KMP_ARCH_PPC64_LE
|
||||
ld 2, 24(1)
|
||||
# else
|
||||
ld 2, 40(1)
|
||||
# endif
|
||||
|
||||
// With OMPT, store a zero doubleword through the exit_frame pointer kept in r30.
# if OMPT_SUPPORT
|
||||
li 3, 0
|
||||
std 3, 0(30)
|
||||
# endif
|
||||
|
||||
// Return value: always 1.
li 3, 1
|
||||
|
||||
# if OMPT_SUPPORT
|
||||
ld 30, -16(31)
|
||||
# endif
|
||||
|
||||
// Tear down the frame: r31 still holds the entry value of r1, so restore r1
// from it first and then reload LR and r31 from their save slots.
mr 1, 31
|
||||
ld 0, 16(1)
|
||||
ld 31, -8(1)
|
||||
mtlr 0
|
||||
blr
|
||||
|
||||
// NOTE(review): zero words emitted after the return; presumably a
// traceback-table placeholder - TODO confirm.
.long 0
|
||||
.quad 0
|
||||
.Lfunc_end0:
|
||||
.size __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0
|
||||
.cfi_endproc
|
||||
|
||||
// -- End __kmp_invoke_microtask
|
||||
|
||||
#endif /* KMP_ARCH_PPC64 */
|
||||
|
||||
#if KMP_ARCH_ARM
|
||||
.data
|
||||
.comm .gomp_critical_user_,32,8
|
||||
|
|
|
@ -2575,7 +2575,7 @@ __kmp_get_load_balance( int max )
|
|||
|
||||
#endif // USE_LOAD_BALANCE
|
||||
|
||||
#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || (KMP_OS_LINUX && KMP_ARCH_AARCH64))
|
||||
#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || (KMP_OS_LINUX && KMP_ARCH_AARCH64) || KMP_ARCH_PPC64)
|
||||
|
||||
// we really only need the case with 1 argument, because CLANG always builds
|
||||
// a struct of pointers to shared variables referenced in the outlined function
|
||||
|
|
Loading…
Reference in New Issue