Added propagation of the master thread's initial stack size to workers when that size is not too big.

Currently implemented only for non-Windows 64-bit platforms.

Differential Revision: https://reviews.llvm.org/D62488

llvm-svn: 362618
This commit is contained in:
Andrey Churbanov 2019-06-05 16:14:47 +00:00
parent de586bd1fd
commit d47f5488cf
4 changed files with 91 additions and 0 deletions

View File

@ -3263,6 +3263,7 @@ extern void __kmp_init_random(kmp_info_t *thread);
extern kmp_r_sched_t __kmp_get_schedule_global(void);
extern void __kmp_adjust_num_threads(int new_nproc);
// Adjusts *val in place so it is usable as a worker-thread stack size:
// overly large values are capped and values below KMP_MIN_STKSIZE are raised.
extern void __kmp_check_stksize(size_t *val);
extern void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL);
extern void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL);

View File

@ -289,6 +289,20 @@ static void __kmp_stg_parse_bool(char const *name, char const *value,
}
} // __kmp_stg_parse_bool
// Defined at this point in the file so that it can call the static helper
// __kmp_round4k.
//
// Sanitizes a candidate worker-thread stack size in place.
//   val - in/out: the size to check; overwritten with the adjusted size.
void __kmp_check_stksize(size_t *val) {
  size_t stksize = *val;

  // A very large system stack limit is not handed to every worker: cap it
  // at 16x the default size (a heuristic, not a hard requirement).
  if (stksize > KMP_DEFAULT_STKSIZE * 16) {
    stksize = KMP_DEFAULT_STKSIZE * 16;
  }
  // Never go below the minimum stack size.
  if (stksize < KMP_MIN_STKSIZE) {
    stksize = KMP_MIN_STKSIZE;
  }
  // Upper bound; noted by the original author as not reachable at present
  // but kept in case the limits change.
  if (stksize > KMP_MAX_STKSIZE) {
    stksize = KMP_MAX_STKSIZE;
  }
#if KMP_OS_DARWIN
  // On Darwin the size is additionally passed through __kmp_round4k.
  stksize = __kmp_round4k(stksize);
#endif // KMP_OS_DARWIN

  *val = stksize;
}
static void __kmp_stg_parse_size(char const *name, char const *value,
size_t size_min, size_t size_max,
int *is_specified, size_t *out,

View File

@ -1833,6 +1833,17 @@ void __kmp_runtime_initialize(void) {
__kmp_xproc = __kmp_get_xproc();
#if ! KMP_32_BIT_ARCH
struct rlimit rlim;
// read stack size of calling thread, save it as default for worker threads;
// this should be done before reading environment variables
status = getrlimit(RLIMIT_STACK, &rlim);
if (status == 0) { // success?
__kmp_stksize = rlim.rlim_cur;
__kmp_check_stksize(&__kmp_stksize); // check value and adjust if needed
}
#endif /* KMP_32_BIT_ARCH */
if (sysconf(_SC_THREADS)) {
/* Query the maximum number of threads */

View File

@ -0,0 +1,65 @@
// RUN: %libomp-compile-and-run
// https://bugs.llvm.org/show_bug.cgi?id=26540 requested that the
// stack size be propagated from the master thread to the workers.
// The library propagates a stack size that is not too big
// on the Linux x86_64 platform (Windows is skipped for now).
//
// The test checks that workers can use more than 4MB
// of stack (4MB was the historical default
// stack size of a worker thread in the runtime library).
#include <stdio.h>
#include <omp.h>
#if !defined(_WIN32)
#include <sys/resource.h> // getrlimit
#endif
#define STK 4800000
// Stack-consuming workload for the test.
//
// Allocates an n-element array of doubles on the calling thread's stack
// (as a variable-length array), stores i / (n + 2) in element i, and then
// returns the sum of element / n over the whole array.
//   n  - number of array elements; must be positive.
//   th - caller's thread number; accepted but not used in the computation.
double foo(int n, int th)
{
  double arr[n]; // deliberately on the stack: n * sizeof(double) bytes
  double res = 0.0;
  int i = 0;

  // Fill pass: touch every element so the whole array is really used.
  while (i < n) {
    arr[i] = (double)i / (n + 2);
    ++i;
  }

  // Accumulate pass: same front-to-back order as the fill.
  for (const double *p = arr; p != arr + n; ++p) {
    res += *p / n;
  }
  return res;
}
// Test driver.
//
// On Windows the test is skipped. Elsewhere it reads the RLIMIT_STACK soft
// limit of the calling process and, when the platform has pointers wider
// than 4 bytes and the limit exceeds STK, runs foo() in every thread of a
// parallel region with an array of about STK bytes, summing the results.
// Returns 0 when the test passes or is skipped, 1 when the sum is too small.
int main(int argc, char *argv[])
{
#if defined(_WIN32)
  // don't test Windows
  printf("stack propagation not implemented, skipping test...\n");
  return 0;
#else
  struct rlimit rlim;
  double val = 0.0;
  int m = STK / 8; // number of doubles; the array alone takes STK bytes
  // read stack size limit of the calling thread
  int status = getrlimit(RLIMIT_STACK, &rlim);

  // Skip on 32-bit systems, when the limit could not be read, or when the
  // initial stack size limit is too small for the workload.
  if (sizeof(void *) <= 4 || status != 0 || rlim.rlim_cur <= STK) {
    printf("too small stack size limit (needs about 8MB), skipping test...\n");
    return 0;
  }

  // Every thread calls foo() with the large on-stack array.
  #pragma omp parallel reduction(+:val)
  {
    val += foo(m, omp_get_thread_num());
  }

  if (val <= 0.1) {
    printf("failed, val = %f\n", val);
    return 1;
  }
  printf("passed\n");
  return 0;
#endif // _WIN32
}