[OpenMP] Make use of sched_yield optional in runtime

This patch cleans up the yielding code and makes it optional. An
environment variable, KMP_USE_YIELD, was added. Yielding is still
on by default (KMP_USE_YIELD=1), but can be turned off completely
(KMP_USE_YIELD=0), or turned on only when oversubscription is detected
(KMP_USE_YIELD=2). Note that oversubscription cannot always be detected
by the runtime; for example, when the runtime is initialized and the
process then forks, oversubscription currently cannot be detected across
the resulting multiple instances of the runtime.
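
A rough stand-alone model of how those three values are interpreted is
sketched below. It mirrors the new KMP_OVERSUBSCRIBED, KMP_TRY_YIELD and
KMP_TRY_YIELD_OVERSUB macros from this patch, but the plain C++ globals
and function names here are simplified stand-ins, not the runtime's own
code.

  int use_yield = 1;  // 0 = never, 1 = always (default), 2 = if oversubscribed
  int nth = 0;        // threads currently registered with the runtime
  int avail_proc = 0; // processors available to the process
  int xproc = 0;      // processors detected on the machine

  bool oversubscribed() { // cf. KMP_OVERSUBSCRIBED
    return nth > (avail_proc ? avail_proc : xproc);
  }
  bool try_yield() { // cf. KMP_TRY_YIELD
    return use_yield == 1 || (use_yield == 2 && oversubscribed());
  }
  bool try_yield_oversub() { // cf. KMP_TRY_YIELD_OVERSUB
    return (use_yield == 1 || use_yield == 2) && oversubscribed();
  }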

Because yielding can now be controlled by the user, the library mode
settings (from KMP_LIBRARY) for throughput and turnaround have been
adjusted by altering blocktime, unless blocktime was also explicitly set.
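
For reference, a simplified sketch of the adjusted logic follows; the
real code is in __kmp_aux_set_library() and the KMP_LIBRARY parser in
the diff below, and the stand-alone names and defaults here are
approximations of the runtime's state.

  #include <climits>

  enum library_type { library_serial, library_turnaround, library_throughput };

  int use_yield = 1;         // KMP_USE_YIELD value
  int use_yield_exp_set = 0; // nonzero if KMP_USE_YIELD was set explicitly
  int dflt_blocktime = 200;  // default KMP_BLOCKTIME in milliseconds

  void aux_set_library(library_type lib) {
    switch (lib) {
    case library_turnaround:
      if (use_yield == 1 && !use_yield_exp_set)
        use_yield = 2;              // only yield when oversubscribed
      break;
    case library_throughput:
      if (dflt_blocktime == INT_MAX) // "infinite" blocktime requested
        dflt_blocktime = 200;        // cap it so idle threads can sleep
      break;
    default:
      break;
    }
  }
  // The KMP_LIBRARY parser additionally defaults blocktime to 0 for
  // throughput/multiuser when KMP_BLOCKTIME itself was not specified.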

In the original code, there were a number of places where a double yield
might have been done under oversubscription. This version checks for
oversubscription first and, if that check is not going to yield, falls
back to the spin check.
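
The new KMP_YIELD_OVERSUB_ELSE_SPIN(count) macro encodes that rule. A
rough stand-alone equivalent is sketched below, assuming a POSIX
sched_yield(); the parameter names are stand-ins, and the real macro
also begins with a KMP_CPU_PAUSE().

  #include <sched.h>

  // One pass of the spin loop: yield at most once, either because we are
  // oversubscribed or because the spin count ran out.
  void yield_oversub_else_spin(int &count, int yield_next,
                               bool oversubscribed, int use_yield) {
    if ((use_yield == 1 || use_yield == 2) && oversubscribed) {
      sched_yield();             // oversubscribed: yield right away
    } else if (use_yield == 1) { // otherwise yield only every few spins
      count -= 2;
      if (count == 0) {
        sched_yield();
        count = yield_next;      // __kmp_yield_next in the runtime
      }
    }
    // At most one yield per pass, instead of the old back-to-back
    // KMP_YIELD(oversubscribed) followed by KMP_YIELD_SPIN(spins).
  }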

Patch by Terry Wilmarth

Differential Revision: https://reviews.llvm.org/D58148

llvm-svn: 355120
Jonathan Peyton 2019-02-28 19:11:29 +00:00
parent 23452e1c85
commit e47d32f165
19 changed files with 184 additions and 386 deletions

View File

@ -158,7 +158,7 @@
#
# Regular entry points
__kmp_wait_yield_4
__kmp_wait_4
__kmp_fork_call
__kmp_invoke_microtask
%ifdef KMP_USE_MONITOR

View File

@ -83,7 +83,7 @@ VERSION {
__kmp_reap_worker;
__kmp_release_64;
__kmp_wait_64;
__kmp_wait_yield_4;
__kmp_wait_4;
# ittnotify symbols to be used by debugger
__kmp_itt_fini_ittlib;

View File

@ -981,10 +981,6 @@ extern kmp_uint64 __kmp_now_nsec();
(KMP_BLOCKTIME(team, tid) * KMP_USEC_PER_SEC)
#define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
#endif
#define KMP_YIELD_NOW() \
(KMP_NOW_MSEC() / KMP_MAX(__kmp_dflt_blocktime, 1) % \
(__kmp_yield_on_count + __kmp_yield_off_count) < \
(kmp_uint32)__kmp_yield_on_count)
#endif // KMP_USE_MONITOR
#define KMP_MIN_STATSCOLS 40
@ -999,14 +995,6 @@ extern kmp_uint64 __kmp_now_nsec();
#define KMP_MAX_CHUNK (INT_MAX - 1)
#define KMP_DEFAULT_CHUNK 1
#define KMP_MIN_INIT_WAIT 1
#define KMP_MAX_INIT_WAIT (INT_MAX / 2)
#define KMP_DEFAULT_INIT_WAIT 2048U
#define KMP_MIN_NEXT_WAIT 1
#define KMP_MAX_NEXT_WAIT (INT_MAX / 2)
#define KMP_DEFAULT_NEXT_WAIT 1024U
#define KMP_DFLT_DISP_NUM_BUFF 7
#define KMP_MAX_ORDERED 8
@ -1090,7 +1078,7 @@ extern void __kmp_x86_cpuid(int mode, int mode2, struct kmp_cpuid *p);
extern void __kmp_x86_pause(void);
#elif KMP_MIC
// Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed
// regression after removal of extra PAUSE from KMP_YIELD_SPIN(). Changing
// regression after removal of extra PAUSE from spin loops. Changing
// the delay from 100 to 300 showed even better performance than double PAUSE
// on Spec OMP2001 and LCPC tasking tests, no regressions on EPCC.
static inline void __kmp_x86_pause(void) { _mm_delay_32(300); }
@ -1115,31 +1103,54 @@ static inline void __kmp_x86_pause(void) { _mm_pause(); }
#define KMP_INIT_YIELD(count) \
{ (count) = __kmp_yield_init; }
#define KMP_OVERSUBSCRIBED \
(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))
#define KMP_TRY_YIELD \
((__kmp_use_yield == 1) || (__kmp_use_yield == 2 && (KMP_OVERSUBSCRIBED)))
#define KMP_TRY_YIELD_OVERSUB \
((__kmp_use_yield == 1 || __kmp_use_yield == 2) && (KMP_OVERSUBSCRIBED))
#define KMP_YIELD(cond) \
{ \
KMP_CPU_PAUSE(); \
__kmp_yield((cond)); \
if ((cond) && (KMP_TRY_YIELD)) \
__kmp_yield(); \
}
#define KMP_YIELD_OVERSUB() \
{ \
KMP_CPU_PAUSE(); \
if ((KMP_TRY_YIELD_OVERSUB)) \
__kmp_yield(); \
}
// Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround,
// there should be no yielding since initial value from KMP_INIT_YIELD() is odd.
#define KMP_YIELD_WHEN(cond, count) \
{ \
KMP_CPU_PAUSE(); \
(count) -= 2; \
if (!(count)) { \
__kmp_yield(cond); \
(count) = __kmp_yield_next; \
} \
}
#define KMP_YIELD_SPIN(count) \
{ \
KMP_CPU_PAUSE(); \
(count) -= 2; \
if (!(count)) { \
__kmp_yield(1); \
(count) = __kmp_yield_next; \
if (KMP_TRY_YIELD) { \
(count) -= 2; \
if (!(count)) { \
__kmp_yield(); \
(count) = __kmp_yield_next; \
} \
} \
}
#define KMP_YIELD_OVERSUB_ELSE_SPIN(count) \
{ \
KMP_CPU_PAUSE(); \
if ((KMP_TRY_YIELD_OVERSUB)) \
__kmp_yield(); \
else if (__kmp_use_yield == 1) { \
(count) -= 2; \
if (!(count)) { \
__kmp_yield(); \
(count) = __kmp_yield_next; \
} \
} \
}
@ -2945,10 +2956,6 @@ extern kmp_lock_t __kmp_global_lock; /* control OS/global access */
extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */
extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
/* used for yielding spin-waits */
extern unsigned int __kmp_init_wait; /* initial number of spin-tests */
extern unsigned int __kmp_next_wait; /* susequent number of spin-tests */
extern enum library_type __kmp_library;
extern enum sched_type __kmp_sched; /* default runtime scheduling */
@ -2977,16 +2984,11 @@ extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */
extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
#endif
extern kmp_int32 __kmp_use_yield;
extern kmp_int32 __kmp_use_yield_exp_set;
extern kmp_uint32 __kmp_yield_init;
extern kmp_uint32 __kmp_yield_next;
#if KMP_USE_MONITOR
extern kmp_uint32 __kmp_yielding_on;
#endif
extern kmp_uint32 __kmp_yield_cycle;
extern kmp_int32 __kmp_yield_on_count;
extern kmp_int32 __kmp_yield_off_count;
/* ------------------------------------------------------------------------- */
extern int __kmp_allThreadsSpecified;
@ -3309,7 +3311,7 @@ extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams,
int num_threads);
#endif
extern void __kmp_yield(int cond);
extern void __kmp_yield();
extern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
enum sched_type schedule, kmp_int32 lb,
@ -3374,13 +3376,11 @@ extern kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker);
extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker);
extern kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker);
extern kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker);
extern kmp_uint32 __kmp_wait_yield_4(kmp_uint32 volatile *spinner,
kmp_uint32 checker,
kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
void *obj);
extern void __kmp_wait_yield_4_ptr(void *spinner, kmp_uint32 checker,
kmp_uint32 (*pred)(void *, kmp_uint32),
void *obj);
extern kmp_uint32 __kmp_wait_4(kmp_uint32 volatile *spinner, kmp_uint32 checker,
kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
void *obj);
extern void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
kmp_uint32 (*pred)(void *, kmp_uint32), void *obj);
class kmp_flag_32;
class kmp_flag_64;

View File

@ -683,7 +683,7 @@ void __kmpc_flush(ident_t *loc) {
// }
// and adding the yield here is good for at least a 10x speedup
// when running >2 threads per core (on the NAS LU benchmark).
__kmp_yield(TRUE);
__kmp_yield();
#endif
#else
#error Unknown or unsupported architecture
@ -993,24 +993,18 @@ __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
kmp_uint32 spins; \
KMP_FSYNC_PREPARE(l); \
KMP_INIT_YIELD(spins); \
if (TCR_4(__kmp_nth) > \
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
KMP_YIELD(TRUE); \
} else { \
KMP_YIELD_SPIN(spins); \
} \
kmp_backoff_t backoff = __kmp_spin_backoff_params; \
while ( \
KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
!__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
__kmp_spin_backoff(&backoff); \
do { \
if (TCR_4(__kmp_nth) > \
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
KMP_YIELD(TRUE); \
} else { \
KMP_YIELD_SPIN(spins); \
} \
} \
__kmp_spin_backoff(&backoff); \
} while ( \
KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
!__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
} \
KMP_FSYNC_ACQUIRED(l); \
}
@ -1096,8 +1090,7 @@ __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
} \
KMP_MB(); \
KMP_YIELD(TCR_4(__kmp_nth) > \
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
KMP_YIELD_OVERSUB(); \
}
#endif // KMP_USE_FUTEX
@ -3976,8 +3969,8 @@ void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
// __kmp_dispatch_num_buffers)
if (idx != sh_buf->doacross_buf_idx) {
// Shared buffer is occupied, wait for it to be free
__kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
__kmp_eq_4, NULL);
__kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
__kmp_eq_4, NULL);
}
#if KMP_32_BIT_ARCH
// Check if we are the first thread. After the CAS the first thread gets 0,

View File

@ -858,9 +858,9 @@ __kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d "
"sh->buffer_index:%d\n",
gtid, my_buffer_index, sh->buffer_index));
__kmp_wait_yield<kmp_uint32>(&sh->buffer_index, my_buffer_index,
__kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
// Note: KMP_WAIT_YIELD() cannot be used there: buffer index and
__kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
__kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
// Note: KMP_WAIT() cannot be used there: buffer index and
// my_buffer_index are *always* 32-bit integers.
KMP_MB(); /* is this necessary? */
KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
@ -1004,8 +1004,8 @@ static void __kmp_dispatch_finish(int gtid, ident_t *loc) {
}
#endif
__kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
__kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
__kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
__kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
KMP_MB(); /* is this necessary? */
#ifdef KMP_DEBUG
{
@ -1073,8 +1073,8 @@ static void __kmp_dispatch_finish_chunk(int gtid, ident_t *loc) {
}
#endif
__kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
__kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
__kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
__kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
KMP_MB(); /* is this necessary? */
KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting "
@ -2489,10 +2489,10 @@ kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker) {
}
kmp_uint32
__kmp_wait_yield_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
void *obj // Higher-level synchronization object, or NULL.
) {
__kmp_wait_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
void *obj // Higher-level synchronization object, or NULL.
) {
// note: we may not belong to a team at this point
volatile kmp_uint32 *spin = spinner;
kmp_uint32 check = checker;
@ -2509,20 +2509,16 @@ __kmp_wait_yield_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
split. It causes problems with infinite recursion because of exit lock */
/* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
__kmp_abort_thread(); */
/* if we have waited a bit, or are oversubscribed, yield */
/* pause is in the following code */
KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
KMP_YIELD_SPIN(spins);
KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
}
KMP_FSYNC_SPIN_ACQUIRED(obj);
return r;
}
void __kmp_wait_yield_4_ptr(
void *spinner, kmp_uint32 checker, kmp_uint32 (*pred)(void *, kmp_uint32),
void *obj // Higher-level synchronization object, or NULL.
) {
void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
kmp_uint32 (*pred)(void *, kmp_uint32),
void *obj // Higher-level synchronization object, or NULL.
) {
// note: we may not belong to a team at this point
void *spin = spinner;
kmp_uint32 check = checker;
@ -2534,10 +2530,9 @@ void __kmp_wait_yield_4_ptr(
// main wait spin loop
while (!f(spin, check)) {
KMP_FSYNC_SPIN_PREPARE(obj);
/* if we have waited a bit, or are oversubscribed, yield */
/* if we have waited a bit, or are noversubscribed, yield */
/* pause is in the following code */
KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
KMP_YIELD_SPIN(spins);
KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
}
KMP_FSYNC_SPIN_ACQUIRED(obj);
}

View File

@ -269,7 +269,7 @@ template <typename T> kmp_uint32 __kmp_eq(T value, T checker) {
}
/*
Spin wait loop that first does pause, then yield.
Spin wait loop that pauses between checks.
Waits until function returns non-zero when called with *spinner and check.
Does NOT put threads to sleep.
Arguments:
@ -282,15 +282,14 @@ template <typename T> kmp_uint32 __kmp_eq(T value, T checker) {
is used to report locks consistently. For example, if lock is acquired
immediately, its address is reported to ittnotify via
KMP_FSYNC_ACQUIRED(). However, if lock cannot be acquired immediately
and lock routine calls to KMP_WAIT_YIELD(), the later should report the
and lock routine calls to KMP_WAIT(), the later should report the
same address, not an address of low-level spinner.
#endif // USE_ITT_BUILD
TODO: make inline function (move to header file for icl)
*/
template <typename UT>
static UT __kmp_wait_yield(volatile UT *spinner, UT checker,
kmp_uint32 (*pred)(UT, UT)
USE_ITT_BUILD_ARG(void *obj)) {
static UT __kmp_wait(volatile UT *spinner, UT checker,
kmp_uint32 (*pred)(UT, UT) USE_ITT_BUILD_ARG(void *obj)) {
// note: we may not belong to a team at this point
volatile UT *spin = spinner;
UT check = checker;
@ -308,12 +307,8 @@ static UT __kmp_wait_yield(volatile UT *spinner, UT checker,
It causes problems with infinite recursion because of exit lock */
/* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
__kmp_abort_thread(); */
// if we are oversubscribed,
// or have waited a bit (and KMP_LIBRARY=throughput, then yield
// pause is in the following code
KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
KMP_YIELD_SPIN(spins);
// If oversubscribed, or have waited a bit then yield.
KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
}
KMP_FSYNC_SPIN_ACQUIRED(obj);
return r;
@ -379,8 +374,8 @@ void __kmp_dispatch_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
__kmp_str_free(&buff);
}
#endif
__kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
__kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
__kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
__kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
KMP_MB(); /* is this necessary? */
#ifdef KMP_DEBUG
{

View File

@ -263,8 +263,8 @@ void core_barrier_impl<T>::barrier(kmp_int32 id,
next_wait_value));
char v = (current_wait_value ? 0x1 : 0x0);
(RCAST(volatile char *, &(bdata->val[current_index])))[id] = v;
__kmp_wait_yield<kmp_uint64>(&(bdata->val[current_index]), current_wait_value,
__kmp_eq<kmp_uint64> USE_ITT_BUILD_ARG(NULL));
__kmp_wait<kmp_uint64>(&(bdata->val[current_index]), current_wait_value,
__kmp_eq<kmp_uint64> USE_ITT_BUILD_ARG(NULL));
tdata->wait_val[current_index] = next_wait_value;
tdata->index = next_index;
}
@ -310,8 +310,8 @@ void counter_barrier_impl<T>::barrier(kmp_int32 id,
next_wait_value));
val = RCAST(volatile kmp_int64 *, &(bdata->val[current_index]));
KMP_TEST_THEN_INC64(val);
__kmp_wait_yield<kmp_uint64>(&(bdata->val[current_index]), current_wait_value,
__kmp_ge<kmp_uint64> USE_ITT_BUILD_ARG(NULL));
__kmp_wait<kmp_uint64>(&(bdata->val[current_index]), current_wait_value,
__kmp_ge<kmp_uint64> USE_ITT_BUILD_ARG(NULL));
tdata->wait_val[current_index] = next_wait_value;
tdata->index = next_index;
}

View File

@ -62,11 +62,6 @@ int __kmp_version = 0;
std::atomic<kmp_int32> __kmp_team_counter = ATOMIC_VAR_INIT(0);
std::atomic<kmp_int32> __kmp_task_counter = ATOMIC_VAR_INIT(0);
unsigned int __kmp_init_wait =
KMP_DEFAULT_INIT_WAIT; /* initial number of spin-tests */
unsigned int __kmp_next_wait =
KMP_DEFAULT_NEXT_WAIT; /* susequent number of spin-tests */
size_t __kmp_stksize = KMP_DEFAULT_STKSIZE;
#if KMP_USE_MONITOR
size_t __kmp_monitor_stksize = 0; // auto adjust
@ -395,22 +390,17 @@ int __kmp_env_blocktime = FALSE; /* KMP_BLOCKTIME specified? */
int __kmp_env_checks = FALSE; /* KMP_CHECKS specified? */
int __kmp_env_consistency_check = FALSE; /* KMP_CONSISTENCY_CHECK specified? */
// From KMP_USE_YIELD:
// 0 = never yield;
// 1 = always yield (default);
// 2 = yield only if oversubscribed
kmp_int32 __kmp_use_yield = 1;
// This will be 1 if KMP_USE_YIELD environment variable was set explicitly
kmp_int32 __kmp_use_yield_exp_set = 0;
kmp_uint32 __kmp_yield_init = KMP_INIT_WAIT;
kmp_uint32 __kmp_yield_next = KMP_NEXT_WAIT;
#if KMP_USE_MONITOR
kmp_uint32 __kmp_yielding_on = 1;
#endif
#if KMP_OS_CNK
kmp_uint32 __kmp_yield_cycle = 0;
#else
kmp_uint32 __kmp_yield_cycle = 1; /* Yield-cycle is on by default */
#endif
kmp_int32 __kmp_yield_on_count =
10; /* By default, yielding is on for 10 monitor periods. */
kmp_int32 __kmp_yield_off_count =
1; /* By default, yielding is off for 1 monitor periods. */
/* ------------------------------------------------------ */
/* STATE mostly syncronized with global lock */
/* data written to rarely by masters, read often by workers */

View File

@ -219,7 +219,7 @@ __kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);
with a delay (and not called at all if waiting time is small). So, in spin
loops, do not use KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT() (before
spin loop), KMP_FSYNC_SPIN_PREPARE() (within the spin loop), and
KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT_YIELD() for example. */
KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT() for example. */
#undef KMP_FSYNC_SPIN_INIT
#define KMP_FSYNC_SPIN_INIT(obj, spin) \

View File

@ -100,23 +100,12 @@ __kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
kmp_uint32 spins;
KMP_FSYNC_PREPARE(lck);
KMP_INIT_YIELD(spins);
if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
KMP_YIELD(TRUE);
} else {
KMP_YIELD_SPIN(spins);
}
kmp_backoff_t backoff = __kmp_spin_backoff_params;
while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
!__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
do {
__kmp_spin_backoff(&backoff);
if (TCR_4(__kmp_nth) >
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
KMP_YIELD(TRUE);
} else {
KMP_YIELD_SPIN(spins);
}
}
KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
} while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
!__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
KMP_FSYNC_ACQUIRED(lck);
return KMP_LOCK_ACQUIRED_FIRST;
}
@ -169,8 +158,7 @@ int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
KMP_MB(); /* Flush all pending memory write invalidates. */
KMP_YIELD(TCR_4(__kmp_nth) >
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
KMP_YIELD_OVERSUB();
return KMP_LOCK_RELEASED;
}
@ -474,8 +462,7 @@ int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
lck->lk.poll, gtid));
KMP_YIELD(TCR_4(__kmp_nth) >
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
KMP_YIELD_OVERSUB();
return KMP_LOCK_RELEASED;
}
@ -651,7 +638,7 @@ __kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
std::memory_order_acquire) == my_ticket) {
return KMP_LOCK_ACQUIRED_FIRST;
}
KMP_WAIT_YIELD_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
return KMP_LOCK_ACQUIRED_FIRST;
}
@ -1249,10 +1236,9 @@ __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
lck, gtid));
/* ToDo: May want to consider using __kmp_wait_sleep or something that
sleeps for throughput only here. */
KMP_MB();
KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
// ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
#ifdef DEBUG_QUEUING_LOCKS
TRACE_LOCK(gtid + 1, "acq spin");
@ -1282,8 +1268,8 @@ __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
/* Yield if number of threads > number of logical processors */
/* ToDo: Not sure why this should only be in oversubscription case,
maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
KMP_YIELD(TCR_4(__kmp_nth) >
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
KMP_YIELD_OVERSUB();
#ifdef DEBUG_QUEUING_LOCKS
TRACE_LOCK(gtid + 1, "acq retry");
#endif
@ -1462,8 +1448,8 @@ int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
KMP_MB();
/* make sure enqueuing thread has time to update next waiting thread
* field */
*head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32 *)waiting_id_p, 0,
KMP_NEQ, NULL);
*head_id_p =
KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
#ifdef DEBUG_QUEUING_LOCKS
TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
#endif
@ -2131,7 +2117,7 @@ static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
// lock from now on.
while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
KMP_INC_STAT(lck, lemmingYields);
__kmp_yield(TRUE);
KMP_YIELD(TRUE);
}
if (__kmp_test_adaptive_lock_only(lck, gtid))
@ -2259,23 +2245,14 @@ __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
// polling area has been reconfigured. Unless it is reconfigured, the
// reloads stay in L1 cache and are cheap.
//
// Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!!
//
// The current implementation of KMP_WAIT_YIELD doesn't allow for mask
// Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
// The current implementation of KMP_WAIT doesn't allow for mask
// and poll to be re-read every spin iteration.
kmp_uint32 spins;
KMP_FSYNC_PREPARE(lck);
KMP_INIT_YIELD(spins);
while (polls[ticket & mask] < ticket) { // atomic load
// If we are oversubscribed,
// or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
// CPU Pause is in the macros for yield.
//
KMP_YIELD(TCR_4(__kmp_nth) >
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
KMP_YIELD_SPIN(spins);
KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
// Re-read the mask and the poll pointer from the lock structure.
//
// Make certain that "mask" is read before "polls" !!!
@ -2807,8 +2784,9 @@ static void __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
}
if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
// Wait until lock becomes free
while (!__kmp_is_unlocked_queuing_lock(lck))
__kmp_yield(TRUE);
while (!__kmp_is_unlocked_queuing_lock(lck)) {
KMP_YIELD(TRUE);
}
} else if (!(status & _XABORT_RETRY))
break;
} while (retries--);

View File

@ -652,21 +652,11 @@ extern int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
kmp_uint32 spins; \
KMP_FSYNC_PREPARE(lck); \
KMP_INIT_YIELD(spins); \
if (TCR_4(__kmp_nth) > \
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
KMP_YIELD(TRUE); \
} else { \
KMP_YIELD_SPIN(spins); \
} \
while (lck->tas.lk.poll != 0 || !__kmp_atomic_compare_store_acq( \
&lck->tas.lk.poll, 0, gtid + 1)) { \
if (TCR_4(__kmp_nth) > \
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
KMP_YIELD(TRUE); \
} else { \
KMP_YIELD_SPIN(spins); \
} \
} \
do { \
KMP_YIELD_OVERSUB_ELSE_SPIN(spins); \
} while ( \
lck->tas.lk.poll != 0 || \
!__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); \
} \
KMP_FSYNC_ACQUIRED(lck); \
} else { \
@ -770,22 +760,11 @@ extern int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
kmp_uint32 spins; \
KMP_FSYNC_PREPARE(lck); \
KMP_INIT_YIELD(spins); \
if (TCR_4(__kmp_nth) > \
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
KMP_YIELD(TRUE); \
} else { \
KMP_YIELD_SPIN(spins); \
} \
while ( \
do { \
KMP_YIELD_OVERSUB_ELSE_SPIN(spins); \
} while ( \
(lck->tas.lk.poll != 0) || \
!__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
if (TCR_4(__kmp_nth) > \
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
KMP_YIELD(TRUE); \
} else { \
KMP_YIELD_SPIN(spins); \
} \
} \
!__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); \
} \
lck->tas.lk.depth_locked = 1; \
*depth = KMP_LOCK_ACQUIRED_FIRST; \

View File

@ -304,7 +304,7 @@ extern "C" {
#define KMP_CACHE_PREFETCH(ADDR) /* nothing */
// Define attribute that indicates that the fall through from the previous
// Define attribute that indicates that the fall through from the previous
// case label is intentional and should not be diagnosed by a compiler
// Code from libcxx/include/__config
// Use a function like macro to imply that it must be followed by a semicolon
@ -882,8 +882,8 @@ typedef void (*microtask_t)(int *gtid, int *npr, ...);
#define VOLATILE_CAST(x) (x)
#endif
#define KMP_WAIT_YIELD __kmp_wait_yield_4
#define KMP_WAIT_YIELD_PTR __kmp_wait_yield_4_ptr
#define KMP_WAIT __kmp_wait_4
#define KMP_WAIT_PTR __kmp_wait_4_ptr
#define KMP_EQ __kmp_eq_4
#define KMP_NEQ __kmp_neq_4
#define KMP_LT __kmp_lt_4

View File

@ -327,7 +327,7 @@ void __kmp_infinite_loop(void) {
static int done = FALSE;
while (!done) {
KMP_YIELD(1);
KMP_YIELD(TRUE);
}
}
@ -672,24 +672,6 @@ BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */
/* Change the library type to "status" and return the old type */
/* called from within initialization routines where __kmp_initz_lock is held */
int __kmp_change_library(int status) {
int old_status;
old_status = __kmp_yield_init &
1; // check whether KMP_LIBRARY=throughput (even init count)
if (status) {
__kmp_yield_init |= 1; // throughput => turnaround (odd init count)
} else {
__kmp_yield_init &= ~1; // turnaround => throughput (even init count)
}
return old_status; // return previous setting of whether
// KMP_LIBRARY=throughput
}
/* __kmp_parallel_deo -- Wait until it's our turn. */
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
int gtid = *gtid_ref;
@ -708,8 +690,8 @@ void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
#ifdef BUILD_PARALLEL_ORDERED
if (!team->t.t_serialized) {
KMP_MB();
KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
KMP_EQ, NULL);
KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
NULL);
KMP_MB();
}
#endif /* BUILD_PARALLEL_ORDERED */
@ -7735,13 +7717,14 @@ void __kmp_aux_set_library(enum library_type arg) {
switch (__kmp_library) {
case library_serial: {
KMP_INFORM(LibraryIsSerial);
(void)__kmp_change_library(TRUE);
} break;
case library_turnaround:
(void)__kmp_change_library(TRUE);
if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
__kmp_use_yield = 2; // only yield when oversubscribed
break;
case library_throughput:
(void)__kmp_change_library(FALSE);
if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
__kmp_dflt_blocktime = 200;
break;
default:
KMP_FATAL(UnknownLibraryType, arg);

View File

@ -628,6 +628,19 @@ static void __kmp_stg_print_teams_thread_limit(kmp_str_buf_t *buffer,
__kmp_stg_print_int(buffer, name, __kmp_teams_max_nth);
} // __kmp_stg_print_teams_thread_limit
// -----------------------------------------------------------------------------
// KMP_USE_YIELD
static void __kmp_stg_parse_use_yield(char const *name, char const *value,
void *data) {
__kmp_stg_parse_int(name, value, 0, 2, &__kmp_use_yield);
__kmp_use_yield_exp_set = 1;
} // __kmp_stg_parse_use_yield
static void __kmp_stg_print_use_yield(kmp_str_buf_t *buffer, char const *name,
void *data) {
__kmp_stg_print_int(buffer, name, __kmp_use_yield);
} // __kmp_stg_print_use_yield
// -----------------------------------------------------------------------------
// KMP_BLOCKTIME
@ -745,18 +758,24 @@ static void __kmp_stg_parse_wait_policy(char const *name, char const *value,
__kmp_library = library_serial;
} else if (__kmp_str_match("throughput", 2, value)) { /* TH */
__kmp_library = library_throughput;
if (blocktime_str == NULL) {
// KMP_BLOCKTIME not specified, so set default to 0.
__kmp_dflt_blocktime = 0;
}
} else if (__kmp_str_match("turnaround", 2, value)) { /* TU */
__kmp_library = library_turnaround;
} else if (__kmp_str_match("dedicated", 1, value)) { /* D */
__kmp_library = library_turnaround;
} else if (__kmp_str_match("multiuser", 1, value)) { /* M */
__kmp_library = library_throughput;
if (blocktime_str == NULL) {
// KMP_BLOCKTIME not specified, so set default to 0.
__kmp_dflt_blocktime = 0;
}
} else {
KMP_WARNING(StgInvalidValue, name, value);
}
}
__kmp_aux_set_library(__kmp_library);
} // __kmp_stg_parse_wait_policy
static void __kmp_stg_print_wait_policy(kmp_str_buf_t *buffer, char const *name,
@ -3944,78 +3963,8 @@ static void __kmp_stg_print_par_range_env(kmp_str_buf_t *buffer,
}
} // __kmp_stg_print_par_range_env
// -----------------------------------------------------------------------------
// KMP_YIELD_CYCLE, KMP_YIELD_ON, KMP_YIELD_OFF
static void __kmp_stg_parse_yield_cycle(char const *name, char const *value,
void *data) {
int flag = __kmp_yield_cycle;
__kmp_stg_parse_bool(name, value, &flag);
__kmp_yield_cycle = flag;
} // __kmp_stg_parse_yield_cycle
static void __kmp_stg_print_yield_cycle(kmp_str_buf_t *buffer, char const *name,
void *data) {
__kmp_stg_print_bool(buffer, name, __kmp_yield_cycle);
} // __kmp_stg_print_yield_cycle
static void __kmp_stg_parse_yield_on(char const *name, char const *value,
void *data) {
__kmp_stg_parse_int(name, value, 2, INT_MAX, &__kmp_yield_on_count);
} // __kmp_stg_parse_yield_on
static void __kmp_stg_print_yield_on(kmp_str_buf_t *buffer, char const *name,
void *data) {
__kmp_stg_print_int(buffer, name, __kmp_yield_on_count);
} // __kmp_stg_print_yield_on
static void __kmp_stg_parse_yield_off(char const *name, char const *value,
void *data) {
__kmp_stg_parse_int(name, value, 2, INT_MAX, &__kmp_yield_off_count);
} // __kmp_stg_parse_yield_off
static void __kmp_stg_print_yield_off(kmp_str_buf_t *buffer, char const *name,
void *data) {
__kmp_stg_print_int(buffer, name, __kmp_yield_off_count);
} // __kmp_stg_print_yield_off
#endif
// -----------------------------------------------------------------------------
// KMP_INIT_WAIT, KMP_NEXT_WAIT
static void __kmp_stg_parse_init_wait(char const *name, char const *value,
void *data) {
int wait;
KMP_ASSERT((__kmp_init_wait & 1) == 0);
wait = __kmp_init_wait / 2;
__kmp_stg_parse_int(name, value, KMP_MIN_INIT_WAIT, KMP_MAX_INIT_WAIT, &wait);
__kmp_init_wait = wait * 2;
KMP_ASSERT((__kmp_init_wait & 1) == 0);
__kmp_yield_init = __kmp_init_wait;
} // __kmp_stg_parse_init_wait
static void __kmp_stg_print_init_wait(kmp_str_buf_t *buffer, char const *name,
void *data) {
__kmp_stg_print_int(buffer, name, __kmp_init_wait);
} // __kmp_stg_print_init_wait
static void __kmp_stg_parse_next_wait(char const *name, char const *value,
void *data) {
int wait;
KMP_ASSERT((__kmp_next_wait & 1) == 0);
wait = __kmp_next_wait / 2;
__kmp_stg_parse_int(name, value, KMP_MIN_NEXT_WAIT, KMP_MAX_NEXT_WAIT, &wait);
__kmp_next_wait = wait * 2;
KMP_ASSERT((__kmp_next_wait & 1) == 0);
__kmp_yield_next = __kmp_next_wait;
} // __kmp_stg_parse_next_wait
static void __kmp_stg_print_next_wait(kmp_str_buf_t *buffer, char const *name,
void *data) {
__kmp_stg_print_int(buffer, name, __kmp_next_wait);
} //__kmp_stg_print_next_wait
// -----------------------------------------------------------------------------
// KMP_GTID_MODE
@ -4726,6 +4675,8 @@ static kmp_setting_t __kmp_stg_table[] = {
{"KMP_ALL_THREADS", __kmp_stg_parse_device_thread_limit, NULL, NULL, 0, 0},
{"KMP_BLOCKTIME", __kmp_stg_parse_blocktime, __kmp_stg_print_blocktime,
NULL, 0, 0},
{"KMP_USE_YIELD", __kmp_stg_parse_use_yield, __kmp_stg_print_use_yield,
NULL, 0, 0},
{"KMP_DUPLICATE_LIB_OK", __kmp_stg_parse_duplicate_lib_ok,
__kmp_stg_print_duplicate_lib_ok, NULL, 0, 0},
{"KMP_LIBRARY", __kmp_stg_parse_wait_policy, __kmp_stg_print_wait_policy,
@ -4830,12 +4781,6 @@ static kmp_setting_t __kmp_stg_table[] = {
{"KMP_PAR_RANGE", __kmp_stg_parse_par_range_env,
__kmp_stg_print_par_range_env, NULL, 0, 0},
{"KMP_YIELD_CYCLE", __kmp_stg_parse_yield_cycle,
__kmp_stg_print_yield_cycle, NULL, 0, 0},
{"KMP_YIELD_ON", __kmp_stg_parse_yield_on, __kmp_stg_print_yield_on, NULL,
0, 0},
{"KMP_YIELD_OFF", __kmp_stg_parse_yield_off, __kmp_stg_print_yield_off,
NULL, 0, 0},
#endif // KMP_DEBUG
{"KMP_ALIGN_ALLOC", __kmp_stg_parse_align_alloc,
@ -4927,10 +4872,6 @@ static kmp_setting_t __kmp_stg_table[] = {
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
{"KMP_MALLOC_POOL_INCR", __kmp_stg_parse_malloc_pool_incr,
__kmp_stg_print_malloc_pool_incr, NULL, 0, 0},
{"KMP_INIT_WAIT", __kmp_stg_parse_init_wait, __kmp_stg_print_init_wait,
NULL, 0, 0},
{"KMP_NEXT_WAIT", __kmp_stg_parse_next_wait, __kmp_stg_print_next_wait,
NULL, 0, 0},
{"KMP_GTID_MODE", __kmp_stg_parse_gtid_mode, __kmp_stg_print_gtid_mode,
NULL, 0, 0},
{"OMP_DYNAMIC", __kmp_stg_parse_omp_dynamic, __kmp_stg_print_omp_dynamic,

View File

@ -2705,8 +2705,7 @@ static inline int __kmp_execute_tasks_template(
if (thread->th.th_task_team == NULL) {
break;
}
// Yield before executing next task
KMP_YIELD(__kmp_library == library_throughput);
KMP_YIELD(__kmp_library == library_throughput); // Yield before next task
// If execution of a stolen task results in more tasks being placed on our
// run queue, reset use_own_tasks
if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
@ -3242,10 +3241,8 @@ void __kmp_wait_to_unref_task_teams(void) {
break;
}
// If we are oversubscribed, or have waited a bit (and library mode is
// throughput), yield. Pause is in the following code.
KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
KMP_YIELD_SPIN(spins); // Yields only if KMP_LIBRARY=throughput
// If oversubscribed or have waited a bit, yield.
KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
}
}
@ -3410,7 +3407,7 @@ void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
__kmp_abort_thread();
break;
}
KMP_YIELD(TRUE); // GH: We always yield here
KMP_YIELD(TRUE);
}
#if USE_ITT_BUILD
KMP_FSYNC_SPIN_ACQUIRED(RCAST(void *, spin));

View File

@ -51,7 +51,7 @@ static void __kmp_taskq_eo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
taskq = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
KMP_WAIT(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
KMP_MB();
}
}
@ -95,7 +95,7 @@ static void __kmp_taskq_check_ordered(kmp_int32 gtid, kmpc_thunk_t *thunk) {
taskq = thunk->th.th_shareds->sv_queue;
if (taskq->tq_tasknum_serving <= my_token) {
KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
KMP_WAIT(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
KMP_MB();
taskq->tq_tasknum_serving = my_token + 1;
KMP_MB();
@ -1056,8 +1056,7 @@ static void __kmp_remove_queue_from_tree(kmp_taskq_t *tq, kmp_int32 global_tid,
while (queue->tq_ref_count > 1) {
__kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
KMP_WAIT_YIELD((volatile kmp_uint32 *)&queue->tq_ref_count, 1, KMP_LE,
NULL);
KMP_WAIT((volatile kmp_uint32 *)&queue->tq_ref_count, 1, KMP_LE, NULL);
__kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
// Make sure data structures are in consistent state before querying them
@ -1538,8 +1537,6 @@ void __kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid,
in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
if (in_parallel) {
kmp_uint32 spins;
/* this is just a safeguard to release the waiting threads if */
/* the outermost taskq never queues a task */
@ -1556,12 +1553,10 @@ void __kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid,
do {
/* wait until something is available to dequeue */
KMP_INIT_YIELD(spins);
while ((queue->tq_nfull == 0) && (queue->tq_taskq_slot == NULL) &&
(!__kmp_taskq_has_any_children(queue)) &&
(!(queue->tq_flags & TQF_ALL_TASKS_QUEUED))) {
KMP_YIELD_WHEN(TRUE, spins);
KMP_CPU_PAUSE();
}
/* check to see if we can execute tasks in the queue */
@ -1628,7 +1623,6 @@ void __kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid,
/* WAIT until all tasks are finished and no child queues exist before
* proceeding */
KMP_INIT_YIELD(spins);
while (!__kmp_taskq_tasks_finished(queue) ||
__kmp_taskq_has_any_children(queue)) {
@ -1643,7 +1637,8 @@ void __kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid,
in_parallel);
}
KMP_YIELD_WHEN(thunk == NULL, spins);
if (thunk == NULL)
KMP_CPU_PAUSE();
__kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);
}
@ -1669,8 +1664,6 @@ void __kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid,
// Outermost Queue: steal work from descendants until all tasks are finished
KMP_INIT_YIELD(spins);
while (!__kmp_taskq_tasks_finished(queue)) {
thunk = __kmp_find_task_in_descendant_queue(global_tid, queue);
@ -1683,7 +1676,8 @@ void __kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid,
__kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
}
KMP_YIELD_WHEN(thunk == NULL, spins);
if (thunk == NULL)
KMP_CPU_PAUSE();
}
/* Need this barrier to prevent destruction of queue before threads have all

View File

@ -150,8 +150,8 @@ static void __ompt_implicit_task_end(kmp_info_t *this_thr,
}
#endif
/* Spin wait loop that first does pause, then yield, then sleep. A thread that
calls __kmp_wait_* must make certain that another thread calls __kmp_release
/* Spin wait loop that first does pause/yield, then sleep. A thread that calls
__kmp_wait_* must make certain that another thread calls __kmp_release
to wake it back up to prevent deadlocks!
NOTE: We may not belong to a team at this point. */
@ -270,8 +270,7 @@ final_spin=FALSE)
}
#endif
// Setup for waiting
KMP_INIT_YIELD(spins);
KMP_INIT_YIELD(spins); // Setup for waiting
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
#if OMP_50_ENABLED
@ -368,14 +367,8 @@ final_spin=FALSE)
// If we are oversubscribed, or have waited a bit (and
// KMP_LIBRARY=throughput), then yield
// TODO: Should it be number of cores instead of thread contexts? Like:
// KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
// Need performance improvement data to make the change...
if (oversubscribed) {
KMP_YIELD(1);
} else {
KMP_YIELD_SPIN(spins);
}
KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
// Check if this thread was transferred from a team
// to the thread pool (or vice-versa) while spinning.
in_pool = !!TCR_4(this_thr->th.th_in_pool);

View File

@ -437,7 +437,7 @@ void __kmp_terminate_thread(int gtid) {
__kmp_msg_null);
}
#endif
__kmp_yield(TRUE);
KMP_YIELD(TRUE);
} //
/* Set thread stack info according to values returned by pthread_getattr_np().
@ -580,8 +580,6 @@ static void *__kmp_launch_monitor(void *thr) {
sigset_t new_set;
#endif /* KMP_BLOCK_SIGNALS */
struct timespec interval;
int yield_count;
int yield_cycles = 0;
KMP_MB(); /* Flush all pending memory write invalidates. */
@ -665,13 +663,6 @@ static void *__kmp_launch_monitor(void *thr) {
KA_TRACE(10, ("__kmp_launch_monitor: #2 monitor\n"));
if (__kmp_yield_cycle) {
__kmp_yielding_on = 0; /* Start out with yielding shut off */
yield_count = __kmp_yield_off_count;
} else {
__kmp_yielding_on = 1; /* Yielding is on permanently */
}
while (!TCR_4(__kmp_global.g.g_done)) {
struct timespec now;
struct timeval tval;
@ -707,22 +698,6 @@ static void *__kmp_launch_monitor(void *thr) {
status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex);
KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
if (__kmp_yield_cycle) {
yield_cycles++;
if ((yield_cycles % yield_count) == 0) {
if (__kmp_yielding_on) {
__kmp_yielding_on = 0; /* Turn it off now */
yield_count = __kmp_yield_off_count;
} else {
__kmp_yielding_on = 1; /* Turn it on now */
yield_count = __kmp_yield_on_count;
}
yield_cycles = 0;
}
} else {
__kmp_yielding_on = 1;
}
TCW_4(__kmp_global.g.g_time.dt.t_value,
TCR_4(__kmp_global.g.g_time.dt.t_value) + 1);
@ -1011,8 +986,8 @@ retry:
// Wait for the monitor thread is really started and set its *priority*.
KMP_DEBUG_ASSERT(sizeof(kmp_uint32) ==
sizeof(__kmp_global.g.g_time.dt.t_value));
__kmp_wait_yield_4((kmp_uint32 volatile *)&__kmp_global.g.g_time.dt.t_value,
-1, &__kmp_neq_4, NULL);
__kmp_wait_4((kmp_uint32 volatile *)&__kmp_global.g.g_time.dt.t_value, -1,
&__kmp_neq_4, NULL);
#endif // KMP_REAL_TIME_FIX
#ifdef KMP_THREAD_ATTR
@ -1688,18 +1663,7 @@ void __kmp_resume_monitor() {
}
#endif // KMP_USE_MONITOR
void __kmp_yield(int cond) {
if (!cond)
return;
#if KMP_USE_MONITOR
if (!__kmp_yielding_on)
return;
#else
if (__kmp_yield_cycle && !KMP_YIELD_NOW())
return;
#endif
sched_yield();
}
void __kmp_yield() { sched_yield(); }
void __kmp_gtid_set_specific(int gtid) {
if (__kmp_init_gtid) {

View File

@ -483,10 +483,7 @@ void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
__kmp_resume_template(target_gtid, flag);
}
void __kmp_yield(int cond) {
if (cond)
Sleep(0);
}
void __kmp_yield() { Sleep(0); }
void __kmp_gtid_set_specific(int gtid) {
if (__kmp_init_gtid) {
@ -1245,8 +1242,8 @@ static void __kmp_reap_common(kmp_info_t *th) {
Right solution seems to be waiting for *either* thread termination *or*
ds_alive resetting. */
{
// TODO: This code is very similar to KMP_WAIT_YIELD. Need to generalize
// KMP_WAIT_YIELD to cover this usage also.
// TODO: This code is very similar to KMP_WAIT. Need to generalize
// KMP_WAIT to cover this usage also.
void *obj = NULL;
kmp_uint32 spins;
#if USE_ITT_BUILD
@ -1258,8 +1255,7 @@ static void __kmp_reap_common(kmp_info_t *th) {
KMP_FSYNC_SPIN_PREPARE(obj);
#endif /* USE_ITT_BUILD */
__kmp_is_thread_alive(th, &exit_val);
KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
KMP_YIELD_SPIN(spins);
KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
} while (exit_val == STILL_ACTIVE && TCR_4(th->th.th_info.ds.ds_alive));
#if USE_ITT_BUILD
if (exit_val == STILL_ACTIVE) {