spa: document spa_thread() and SDC feature gates

spa_thread() and the "System Duty Cycle" scheduling class are from
Illumos and have not yet been adapted to Linux or FreeBSD.

HAVE_SPA_THREAD has long been explicitly undefined and used to mark
spa_thread(), but there's some related taskq code that can never be
invoked without it, which makes some already-tricky code harder to read.

HAVE_SYSDC is introduced in this commit to mark the SDC parts. SDC
requires spa_thread(), but the inverse is not true, so they are
separate.

I don't want to make the call to just remove it because I still harbour
hopes that OpenZFS could become a first-class citizen on Illumos
someday. But hopefully this will at least make the reason it exists a
bit clearer for people without long memories and/or an interest in
history.

For those that are interested in the history, the original FreeBSD port
of ZFS (before ZFS-on-Linux was adopted there) did have a spa_thread(),
but not SDC. The last version of that before it was removed can be read
here:

  22df1ffd81/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c

Meanwhile, more information on the SDC scheduling class is here:

  https://github.com/illumos/illumos-gate/blob/master/usr/src/uts/common/disp/sysdc.c

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by:  Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Closes #15406
This commit is contained in:
Rob N 2023-10-24 02:50:55 +11:00 committed by GitHub
parent 797f55ef12
commit b5e6091885
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 35 additions and 6 deletions

View File

@ -100,6 +100,26 @@
#include "zfs_prop.h"
#include "zfs_comutil.h"
/*
* spa_thread() existed on Illumos as a parent thread for the various worker
* threads that actually run the pool, as a way to both reference the entire
* pool work as a single object, and to share properties like scheduling
* options. It has not yet been adapted to Linux or FreeBSD. This define is
* used to mark related parts of the code to make things easier for the reader,
* and to compile this code out. It can be removed when someone implements it,
* moves it to some Illumos-specific place, or removes it entirely.
*/
#undef HAVE_SPA_THREAD
/*
* The "System Duty Cycle" scheduling class is an Illumos feature to help
* prevent CPU-intensive kernel threads from affecting latency on interactive
* threads. It doesn't exist on Linux or FreeBSD, so the supporting code is
* gated behind a define. On Illumos SDC depends on spa_thread(), but
* spa_thread() also has other uses, so this is a separate define.
*/
#undef HAVE_SYSDC
/*
* The interval, in seconds, at which failed configuration cache file writes
* should be retried.
@ -176,10 +196,15 @@ static uint_t metaslab_preload_pct = 50;
static uint_t zio_taskq_batch_pct = 80; /* 1 thread per cpu in pset */
static uint_t zio_taskq_batch_tpq; /* threads per taskq */
#ifdef HAVE_SYSDC
static const boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */
static const uint_t zio_taskq_basedc = 80; /* base duty cycle */
#endif
#ifdef HAVE_SPA_THREAD
static const boolean_t spa_create_process = B_TRUE; /* no process => no sysdc */
#endif
/*
* Report any spa_load_verify errors found, but do not fail spa_load.
@ -1029,7 +1054,9 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
uint_t count = ztip->zti_count;
spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
uint_t cpus, flags = TASKQ_DYNAMIC;
#ifdef HAVE_SYSDC
boolean_t batch = B_FALSE;
#endif
switch (mode) {
case ZTI_MODE_FIXED:
@ -1037,7 +1064,9 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
break;
case ZTI_MODE_BATCH:
#ifdef HAVE_SYSDC
batch = B_TRUE;
#endif
flags |= TASKQ_THREADS_CPU_PCT;
value = MIN(zio_taskq_batch_pct, 100);
break;
@ -1106,6 +1135,7 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
(void) snprintf(name, sizeof (name), "%s_%s",
zio_type_name[t], zio_taskq_types[q]);
#ifdef HAVE_SYSDC
if (zio_taskq_sysdc && spa->spa_proc != &p0) {
if (batch)
flags |= TASKQ_DC_BATCH;
@ -1114,6 +1144,7 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
tq = taskq_create_sysdc(name, value, 50, INT_MAX,
spa->spa_proc, zio_taskq_basedc, flags);
} else {
#endif
pri_t pri = maxclsyspri;
/*
* The write issue taskq can be extremely CPU
@ -1139,7 +1170,9 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
}
tq = taskq_create_proc(name, value, pri, 50,
INT_MAX, spa->spa_proc, flags);
#ifdef HAVE_SYSDC
}
#endif
tqs->stqs_taskq[i] = tq;
}
@ -1224,11 +1257,6 @@ spa_create_zio_taskqs(spa_t *spa)
}
}
/*
* Disabled until spa_thread() can be adapted for Linux.
*/
#undef HAVE_SPA_THREAD
#if defined(_KERNEL) && defined(HAVE_SPA_THREAD)
static void
spa_thread(void *arg)
@ -1269,9 +1297,11 @@ spa_thread(void *arg)
pool_unlock();
}
#ifdef HAVE_SYSDC
if (zio_taskq_sysdc) {
sysdc_thread_enter(curthread, 100, 0);
}
#endif
spa->spa_proc = curproc;
spa->spa_did = curthread->t_did;
@ -1327,7 +1357,6 @@ spa_activate(spa_t *spa, spa_mode_t mode)
ASSERT(spa->spa_proc == &p0);
spa->spa_did = 0;
(void) spa_create_process;
#ifdef HAVE_SPA_THREAD
/* Only create a process if we're going to be around a while. */
if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) {