deadlock between spa_errlog_lock and dp_config_rwlock

There is a lock order inversion deadlock between `spa_errlog_lock` and
`dp_config_rwlock`:

A thread in `spa_delete_dataset_errlog()` is running from a sync task.
It is holding the `dp_config_rwlock` for writer (see
`dsl_sync_task_sync()`), and waiting for the `spa_errlog_lock`.

A thread in `dsl_pool_config_enter()` is holding the `spa_errlog_lock`
(see `spa_get_errlog_size()`) and waiting for the `dp_config_rwlock` (as
reader).

Note that this was introduced by #12812.

This commit address this by defining the lock ordering to be
dp_config_rwlock first, then spa_errlog_lock / spa_errlist_lock.
spa_get_errlog() and spa_get_errlog_size() can acquire the locks in this
order, and then process_error_block() and get_head_and_birth_txg() can
verify that the dp_config_rwlock is already held.

Additionally, a buffer overrun in `spa_get_errlog()` is corrected.  Many
code paths didn't check if `*count` got to zero, instead continuing to
overwrite past the beginning of the userspace buffer at `uaddr`.

Tested by having some errors in the pool (via `zinject -t data
/path/to/file`), one thread running `zpool iostat 0.001`, and another
thread runs `zfs destroy` (in a loop, although it hits the first time).
This reproduces the problem easily without the fix, and works with the
fix.

Reviewed-by: Mark Maybee <mark.maybee@delphix.com>
Reviewed-by: George Amanakis <gamanakis@gmail.com>
Reviewed-by: George Wilson <gwilson@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Matthew Ahrens <mahrens@delphix.com>
Closes #14239
Closes #14289
This commit is contained in:
Matthew Ahrens 2022-12-22 11:48:49 -08:00 committed by GitHub
parent 29e1b089c1
commit 018f26041d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 138 additions and 230 deletions

View File

@ -8599,37 +8599,17 @@ status_callback(zpool_handle_t *zhp, void *data)
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
&nerr) == 0) {
nvlist_t *nverrlist = NULL;
/*
* If the approximate error count is small, get a
* precise count by fetching the entire log and
* uniquifying the results.
*/
if (nerr > 0 && nerr < 100 && !cbp->cb_verbose &&
zpool_get_errlog(zhp, &nverrlist) == 0) {
nvpair_t *elem;
elem = NULL;
nerr = 0;
while ((elem = nvlist_next_nvpair(nverrlist,
elem)) != NULL) {
nerr++;
}
}
nvlist_free(nverrlist);
(void) printf("\n");
if (nerr == 0)
(void) printf(gettext("errors: No known data "
"errors\n"));
else if (!cbp->cb_verbose)
if (nerr == 0) {
(void) printf(gettext(
"errors: No known data errors\n"));
} else if (!cbp->cb_verbose) {
(void) printf(gettext("errors: %llu data "
"errors, use '-v' for a list\n"),
(u_longlong_t)nerr);
else
} else {
print_error_log(zhp);
}
}
if (cbp->cb_dedup_stats)

View File

@ -6313,7 +6313,7 @@ ztest_scrub_impl(spa_t *spa)
while (dsl_scan_scrubbing(spa_get_dsl(spa)))
txg_wait_synced(spa_get_dsl(spa), 0);
if (spa_get_errlog_size(spa) > 0)
if (spa_approx_errlog_size(spa) > 0)
return (ECKSUM);
ztest_pool_scrubbed = B_TRUE;

View File

@ -1146,7 +1146,7 @@ extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type,
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
extern uint64_t spa_get_errlog_size(spa_t *spa);
extern uint64_t spa_approx_errlog_size(spa_t *spa);
extern int spa_get_errlog(spa_t *spa, void *uaddr, uint64_t *count);
extern void spa_errlog_rotate(spa_t *spa);
extern void spa_errlog_drain(spa_t *spa);

View File

@ -4133,33 +4133,28 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
{
zfs_cmd_t zc = {"\0"};
libzfs_handle_t *hdl = zhp->zpool_hdl;
uint64_t count;
zbookmark_phys_t *zb = NULL;
int i;
zbookmark_phys_t *buf;
uint64_t buflen = 10000; /* approx. 1MB of RAM */
if (fnvlist_lookup_uint64(zhp->zpool_config,
ZPOOL_CONFIG_ERRCOUNT) == 0)
return (0);
/*
* Retrieve the raw error list from the kernel. If the number of errors
* has increased, allocate more space and continue until we get the
* entire list.
* Retrieve the raw error list from the kernel. If it doesn't fit,
* allocate a larger buffer and retry.
*/
count = fnvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT);
if (count == 0)
return (0);
zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
count * sizeof (zbookmark_phys_t));
zc.zc_nvlist_dst_size = count;
(void) strcpy(zc.zc_name, zhp->zpool_name);
for (;;) {
buf = zfs_alloc(zhp->zpool_hdl,
buflen * sizeof (zbookmark_phys_t));
zc.zc_nvlist_dst = (uintptr_t)buf;
zc.zc_nvlist_dst_size = buflen;
if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_ERROR_LOG,
&zc) != 0) {
free((void *)(uintptr_t)zc.zc_nvlist_dst);
free(buf);
if (errno == ENOMEM) {
void *dst;
count = zc.zc_nvlist_dst_size;
dst = zfs_alloc(zhp->zpool_hdl, count *
sizeof (zbookmark_phys_t));
zc.zc_nvlist_dst = (uintptr_t)dst;
buflen *= 2;
} else {
return (zpool_standard_error_fmt(hdl, errno,
dgettext(TEXT_DOMAIN, "errors: List of "
@ -4177,18 +4172,17 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
* _not_ copied as part of the process. So we point the start of our
* array appropriate and decrement the total number of elements.
*/
zb = ((zbookmark_phys_t *)(uintptr_t)zc.zc_nvlist_dst) +
zc.zc_nvlist_dst_size;
count -= zc.zc_nvlist_dst_size;
zbookmark_phys_t *zb = buf + zc.zc_nvlist_dst_size;
uint64_t zblen = buflen - zc.zc_nvlist_dst_size;
qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare);
qsort(zb, zblen, sizeof (zbookmark_phys_t), zbookmark_mem_compare);
verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
/*
* Fill in the nverrlistp with nvlist's of dataset and object numbers.
*/
for (i = 0; i < count; i++) {
for (uint64_t i = 0; i < zblen; i++) {
nvlist_t *nv;
/* ignoring zb_blkid and zb_level for now */
@ -4215,11 +4209,11 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
nvlist_free(nv);
}
free((void *)(uintptr_t)zc.zc_nvlist_dst);
free(buf);
return (0);
nomem:
free((void *)(uintptr_t)zc.zc_nvlist_dst);
free(buf);
return (no_memory(zhp->zpool_hdl));
}

View File

@ -222,7 +222,6 @@ check_status(nvlist_t *config, boolean_t isimport,
{
pool_scan_stat_t *ps = NULL;
uint_t vsc, psc;
uint64_t nerr;
uint64_t suspended;
uint64_t hostid = 0;
uint64_t errata = 0;
@ -392,6 +391,7 @@ check_status(nvlist_t *config, boolean_t isimport,
* Persistent data errors.
*/
if (!isimport) {
uint64_t nerr;
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
&nerr) == 0 && nerr != 0)
return (ZPOOL_STATUS_CORRUPT_DATA);

View File

@ -944,13 +944,13 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
if (dsl_scan_restarting(scn, tx))
spa_history_log_internal(spa, "scan aborted, restarting", tx,
"errors=%llu", (u_longlong_t)spa_get_errlog_size(spa));
"errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa));
else if (!complete)
spa_history_log_internal(spa, "scan cancelled", tx,
"errors=%llu", (u_longlong_t)spa_get_errlog_size(spa));
"errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa));
else
spa_history_log_internal(spa, "scan done", tx,
"errors=%llu", (u_longlong_t)spa_get_errlog_size(spa));
"errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa));
if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
spa->spa_scrub_active = B_FALSE;
@ -1013,7 +1013,7 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
vdev_clear_resilver_deferred(spa->spa_root_vdev, tx)) {
spa_history_log_internal(spa,
"starting deferred resilver", tx, "errors=%llu",
(u_longlong_t)spa_get_errlog_size(spa));
(u_longlong_t)spa_approx_errlog_size(spa));
spa_async_request(spa, SPA_ASYNC_RESILVER);
}

View File

@ -5543,7 +5543,7 @@ spa_get_stats(const char *name, nvlist_t **config,
fnvlist_add_uint64(*config,
ZPOOL_CONFIG_ERRCOUNT,
spa_get_errlog_size(spa));
spa_approx_errlog_size(spa));
if (spa_suspended(spa)) {
fnvlist_add_uint64(*config,

View File

@ -160,10 +160,8 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj,
dsl_dataset_t *ds;
objset_t *os;
dsl_pool_config_enter(dp, FTAG);
int error = dsl_dataset_hold_obj(dp, ds_obj, FTAG, &ds);
if (error != 0) {
dsl_pool_config_exit(dp, FTAG);
return (error);
}
ASSERT(head_dataset_id);
@ -172,7 +170,6 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj,
error = dmu_objset_from_ds(ds, &os);
if (error != 0) {
dsl_dataset_rele(ds, FTAG);
dsl_pool_config_exit(dp, FTAG);
return (error);
}
@ -189,7 +186,6 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj,
ZFS_KEYSTATUS_UNAVAILABLE) {
zep->zb_birth = 0;
dsl_dataset_rele(ds, FTAG);
dsl_pool_config_exit(dp, FTAG);
return (0);
}
@ -199,7 +195,6 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj,
error = dnode_hold(os, zep->zb_object, FTAG, &dn);
if (error != 0) {
dsl_dataset_rele(ds, FTAG);
dsl_pool_config_exit(dp, FTAG);
return (error);
}
@ -225,7 +220,6 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj,
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
dsl_dataset_rele(ds, FTAG);
dsl_pool_config_exit(dp, FTAG);
return (error);
}
@ -303,17 +297,31 @@ find_birth_txg(dsl_dataset_t *ds, zbookmark_err_phys_t *zep,
}
/*
* This function serves a double role. If only_count is true, it returns
* (in *count) how many times an error block belonging to this filesystem is
* referenced by snapshots or clones. If only_count is false, each time the
* error block is referenced by a snapshot or clone, it fills the userspace
* array at uaddr with the bookmarks of the error blocks. The array is filled
* from the back and *count is modified to be the number of unused entries at
* the beginning of the array.
* Copy the bookmark to the end of the user-space buffer which starts at
* uaddr and has *count unused entries, and decrement *count by 1.
*/
static int
copyout_entry(const zbookmark_phys_t *zb, void *uaddr, uint64_t *count)
{
if (*count == 0)
return (SET_ERROR(ENOMEM));
*count -= 1;
if (copyout(zb, (char *)uaddr + (*count) * sizeof (zbookmark_phys_t),
sizeof (zbookmark_phys_t)) != 0)
return (SET_ERROR(EFAULT));
return (0);
}
/*
* Each time the error block is referenced by a snapshot or clone, add a
* zbookmark_phys_t entry to the userspace array at uaddr. The array is
* filled from the back and the in-out parameter *count is modified to be the
* number of unused entries at the beginning of the array.
*/
static int
check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
uint64_t *count, void *uaddr, boolean_t only_count)
void *uaddr, uint64_t *count)
{
dsl_dataset_t *ds;
dsl_pool_t *dp = spa->spa_dsl_pool;
@ -343,18 +351,12 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
}
if (zep->zb_birth == latest_txg) {
/* Block neither free nor rewritten. */
if (!only_count) {
zbookmark_phys_t zb;
zep_to_zb(head_ds, zep, &zb);
if (copyout(&zb, (char *)uaddr + (*count - 1)
* sizeof (zbookmark_phys_t),
sizeof (zbookmark_phys_t)) != 0) {
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EFAULT));
}
(*count)--;
} else {
(*count)++;
zbookmark_phys_t zb;
zep_to_zb(head_ds, zep, &zb);
error = copyout_entry(&zb, uaddr, count);
if (error != 0) {
dsl_dataset_rele(ds, FTAG);
return (error);
}
check_snapshot = B_FALSE;
} else {
@ -407,19 +409,12 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
snap_obj_array[aff_snap_count] = snap_obj;
aff_snap_count++;
if (!only_count) {
zbookmark_phys_t zb;
zep_to_zb(snap_obj, zep, &zb);
if (copyout(&zb, (char *)uaddr + (*count - 1) *
sizeof (zbookmark_phys_t),
sizeof (zbookmark_phys_t)) != 0) {
dsl_dataset_rele(ds, FTAG);
error = SET_ERROR(EFAULT);
goto out;
}
(*count)--;
} else {
(*count)++;
zbookmark_phys_t zb;
zep_to_zb(snap_obj, zep, &zb);
error = copyout_entry(&zb, uaddr, count);
if (error != 0) {
dsl_dataset_rele(ds, FTAG);
goto out;
}
/*
@ -433,8 +428,7 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
error = check_filesystem(spa,
za.za_first_integer, zep,
count, uaddr, only_count);
za.za_first_integer, zep, uaddr, count);
if (error != 0) {
zap_cursor_fini(&zc);
@ -477,11 +471,8 @@ find_top_affected_fs(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
static int
process_error_block(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
uint64_t *count, void *uaddr, boolean_t only_count)
void *uaddr, uint64_t *count)
{
dsl_pool_t *dp = spa->spa_dsl_pool;
uint64_t top_affected_fs;
/*
* If the zb_birth is 0 it means we failed to retrieve the birth txg
* of the block pointer. This happens when an encrypted filesystem is
@ -489,95 +480,24 @@ process_error_block(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
* check_filesystem(), instead do the accounting here.
*/
if (zep->zb_birth == 0) {
if (!only_count) {
zbookmark_phys_t zb;
zep_to_zb(head_ds, zep, &zb);
if (copyout(&zb, (char *)uaddr + (*count - 1)
* sizeof (zbookmark_phys_t),
sizeof (zbookmark_phys_t)) != 0) {
return (SET_ERROR(EFAULT));
}
(*count)--;
} else {
(*count)++;
zbookmark_phys_t zb;
zep_to_zb(head_ds, zep, &zb);
int error = copyout_entry(&zb, uaddr, count);
if (error != 0) {
return (error);
}
return (0);
}
dsl_pool_config_enter(dp, FTAG);
uint64_t top_affected_fs;
int error = find_top_affected_fs(spa, head_ds, zep, &top_affected_fs);
if (error == 0)
error = check_filesystem(spa, top_affected_fs, zep, count,
uaddr, only_count);
if (error == 0) {
error = check_filesystem(spa, top_affected_fs, zep,
uaddr, count);
}
dsl_pool_config_exit(dp, FTAG);
return (error);
}
static uint64_t
get_errlog_size(spa_t *spa, uint64_t spa_err_obj)
{
if (spa_err_obj == 0)
return (0);
uint64_t total = 0;
zap_cursor_t zc;
zap_attribute_t za;
for (zap_cursor_init(&zc, spa->spa_meta_objset, spa_err_obj);
zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) {
zap_cursor_t head_ds_cursor;
zap_attribute_t head_ds_attr;
zbookmark_err_phys_t head_ds_block;
uint64_t head_ds;
name_to_object(za.za_name, &head_ds);
for (zap_cursor_init(&head_ds_cursor, spa->spa_meta_objset,
za.za_first_integer); zap_cursor_retrieve(&head_ds_cursor,
&head_ds_attr) == 0; zap_cursor_advance(&head_ds_cursor)) {
name_to_errphys(head_ds_attr.za_name, &head_ds_block);
(void) process_error_block(spa, head_ds, &head_ds_block,
&total, NULL, B_TRUE);
}
zap_cursor_fini(&head_ds_cursor);
}
zap_cursor_fini(&zc);
return (total);
}
static uint64_t
get_errlist_size(spa_t *spa, avl_tree_t *tree)
{
if (avl_numnodes(tree) == 0)
return (0);
uint64_t total = 0;
spa_error_entry_t *se;
for (se = avl_first(tree); se != NULL; se = AVL_NEXT(tree, se)) {
zbookmark_err_phys_t zep;
zep.zb_object = se->se_bookmark.zb_object;
zep.zb_level = se->se_bookmark.zb_level;
zep.zb_blkid = se->se_bookmark.zb_blkid;
zep.zb_birth = 0;
/*
* If we cannot find out the head dataset and birth txg of
* the present error block, we opt not to error out. In the
* next pool sync this information will be retrieved by
* sync_error_list() and written to the on-disk error log.
*/
uint64_t head_ds_obj;
int error = get_head_and_birth_txg(spa, &zep,
se->se_bookmark.zb_objset, &head_ds_obj);
if (!error)
(void) process_error_block(spa, head_ds_obj, &zep,
&total, NULL, B_TRUE);
}
return (total);
}
#endif
/*
@ -677,13 +597,33 @@ spa_remove_error(spa_t *spa, zbookmark_phys_t *zb)
spa_add_healed_error(spa, spa->spa_errlog_scrub, zb);
}
static uint64_t
approx_errlog_size_impl(spa_t *spa, uint64_t spa_err_obj)
{
if (spa_err_obj == 0)
return (0);
uint64_t total = 0;
zap_cursor_t zc;
zap_attribute_t za;
for (zap_cursor_init(&zc, spa->spa_meta_objset, spa_err_obj);
zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) {
uint64_t count;
if (zap_count(spa->spa_meta_objset, za.za_first_integer,
&count) == 0)
total += count;
}
zap_cursor_fini(&zc);
return (total);
}
/*
* Return the number of errors currently in the error log. This is actually the
* sum of both the last log and the current log, since we don't know the union
* of these logs until we reach userland.
* Return the approximate number of errors currently in the error log. This
* will be nonzero if there are some errors, but otherwise it may be more
* or less than the number of entries returned by spa_get_errlog().
*/
uint64_t
spa_get_errlog_size(spa_t *spa)
spa_approx_errlog_size(spa_t *spa)
{
uint64_t total = 0;
@ -701,23 +641,16 @@ spa_get_errlog_size(spa_t *spa)
total += count;
mutex_exit(&spa->spa_errlog_lock);
mutex_enter(&spa->spa_errlist_lock);
total += avl_numnodes(&spa->spa_errlist_last);
total += avl_numnodes(&spa->spa_errlist_scrub);
mutex_exit(&spa->spa_errlist_lock);
} else {
#ifdef _KERNEL
mutex_enter(&spa->spa_errlog_lock);
total += get_errlog_size(spa, spa->spa_errlog_last);
total += get_errlog_size(spa, spa->spa_errlog_scrub);
total += approx_errlog_size_impl(spa, spa->spa_errlog_last);
total += approx_errlog_size_impl(spa, spa->spa_errlog_scrub);
mutex_exit(&spa->spa_errlog_lock);
mutex_enter(&spa->spa_errlist_lock);
total += get_errlist_size(spa, &spa->spa_errlist_last);
total += get_errlist_size(spa, &spa->spa_errlist_scrub);
mutex_exit(&spa->spa_errlist_lock);
#endif
}
mutex_enter(&spa->spa_errlist_lock);
total += avl_numnodes(&spa->spa_errlist_last);
total += avl_numnodes(&spa->spa_errlist_scrub);
mutex_exit(&spa->spa_errlist_lock);
return (total);
}
@ -860,8 +793,7 @@ spa_upgrade_errlog(spa_t *spa, dmu_tx_t *tx)
#ifdef _KERNEL
/*
* If an error block is shared by two datasets it will be counted twice. For
* detailed message see spa_get_errlog_size() above.
* If an error block is shared by two datasets it will be counted twice.
*/
static int
process_error_log(spa_t *spa, uint64_t obj, void *uaddr, uint64_t *count)
@ -884,14 +816,11 @@ process_error_log(spa_t *spa, uint64_t obj, void *uaddr, uint64_t *count)
zbookmark_phys_t zb;
name_to_bookmark(za.za_name, &zb);
if (copyout(&zb, (char *)uaddr +
(*count - 1) * sizeof (zbookmark_phys_t),
sizeof (zbookmark_phys_t)) != 0) {
int error = copyout_entry(&zb, uaddr, count);
if (error != 0) {
zap_cursor_fini(&zc);
return (SET_ERROR(EFAULT));
return (error);
}
*count -= 1;
}
zap_cursor_fini(&zc);
return (0);
@ -914,7 +843,7 @@ process_error_log(spa_t *spa, uint64_t obj, void *uaddr, uint64_t *count)
zbookmark_err_phys_t head_ds_block;
name_to_errphys(head_ds_attr.za_name, &head_ds_block);
int error = process_error_block(spa, head_ds,
&head_ds_block, count, uaddr, B_FALSE);
&head_ds_block, uaddr, count);
if (error != 0) {
zap_cursor_fini(&head_ds_cursor);
@ -936,16 +865,11 @@ process_error_list(spa_t *spa, avl_tree_t *list, void *uaddr, uint64_t *count)
if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) {
for (se = avl_first(list); se != NULL;
se = AVL_NEXT(list, se)) {
if (*count == 0)
return (SET_ERROR(ENOMEM));
if (copyout(&se->se_bookmark, (char *)uaddr +
(*count - 1) * sizeof (zbookmark_phys_t),
sizeof (zbookmark_phys_t)) != 0)
return (SET_ERROR(EFAULT));
*count -= 1;
int error =
copyout_entry(&se->se_bookmark, uaddr, count);
if (error != 0) {
return (error);
}
}
return (0);
}
@ -963,7 +887,7 @@ process_error_list(spa_t *spa, avl_tree_t *list, void *uaddr, uint64_t *count)
if (!error)
error = process_error_block(spa, head_ds_obj, &zep,
count, uaddr, B_FALSE);
uaddr, count);
if (error)
return (error);
}
@ -988,6 +912,12 @@ spa_get_errlog(spa_t *spa, void *uaddr, uint64_t *count)
int ret = 0;
#ifdef _KERNEL
/*
* The pool config lock is needed to hold a dataset_t via (among other
* places) process_error_list() -> get_head_and_birth_txg(), and lock
* ordering requires that we get it before the spa_errlog_lock.
*/
dsl_pool_config_enter(spa->spa_dsl_pool, FTAG);
mutex_enter(&spa->spa_errlog_lock);
ret = process_error_log(spa, spa->spa_errlog_scrub, uaddr, count);
@ -1006,6 +936,7 @@ spa_get_errlog(spa_t *spa, void *uaddr, uint64_t *count)
mutex_exit(&spa->spa_errlist_lock);
mutex_exit(&spa->spa_errlog_lock);
dsl_pool_config_exit(spa->spa_dsl_pool, FTAG);
#else
(void) spa, (void) uaddr, (void) count;
#endif
@ -1174,6 +1105,13 @@ spa_errlog_sync(spa_t *spa, uint64_t txg)
spa->spa_scrub_finished = B_FALSE;
mutex_exit(&spa->spa_errlist_lock);
/*
* The pool config lock is needed to hold a dataset_t via
* sync_error_list() -> get_head_and_birth_txg(), and lock ordering
* requires that we get it before the spa_errlog_lock.
*/
dsl_pool_config_enter(spa->spa_dsl_pool, FTAG);
mutex_enter(&spa->spa_errlog_lock);
tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
@ -1218,6 +1156,7 @@ spa_errlog_sync(spa_t *spa, uint64_t txg)
dmu_tx_commit(tx);
mutex_exit(&spa->spa_errlog_lock);
dsl_pool_config_exit(spa->spa_dsl_pool, FTAG);
}
static void
@ -1354,7 +1293,7 @@ spa_swap_errlog(spa_t *spa, uint64_t new_head_ds, uint64_t old_head_ds,
#if defined(_KERNEL)
/* error handling */
EXPORT_SYMBOL(spa_log_error);
EXPORT_SYMBOL(spa_get_errlog_size);
EXPORT_SYMBOL(spa_approx_errlog_size);
EXPORT_SYMBOL(spa_get_errlog);
EXPORT_SYMBOL(spa_errlog_rotate);
EXPORT_SYMBOL(spa_errlog_drain);

View File

@ -5695,17 +5695,12 @@ zfs_ioc_error_log(zfs_cmd_t *zc)
{
spa_t *spa;
int error;
uint64_t count = zc->zc_nvlist_dst_size;
if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
return (error);
error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
&count);
if (error == 0)
zc->zc_nvlist_dst_size = count;
else
zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
&zc->zc_nvlist_dst_size);
spa_close(spa, FTAG);