FreeBSD: convert teardown inactive lock to a read-mostly sleepable lock

The lock is taken all the time and, as a regular read-write lock, it
avoidably serves as a mount-point-wide contention point.

This forward-ports FreeBSD revision r357322.

To quote the aforementioned commit:

Sample result doing an incremental -j 40 build:
before: 173.30s user 458.97s system 2595% cpu 24.358 total
after:  168.58s user 254.92s system 2211% cpu 19.147 total

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Ryan Moeller <freqlabs@FreeBSD.org>
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Closes #10896
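
For context (not part of the commit), a minimal sketch of the read side before and after the conversion; it simply restates the calls visible in the diff below, and assumes FreeBSD >= 1300109 for the rmslock path:

/*
 * Before: Solaris-compat read-write lock. Every reader updates the same
 * lock word, so the znode inactive/reclaim path contends mount-point wide.
 */
rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
/* ... znode inactive/reclaim work ... */
rw_exit(&zfsvfs->z_teardown_inactive_lock);

/*
 * After: FreeBSD read-mostly sleepable lock. Readers are intended to avoid
 * shared-state writes unless a writer (filesystem teardown) is pending.
 */
rms_rlock(&zfsvfs->z_teardown_inactive_lock);
/* ... znode inactive/reclaim work ... */
rms_runlock(&zfsvfs->z_teardown_inactive_lock);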
Author: Mateusz Guzik, 2020-09-09 19:15:52 +02:00 (committed by GitHub)
Commit 8e7fe49b25 (parent c2c7ca0d6d)
4 changed files with 82 additions and 19 deletions
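
The header below wraps the lock behind ZFS_*_TEARDOWN_INACTIVE macros so that call sites stay identical on either lock backend. As a rough illustration (the helper name here is made up; its body mirrors zfs_freebsd_need_inactive() further down):

static int
teardown_inactive_read_sketch(zfsvfs_t *zfsvfs, znode_t *zp)
{
	int need;

	/* Expands to rms_try_rlock() or rw_tryenter(..., RW_READER). */
	if (!ZFS_TRYRLOCK_TEARDOWN_INACTIVE(zfsvfs))
		return (1);
	need = (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty);
	/* Expands to rms_runlock() or rw_exit(). */
	ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
	return (need);
}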


@@ -27,18 +27,31 @@
#ifndef _SYS_FS_ZFS_VFSOPS_H
#define _SYS_FS_ZFS_VFSOPS_H
+#if __FreeBSD_version >= 1300109
+#define TEARDOWN_INACTIVE_RMS
+#endif
#include <sys/dataset_kstats.h>
#include <sys/list.h>
#include <sys/vfs.h>
#include <sys/zil.h>
#include <sys/sa.h>
#include <sys/rrwlock.h>
+#ifdef TEARDOWN_INACTIVE_RMS
+#include <sys/rmlock.h>
+#endif
#include <sys/zfs_ioctl.h>
#ifdef __cplusplus
extern "C" {
#endif
+#ifdef TEARDOWN_INACTIVE_RMS
+typedef struct rmslock zfs_teardown_lock_t;
+#else
+#define zfs_teardown_lock_t krwlock_t
+#endif
typedef struct zfsvfs zfsvfs_t;
struct znode;
@@ -67,7 +80,7 @@ struct zfsvfs {
boolean_t z_atime; /* enable atimes mount option */
boolean_t z_unmounted; /* unmounted */
rrmlock_t z_teardown_lock;
-krwlock_t z_teardown_inactive_lock;
+zfs_teardown_lock_t z_teardown_inactive_lock;
list_t z_all_znodes; /* all vnodes in the fs */
uint64_t z_nr_znodes; /* number of znodes in the fs */
kmutex_t z_znodes_lock; /* lock for z_all_znodes */
@@ -98,6 +111,56 @@ struct zfsvfs {
struct task z_unlinked_drain_task;
};
+#ifdef TEARDOWN_INACTIVE_RMS
+#define ZFS_INIT_TEARDOWN_INACTIVE(zfsvfs) \
+	rms_init(&(zfsvfs)->z_teardown_inactive_lock, "zfs teardown inactive")
+#define ZFS_DESTROY_TEARDOWN_INACTIVE(zfsvfs) \
+	rms_destroy(&(zfsvfs)->z_teardown_inactive_lock)
+#define ZFS_TRYRLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+	rms_try_rlock(&(zfsvfs)->z_teardown_inactive_lock)
+#define ZFS_RLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+	rms_rlock(&(zfsvfs)->z_teardown_inactive_lock)
+#define ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+	rms_runlock(&(zfsvfs)->z_teardown_inactive_lock)
+#define ZFS_WLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+	rms_wlock(&(zfsvfs)->z_teardown_inactive_lock)
+#define ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+	rms_wunlock(&(zfsvfs)->z_teardown_inactive_lock)
+#define ZFS_TEARDOWN_INACTIVE_WLOCKED(zfsvfs) \
+	rms_wowned(&(zfsvfs)->z_teardown_inactive_lock)
+#else
+#define ZFS_INIT_TEARDOWN_INACTIVE(zfsvfs) \
+	rw_init(&(zfsvfs)->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL)
+#define ZFS_DESTROY_TEARDOWN_INACTIVE(zfsvfs) \
+	rw_destroy(&(zfsvfs)->z_teardown_inactive_lock)
+#define ZFS_TRYRLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+	rw_tryenter(&(zfsvfs)->z_teardown_inactive_lock, RW_READER)
+#define ZFS_RLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+	rw_enter(&(zfsvfs)->z_teardown_inactive_lock, RW_READER)
+#define ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+	rw_exit(&(zfsvfs)->z_teardown_inactive_lock)
+#define ZFS_WLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+	rw_enter(&(zfsvfs)->z_teardown_inactive_lock, RW_WRITER)
+#define ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+	rw_exit(&(zfsvfs)->z_teardown_inactive_lock)
+#define ZFS_TEARDOWN_INACTIVE_WLOCKED(zfsvfs) \
+	RW_WRITE_HELD(&(zfsvfs)->z_teardown_inactive_lock)
+#endif
#define ZSB_XATTR 0x0001 /* Enable user xattrs */
/*
* Normal filesystems (those not under .zfs/snapshot) have a total


@@ -975,7 +975,7 @@ zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
#else
rrm_init(&zfsvfs->z_teardown_lock, B_FALSE);
#endif
-rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
+ZFS_INIT_TEARDOWN_INACTIVE(zfsvfs);
rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++)
mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
@@ -1126,7 +1126,7 @@ zfsvfs_free(zfsvfs_t *zfsvfs)
ASSERT(zfsvfs->z_nr_znodes == 0);
list_destroy(&zfsvfs->z_all_znodes);
rrm_destroy(&zfsvfs->z_teardown_lock);
-rw_destroy(&zfsvfs->z_teardown_inactive_lock);
+ZFS_DESTROY_TEARDOWN_INACTIVE(zfsvfs);
rw_destroy(&zfsvfs->z_fuid_lock);
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
mutex_destroy(&zfsvfs->z_hold_mtx[i]);
@@ -1545,7 +1545,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
zfsvfs->z_log = NULL;
}
-rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);
+ZFS_WLOCK_TEARDOWN_INACTIVE(zfsvfs);
/*
* If we are not unmounting (ie: online recv) and someone already
@@ -1553,7 +1553,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
* or a reopen of z_os failed then just bail out now.
*/
if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
-rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
return (SET_ERROR(EIO));
}
@@ -1581,7 +1581,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
*/
if (unmounting) {
zfsvfs->z_unmounted = B_TRUE;
-rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
}
@@ -1901,7 +1901,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
znode_t *zp;
ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
-ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
+ASSERT(ZFS_TEARDOWN_INACTIVE_WLOCKED(zfsvfs));
/*
* We already own this, so just update the objset_t, as the one we
@@ -1939,7 +1939,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
bail:
/* release the VOPs */
-rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
if (err) {
@@ -2056,7 +2056,7 @@ int
zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
{
ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
-ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
+ASSERT(ZFS_TEARDOWN_INACTIVE_WLOCKED(zfsvfs));
/*
* We already own this, so just hold and rele it to update the
@@ -2072,7 +2072,7 @@ zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
zfsvfs->z_os = os;
/* release the VOPs */
-rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
/*


@@ -4638,13 +4638,13 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
int error;
-rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
+ZFS_RLOCK_TEARDOWN_INACTIVE(zfsvfs);
if (zp->z_sa_hdl == NULL) {
/*
* The fs has been unmounted, or we did a
* suspend/resume and this file no longer exists.
*/
-rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
vrecycle(vp);
return;
}
@@ -4653,7 +4653,7 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
/*
* Fast path to recycle a vnode of a removed file.
*/
-rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
vrecycle(vp);
return;
}
@@ -4673,7 +4673,7 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
dmu_tx_commit(tx);
}
}
-rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
}
@@ -5823,10 +5823,10 @@ zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap)
if (vn_need_pageq_flush(vp))
return (1);
-if (!rw_tryenter(&zfsvfs->z_teardown_inactive_lock, RW_READER))
+if (!ZFS_TRYRLOCK_TEARDOWN_INACTIVE(zfsvfs))
return (1);
need = (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty);
-rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
return (need);
}
@@ -5857,12 +5857,12 @@ zfs_freebsd_reclaim(struct vop_reclaim_args *ap)
* zfs_znode_dmu_fini in zfsvfs_teardown during
* force unmount.
*/
-rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
+ZFS_RLOCK_TEARDOWN_INACTIVE(zfsvfs);
if (zp->z_sa_hdl == NULL)
zfs_znode_free(zp);
else
zfs_zinactive(zp);
-rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
vp->v_data = NULL;
return (0);


@@ -384,7 +384,7 @@ zfs_znode_dmu_fini(znode_t *zp)
{
ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) ||
zp->z_unlinked ||
-RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock));
+ZFS_TEARDOWN_INACTIVE_WLOCKED(zp->z_zfsvfs));
sa_handle_destroy(zp->z_sa_hdl);
zp->z_sa_hdl = NULL;