Add zfs_sb_prune_aliases() function

For kernels which do not implement a per-superblock shrinker,
those older than Linux 3.1, the shrink_dcache_parent() function
was used to attempt to reclaim dentries.  This was found not to be
entirely reliable and could lead to performance issues on older
kernels running meta-data heavy workloads.

To address this issue a zfs_sb_prune_aliases() function has been
added to implement this functionality.  It relies on traversing
the list of znodes for a filesystem and adding them to a private
list with a reference held.  The private list can then be safely
walked outside the z_znodes_lock to prune dentries and drop the
last reference so the inode can be freed.

This provides the same synchronous behavior as the per-filesystem
shrinker and has the advantage of depending on only long standing
interfaces.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tim Chase <tim@chase2k.com>
Closes #3501
This commit is contained in:
Brian Behlendorf 2015-06-18 09:21:19 -07:00
parent 4c6a700910
commit 218b4e0a76
3 changed files with 84 additions and 11 deletions

View File

@ -0,0 +1,19 @@
dnl #
dnl # 2.6.12 API change
dnl # d_prune_aliases() helper function available.
dnl #
dnl # The check passes a snippet that includes <linux/dcache.h> and calls
dnl # d_prune_aliases() on a struct inode pointer to
dnl # ZFS_LINUX_TRY_COMPILE_SYMBOL, together with the symbol name
dnl # (d_prune_aliases) and the kernel source file expected to provide it
dnl # (fs/dcache.c).  On success "yes" is reported and HAVE_D_PRUNE_ALIASES
dnl # is defined to 1; on failure only "no" is reported and nothing is
dnl # defined.
dnl #
dnl # NOTE(review): how the symbol/file arguments are used is decided by
dnl # ZFS_LINUX_TRY_COMPILE_SYMBOL, which is defined elsewhere -- presumably
dnl # to confirm the symbol is exported; confirm against that macro.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_D_PRUNE_ALIASES],
[AC_MSG_CHECKING([whether d_prune_aliases() is available])
ZFS_LINUX_TRY_COMPILE_SYMBOL([
#include <linux/dcache.h>
], [
struct inode *ip = NULL;
d_prune_aliases(ip);
], [d_prune_aliases], [fs/dcache.c], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_D_PRUNE_ALIASES, 1,
[d_prune_aliases() is available])
], [
AC_MSG_RESULT(no)
])
])

View File

@ -78,6 +78,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_INSERT_INODE_LOCKED
ZFS_AC_KERNEL_D_MAKE_ROOT
ZFS_AC_KERNEL_D_OBTAIN_ALIAS
ZFS_AC_KERNEL_D_PRUNE_ALIASES
ZFS_AC_KERNEL_D_SET_D_OP
ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA
ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS

View File

@ -1072,6 +1072,67 @@ zfs_root(zfs_sb_t *zsb, struct inode **ipp)
}
EXPORT_SYMBOL(zfs_root);
#if !defined(HAVE_SPLIT_SHRINKER_CALLBACK) && !defined(HAVE_SHRINK) && \
	defined(HAVE_D_PRUNE_ALIASES)
/*
 * Linux kernels older than 3.1 do not support a per-filesystem shrinker.
 * To accommodate this we must improvise and manually walk the list of znodes
 * attempting to prune dentries in order to be able to drop the inodes.
 *
 * To avoid scanning the same znodes multiple times they are always rotated
 * to the end of the z_all_znodes list.  New znodes are inserted at the
 * end of the list so we're always scanning the oldest znodes first.
 *
 * The walk is done in two phases.  First, with z_znodes_lock held, up to
 * nr_to_scan znodes are examined and a reference is taken (igrab) on each
 * candidate inode, which is stored in a private array holding at most
 * MIN(nr_to_scan, PAGE_SIZE * 8 / sizeof (znode_t *)) entries.  Second,
 * after the lock is dropped, the dentry aliases of every collected inode
 * are pruned and the reference taken in the first phase is released.
 *
 * Returns the number of collected inodes whose only remaining reference,
 * after pruning, was the one held by this function.
 */
static int
zfs_sb_prune_aliases(zfs_sb_t *zsb, unsigned long nr_to_scan)
{
	znode_t **zp_array, *zp;
	int max_array = MIN(nr_to_scan, PAGE_SIZE * 8 / sizeof (znode_t *));
	unsigned long scanned = 0;
	int objects = 0;
	int i, j = 0;

	/* Nothing was requested; avoid a zero-length allocation. */
	if (max_array == 0)
		return (0);

	zp_array = kmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);

	mutex_enter(&zsb->z_znodes_lock);
	while ((zp = list_head(&zsb->z_all_znodes)) != NULL) {

		/*
		 * Stop once nr_to_scan znodes have been examined or the
		 * private array is full.  The counter is unsigned long so
		 * the comparison with nr_to_scan is not mixed-sign, and
		 * '>=' ensures no more than nr_to_scan znodes are visited.
		 */
		if ((scanned++ >= nr_to_scan) || (j >= max_array))
			break;

		/*
		 * Rotate the znode to the tail so the next pass starts with
		 * a different one.
		 *
		 * NOTE(review): when the list holds fewer than nr_to_scan
		 * znodes the rotation brings already visited znodes back to
		 * the head, so the same inode may be grabbed more than once
		 * in a single call.  Every reference taken here is still
		 * released by the matching iput() below.
		 */
		ASSERT(list_link_active(&zp->z_link_node));
		list_remove(&zsb->z_all_znodes, zp);
		list_insert_tail(&zsb->z_all_znodes, zp);

		/* Skip active znodes and .zfs entries */
		if (MUTEX_HELD(&zp->z_lock) || zp->z_is_ctldir)
			continue;

		/* igrab() returned no inode, so no reference was taken. */
		if (igrab(ZTOI(zp)) == NULL)
			continue;

		zp_array[j] = zp;
		j++;
	}
	mutex_exit(&zsb->z_znodes_lock);

	/* Prune outside of z_znodes_lock using the references held above. */
	for (i = 0; i < j; i++) {
		zp = zp_array[i];

		ASSERT3P(zp, !=, NULL);
		d_prune_aliases(ZTOI(zp));

		/* Only our own reference is left on this inode. */
		if (atomic_read(&ZTOI(zp)->i_count) == 1)
			objects++;

		iput(ZTOI(zp));
	}

	kmem_free(zp_array, max_array * sizeof (znode_t *));

	return (objects);
}
#endif /* HAVE_D_PRUNE_ALIASES */
/*
* The ARC has requested that the filesystem drop entries from the dentry
* and inode caches. This can occur when the ARC needs to free meta data
@ -1106,18 +1167,10 @@ zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
*objects = (*shrinker->scan_objects)(shrinker, &sc);
#elif defined(HAVE_SHRINK)
*objects = (*shrinker->shrink)(shrinker, &sc);
#elif defined(HAVE_D_PRUNE_ALIASES)
*objects = zfs_sb_prune_aliases(zsb, nr_to_scan);
#else
/*
* Linux kernels older than 3.1 do not support a per-filesystem
* shrinker. Therefore, we must fall back to the only available
* interface which is to discard all unused dentries and inodes.
* This behavior clearly isn't ideal but it's required so the ARC
* may free memory. The performance impact is mitigated by the
* fact that the frequently accessed dentry and inode buffers will
* still be in the ARC making them relatively cheap to recreate.
*/
*objects = 0;
shrink_dcache_parent(sb->s_root);
#error "No available dentry and inode cache pruning mechanism."
#endif
ZFS_EXIT(zsb);