ddt: add support for prefetching tables into the ARC

This change adds a new `zpool prefetch -t ddt $pool` command which
causes a pool's DDT to be loaded into the ARC. The primary goal is to
remove the need to "warm" a pool's cache before deduplication stops
slowing write performance. It may also provide a way to reload portions
of a DDT if they have been flushed due to inactivity.

Sponsored-by: iXsystems, Inc.
Sponsored-by: Catalogics, Inc.
Sponsored-by: Klara, Inc.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Allan Jude <allan@klarasystems.com>
Signed-off-by: Will Andrews <will.andrews@klarasystems.com>
Signed-off-by: Fred Weigel <fred.weigel@klarasystems.com>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Signed-off-by: Don Brady <don.brady@klarasystems.com>
Co-authored-by: Will Andrews <will.andrews@klarasystems.com>
Co-authored-by: Don Brady <don.brady@klarasystems.com>
Closes #15890
This commit is contained in:
Allan Jude 2024-07-26 12:16:18 -04:00 committed by GitHub
parent 2ed1aebaf6
commit 62e7d3c89e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
37 changed files with 1067 additions and 52 deletions

View File

@ -1985,8 +1985,8 @@ dump_ddt(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
(void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
name,
(u_longlong_t)count,
(u_longlong_t)(dspace / count),
(u_longlong_t)(mspace / count));
(u_longlong_t)dspace,
(u_longlong_t)mspace);
if (dump_opt['D'] < 3)
return;

View File

@ -32,7 +32,7 @@
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>
* Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
* Copyright (c) 2021, Klara Inc.
* Copyright (c) 2021, 2023, Klara Inc.
* Copyright [2021] Hewlett Packard Enterprise Development LP
*/
@ -90,6 +90,7 @@ static int zpool_do_remove(int, char **);
static int zpool_do_labelclear(int, char **);
static int zpool_do_checkpoint(int, char **);
static int zpool_do_prefetch(int, char **);
static int zpool_do_list(int, char **);
static int zpool_do_iostat(int, char **);
@ -176,6 +177,7 @@ typedef enum {
HELP_LIST,
HELP_OFFLINE,
HELP_ONLINE,
HELP_PREFETCH,
HELP_REPLACE,
HELP_REMOVE,
HELP_INITIALIZE,
@ -307,6 +309,7 @@ static zpool_command_t command_table[] = {
{ "labelclear", zpool_do_labelclear, HELP_LABELCLEAR },
{ NULL },
{ "checkpoint", zpool_do_checkpoint, HELP_CHECKPOINT },
{ "prefetch", zpool_do_prefetch, HELP_PREFETCH },
{ NULL },
{ "list", zpool_do_list, HELP_LIST },
{ "iostat", zpool_do_iostat, HELP_IOSTAT },
@ -398,6 +401,9 @@ get_usage(zpool_help_t idx)
return (gettext("\tlist [-gHLpPv] [-o property[,...]] "
"[-T d|u] [pool] ... \n"
"\t [interval [count]]\n"));
case HELP_PREFETCH:
return (gettext("\tprefetch -t <type> [<type opts>] <pool>\n"
"\t -t ddt <pool>\n"));
case HELP_OFFLINE:
return (gettext("\toffline [--power]|[[-f][-t]] <pool> "
"<device> ...\n"));
@ -3827,6 +3833,72 @@ zpool_do_checkpoint(int argc, char **argv)
#define CHECKPOINT_OPT 1024
/*
 * zpool prefetch -t <type> <pool>
 *
 * Prefetches a particular type of data in the specified pool.
 * The only type accepted here is "ddt".
 *
 * Returns 1 if the pool cannot be opened, otherwise the result of
 * zpool_prefetch().  Argument errors are reported through usage().
 */
int
zpool_do_prefetch(int argc, char **argv)
{
	int c;
	char *poolname;
	char *typestr = NULL;
	zpool_prefetch_type_t type;
	zpool_handle_t *zhp;
	int err = 0;

	/*
	 * The leading ':' in the option string makes getopt() return ':'
	 * for a missing option argument (instead of '?'), so that the
	 * dedicated "missing argument" case below is actually reachable.
	 */
	while ((c = getopt(argc, argv, ":t:")) != -1) {
		switch (c) {
		case 't':
			typestr = optarg;
			break;
		case ':':
			(void) fprintf(stderr, gettext("missing argument for "
			    "'%c' option\n"), optopt);
			usage(B_FALSE);
			break;
		case '?':
			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
			    optopt);
			usage(B_FALSE);
		}
	}

	argc -= optind;
	argv += optind;

	if (argc < 1) {
		(void) fprintf(stderr, gettext("missing pool name argument\n"));
		usage(B_FALSE);
	}

	if (argc > 1) {
		(void) fprintf(stderr, gettext("too many arguments\n"));
		usage(B_FALSE);
	}

	poolname = argv[0];

	argc--;
	argv++;

	/*
	 * -t is mandatory: without it typestr is still NULL and must not
	 * be handed to strcmp().
	 */
	if (typestr == NULL) {
		(void) fprintf(stderr, gettext("missing prefetch type\n"));
		usage(B_FALSE);
	} else if (strcmp(typestr, "ddt") == 0) {
		type = ZPOOL_PREFETCH_DDT;
	} else {
		(void) fprintf(stderr, gettext("unsupported prefetch type\n"));
		usage(B_FALSE);
	}

	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
		return (1);

	err = zpool_prefetch(zhp, type);

	zpool_close(zhp);

	return (err);
}
/*
* zpool import [-d dir] [-D]
* import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l]
@ -6446,6 +6518,7 @@ print_one_column(zpool_prop_t prop, uint64_t value, const char *str,
case ZPOOL_PROP_EXPANDSZ:
case ZPOOL_PROP_CHECKPOINT:
case ZPOOL_PROP_DEDUPRATIO:
case ZPOOL_PROP_DEDUPCACHED:
if (value == 0)
(void) strlcpy(propval, "-", sizeof (propval));
else
@ -8792,13 +8865,17 @@ print_l2cache(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t **l2cache,
}
static void
print_dedup_stats(nvlist_t *config)
print_dedup_stats(zpool_handle_t *zhp, nvlist_t *config, boolean_t literal)
{
ddt_histogram_t *ddh;
ddt_stat_t *dds;
ddt_object_t *ddo;
uint_t c;
char dspace[6], mspace[6];
/* Extra space provided for literal display */
char dspace[32], mspace[32], cspace[32];
uint64_t cspace_prop;
enum zfs_nicenum_format format;
zprop_source_t src;
/*
* If the pool was faulted then we may not have been able to
@ -8816,12 +8893,26 @@ print_dedup_stats(nvlist_t *config)
return;
}
zfs_nicebytes(ddo->ddo_dspace, dspace, sizeof (dspace));
zfs_nicebytes(ddo->ddo_mspace, mspace, sizeof (mspace));
(void) printf("DDT entries %llu, size %s on disk, %s in core\n",
/*
* Squash cached size into in-core size to handle race.
* Only include cached size if it is available.
*/
cspace_prop = zpool_get_prop_int(zhp, ZPOOL_PROP_DEDUPCACHED, &src);
cspace_prop = MIN(cspace_prop, ddo->ddo_mspace);
format = literal ? ZFS_NICENUM_RAW : ZFS_NICENUM_1024;
zfs_nicenum_format(cspace_prop, cspace, sizeof (cspace), format);
zfs_nicenum_format(ddo->ddo_dspace, dspace, sizeof (dspace), format);
zfs_nicenum_format(ddo->ddo_mspace, mspace, sizeof (mspace), format);
(void) printf("DDT entries %llu, size %s on disk, %s in core",
(u_longlong_t)ddo->ddo_count,
dspace,
mspace);
if (src != ZPROP_SRC_DEFAULT) {
(void) printf(", %s cached (%.02f%%)",
cspace,
(double)cspace_prop / (double)ddo->ddo_mspace * 100.0);
}
(void) printf("\n");
verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_STATS,
(uint64_t **)&dds, &c) == 0);
@ -8857,6 +8948,10 @@ status_callback(zpool_handle_t *zhp, void *data)
uint_t c;
vdev_stat_t *vs;
/* If dedup stats were requested, also fetch dedupcached. */
if (cbp->cb_dedup_stats > 1)
zpool_add_propname(zhp, ZPOOL_DEDUPCACHED_PROP_NAME);
config = zpool_get_config(zhp, NULL);
reason = zpool_get_status(zhp, &msgid, &errata);
@ -9338,7 +9433,7 @@ status_callback(zpool_handle_t *zhp, void *data)
}
if (cbp->cb_dedup_stats)
print_dedup_stats(config);
print_dedup_stats(zhp, config, cbp->cb_literal);
} else {
(void) printf(gettext("config: The configuration cannot be "
"determined.\n"));
@ -9412,7 +9507,8 @@ zpool_do_status(int argc, char **argv)
cmd = optarg;
break;
case 'D':
cb.cb_dedup_stats = B_TRUE;
if (++cb.cb_dedup_stats > 2)
cb.cb_dedup_stats = 2;
break;
case 'e':
cb.cb_print_unhealthy = B_TRUE;

View File

@ -26,6 +26,7 @@
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2017 Joyent, Inc.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2023, Klara, Inc.
*/
/*
@ -444,6 +445,7 @@ ztest_func_t ztest_blake3;
ztest_func_t ztest_fletcher;
ztest_func_t ztest_fletcher_incr;
ztest_func_t ztest_verify_dnode_bt;
ztest_func_t ztest_pool_prefetch_ddt;
static uint64_t zopt_always = 0ULL * NANOSEC; /* all the time */
static uint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */
@ -499,6 +501,7 @@ static ztest_info_t ztest_info[] = {
ZTI_INIT(ztest_fletcher, 1, &zopt_rarely),
ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely),
ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes),
ZTI_INIT(ztest_pool_prefetch_ddt, 1, &zopt_rarely),
};
#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t))
@ -6993,6 +6996,21 @@ ztest_fletcher_incr(ztest_ds_t *zd, uint64_t id)
}
}
/*
 * ztest operation: open the pool named in ztest_opts, call
 * ddt_prefetch_all() on it and close it again, holding ztest_name_lock
 * as a reader for the duration.
 */
void
ztest_pool_prefetch_ddt(ztest_ds_t *zd, uint64_t id)
{
	spa_t *pool;

	/* Neither argument is used by this operation. */
	(void) zd;
	(void) id;

	(void) pthread_rwlock_rdlock(&ztest_name_lock);

	VERIFY0(spa_open(ztest_opts.zo_pool, &pool, FTAG));
	ddt_prefetch_all(pool);
	spa_close(pool, FTAG);

	(void) pthread_rwlock_unlock(&ztest_name_lock);
}
static int
ztest_set_global_vars(void)
{

View File

@ -111,6 +111,7 @@ usr/share/man/man8/zpool-labelclear.8
usr/share/man/man8/zpool-list.8
usr/share/man/man8/zpool-offline.8
usr/share/man/man8/zpool-online.8
usr/share/man/man8/zpool-prefetch.8
usr/share/man/man8/zpool-reguid.8
usr/share/man/man8/zpool-remove.8
usr/share/man/man8/zpool-reopen.8

View File

@ -458,6 +458,7 @@ _LIBZFS_H nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **);
_LIBZFS_H nvlist_t *zpool_get_features(zpool_handle_t *);
_LIBZFS_H int zpool_refresh_stats(zpool_handle_t *, boolean_t *);
_LIBZFS_H int zpool_get_errlog(zpool_handle_t *, nvlist_t **);
_LIBZFS_H void zpool_add_propname(zpool_handle_t *, const char *);
/*
* Import and export functions
@ -504,6 +505,8 @@ _LIBZFS_H int zpool_checkpoint(zpool_handle_t *);
_LIBZFS_H int zpool_discard_checkpoint(zpool_handle_t *);
_LIBZFS_H boolean_t zpool_is_draid_spare(const char *);
_LIBZFS_H int zpool_prefetch(zpool_handle_t *, zpool_prefetch_type_t);
/*
* Basic handle manipulations. These functions do not create or destroy the
* underlying datasets, only the references to them.

View File

@ -148,6 +148,9 @@ _LIBZFS_CORE_H int lzc_pool_checkpoint_discard(const char *);
_LIBZFS_CORE_H int lzc_wait(const char *, zpool_wait_activity_t, boolean_t *);
_LIBZFS_CORE_H int lzc_wait_tag(const char *, zpool_wait_activity_t, uint64_t,
boolean_t *);
_LIBZFS_CORE_H int lzc_pool_prefetch(const char *, zpool_prefetch_type_t);
_LIBZFS_CORE_H int lzc_wait_fs(const char *, zfs_wait_activity_t, boolean_t *);
_LIBZFS_CORE_H int lzc_set_bootenv(const char *, const nvlist_t *);

View File

@ -250,6 +250,16 @@ typedef struct arc_buf_info {
enum zio_compress abi_l2arc_compress;
} arc_buf_info_t;
/*
* Flags returned by arc_cached; describes which part of the arc
* the block is cached in.
*/
#define ARC_CACHED_EMBEDDED (1U << 0)
#define ARC_CACHED_IN_L1 (1U << 1)
#define ARC_CACHED_IN_MRU (1U << 2)
#define ARC_CACHED_IN_MFU (1U << 3)
#define ARC_CACHED_IN_L2 (1U << 4)
void arc_space_consume(uint64_t space, arc_space_type_t type);
void arc_space_return(uint64_t space, arc_space_type_t type);
boolean_t arc_is_metadata(arc_buf_t *buf);
@ -310,6 +320,7 @@ zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
arc_prune_t *arc_add_prune_callback(arc_prune_func_t *func, void *priv);
void arc_remove_prune_callback(arc_prune_t *p);
void arc_freed(spa_t *spa, const blkptr_t *bp);
int arc_cached(spa_t *spa, const blkptr_t *bp);
void arc_flush(spa_t *spa, boolean_t retry);
void arc_tempreserve_clear(uint64_t reserve);

View File

@ -236,6 +236,7 @@ extern void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total);
extern uint64_t ddt_get_dedup_dspace(spa_t *spa);
extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
extern int ddt_get_pool_dedup_cached(spa_t *spa, uint64_t *psize);
extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp);
extern void ddt_enter(ddt_t *ddt);
@ -243,8 +244,9 @@ extern void ddt_exit(ddt_t *ddt);
extern void ddt_init(void);
extern void ddt_fini(void);
extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add);
extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);
extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
extern void ddt_prefetch_all(spa_t *spa);
extern boolean_t ddt_class_contains(spa_t *spa, ddt_class_t max_class,
const blkptr_t *bp);

View File

@ -47,6 +47,7 @@ typedef struct {
const ddt_key_t *ddk);
void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
const ddt_key_t *ddk);
void (*ddt_op_prefetch_all)(objset_t *os, uint64_t object);
int (*ddt_op_update)(objset_t *os, uint64_t object,
const ddt_key_t *ddk, const ddt_phys_t *phys, size_t psize,
dmu_tx_t *tx);

View File

@ -505,6 +505,12 @@ void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum,
void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
dmu_tx_t *tx);
/*
* Get an estimated cache size for an object. Caller must expect races.
*/
int dmu_object_cached_size(objset_t *os, uint64_t object,
uint64_t *l1sz, uint64_t *l2sz);
void dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset,
void *data, uint8_t etype, uint8_t comp, int uncompressed_size,
int compressed_size, int byteorder, dmu_tx_t *tx);
@ -903,6 +909,8 @@ void dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
void dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset,
uint64_t len, enum zio_priority pri);
void dmu_prefetch_dnode(objset_t *os, uint64_t object, enum zio_priority pri);
int dmu_prefetch_wait(objset_t *os, uint64_t object, uint64_t offset,
uint64_t size);
typedef struct dmu_object_info {
/* All sizes are in bytes unless otherwise indicated. */

View File

@ -260,6 +260,7 @@ typedef enum {
ZPOOL_PROP_BCLONERATIO,
ZPOOL_PROP_DEDUP_TABLE_SIZE,
ZPOOL_PROP_DEDUP_TABLE_QUOTA,
ZPOOL_PROP_DEDUPCACHED,
ZPOOL_NUM_PROPS
} zpool_prop_t;
@ -1517,6 +1518,7 @@ typedef enum zfs_ioc {
ZFS_IOC_VDEV_GET_PROPS, /* 0x5a55 */
ZFS_IOC_VDEV_SET_PROPS, /* 0x5a56 */
ZFS_IOC_POOL_SCRUB, /* 0x5a57 */
ZFS_IOC_POOL_PREFETCH, /* 0x5a58 */
/*
* Per-platform (Optional) - 8/128 numbers reserved.
@ -1648,6 +1650,11 @@ typedef enum {
ZFS_WAIT_NUM_ACTIVITIES
} zfs_wait_activity_t;
typedef enum {
ZPOOL_PREFETCH_NONE = 0,
ZPOOL_PREFETCH_DDT
} zpool_prefetch_type_t;
/*
* Bookmark name values.
*/
@ -1686,6 +1693,17 @@ typedef enum {
*/
#define ZPOOL_HIDDEN_ARGS "hidden_args"
/*
* The following is used when invoking ZFS_IOC_POOL_GET_PROPS.
*/
#define ZPOOL_GET_PROPS_NAMES "get_props_names"
/*
* Opt-in property names used with ZPOOL_GET_PROPS_NAMES.
* For example, properties that are hidden or expensive to compute.
*/
#define ZPOOL_DEDUPCACHED_PROP_NAME "dedupcached"
/*
* The following are names used when invoking ZFS_IOC_POOL_INITIALIZE.
*/
@ -1725,6 +1743,11 @@ typedef enum {
#define ZFS_WAIT_ACTIVITY "wait_activity"
#define ZFS_WAIT_WAITED "wait_waited"
/*
* The following are names used when invoking ZFS_IOC_POOL_PREFETCH.
*/
#define ZPOOL_PREFETCH_TYPE "prefetch_type"
/*
* Flags for ZFS_IOC_VDEV_SET_STATE
*/

View File

@ -1198,6 +1198,8 @@ extern void spa_boot_init(void);
/* properties */
extern int spa_prop_set(spa_t *spa, nvlist_t *nvp);
extern int spa_prop_get(spa_t *spa, nvlist_t **nvp);
extern int spa_prop_get_nvlist(spa_t *spa, char **props,
unsigned int n_props, nvlist_t **outnvl);
extern void spa_prop_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx);
extern void spa_configfile_set(spa_t *, nvlist_t *, boolean_t);

View File

@ -225,6 +225,7 @@ int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);
int zap_contains(objset_t *ds, uint64_t zapobj, const char *name);
int zap_prefetch(objset_t *os, uint64_t zapobj, const char *name);
int zap_prefetch_object(objset_t *os, uint64_t zapobj);
int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints);

View File

@ -457,6 +457,7 @@
<elf-symbol name='zfs_wait_status' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_zpl_version_map' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_add' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_add_propname' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_checkpoint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_clear' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_clear_label' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@ -520,6 +521,7 @@
<elf-symbol name='zpool_open' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_open_canfail' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_pool_state_to_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_prefetch' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_prepare_and_label_disk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_prepare_disk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_print_unsup_feat' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@ -1642,6 +1644,9 @@
<class-decl name='uu_avl' is-struct='yes' visibility='default' is-declaration-only='yes' id='4af029d1'/>
<class-decl name='uu_avl_pool' is-struct='yes' visibility='default' is-declaration-only='yes' id='12a530a8'/>
<class-decl name='uu_avl_walk' is-struct='yes' visibility='default' is-declaration-only='yes' id='e70a39e3'/>
<array-type-def dimensions='1' type-id='80f4b756' size-in-bits='256' id='71dc54ac'>
<subrange length='4' type-id='7359adad' id='16fe7105'/>
</array-type-def>
<type-decl name='int' size-in-bits='32' id='95e97e5e'/>
<type-decl name='long int' size-in-bits='64' id='bd54fe1a'/>
<type-decl name='long long int' size-in-bits='64' id='1eb56b1e'/>
@ -2096,7 +2101,7 @@
<var-decl name='zfs_props_table' type-id='ae3e8ca6' visibility='default'/>
</data-member>
</class-decl>
<class-decl name='zpool_handle' size-in-bits='2560' is-struct='yes' visibility='default' id='67002a8a'>
<class-decl name='zpool_handle' size-in-bits='2816' is-struct='yes' visibility='default' id='67002a8a'>
<data-member access='public' layout-offset-in-bits='0'>
<var-decl name='zpool_hdl' type-id='b0382bb3' visibility='default'/>
</data-member>
@ -2109,19 +2114,25 @@
<data-member access='public' layout-offset-in-bits='2176'>
<var-decl name='zpool_state' type-id='95e97e5e' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='2208'>
<var-decl name='zpool_n_propnames' type-id='f0981eeb' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='2240'>
<var-decl name='zpool_config_size' type-id='b59d7dce' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='2304'>
<var-decl name='zpool_config' type-id='5ce45b60' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='2368'>
<var-decl name='zpool_old_config' type-id='5ce45b60' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='2432'>
<var-decl name='zpool_props' type-id='5ce45b60' visibility='default'/>
<var-decl name='zpool_propnames' type-id='71dc54ac' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='2496'>
<var-decl name='zpool_config_size' type-id='b59d7dce' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='2560'>
<var-decl name='zpool_config' type-id='5ce45b60' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='2624'>
<var-decl name='zpool_old_config' type-id='5ce45b60' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='2688'>
<var-decl name='zpool_props' type-id='5ce45b60' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='2752'>
<var-decl name='zpool_start_block' type-id='804dc465' visibility='default'/>
</data-member>
</class-decl>
@ -2923,7 +2934,8 @@
<enumerator name='ZPOOL_PROP_BCLONERATIO' value='35'/>
<enumerator name='ZPOOL_PROP_DEDUP_TABLE_SIZE' value='36'/>
<enumerator name='ZPOOL_PROP_DEDUP_TABLE_QUOTA' value='37'/>
<enumerator name='ZPOOL_NUM_PROPS' value='38'/>
<enumerator name='ZPOOL_PROP_DEDUPCACHED' value='38'/>
<enumerator name='ZPOOL_NUM_PROPS' value='39'/>
</enum-decl>
<typedef-decl name='zpool_prop_t' type-id='af1ba157' id='5d0c23fb'/>
<typedef-decl name='regoff_t' type-id='95e97e5e' id='54a2a2a8'/>
@ -5896,6 +5908,7 @@
<enumerator name='ZFS_IOC_VDEV_GET_PROPS' value='23125'/>
<enumerator name='ZFS_IOC_VDEV_SET_PROPS' value='23126'/>
<enumerator name='ZFS_IOC_POOL_SCRUB' value='23127'/>
<enumerator name='ZFS_IOC_POOL_PREFETCH' value='23128'/>
<enumerator name='ZFS_IOC_PLATFORM' value='23168'/>
<enumerator name='ZFS_IOC_EVENTS_NEXT' value='23169'/>
<enumerator name='ZFS_IOC_EVENTS_CLEAR' value='23170'/>
@ -5924,6 +5937,12 @@
<enumerator name='ZPOOL_WAIT_NUM_ACTIVITIES' value='9'/>
</enum-decl>
<typedef-decl name='zpool_wait_activity_t' type-id='849338e3' id='73446457'/>
<enum-decl name='zpool_prefetch_type_t' naming-typedef-id='e55ff6bc' id='0299ab50'>
<underlying-type type-id='9cac1fee'/>
<enumerator name='ZPOOL_PREFETCH_NONE' value='0'/>
<enumerator name='ZPOOL_PREFETCH_DDT' value='1'/>
</enum-decl>
<typedef-decl name='zpool_prefetch_type_t' type-id='0299ab50' id='e55ff6bc'/>
<enum-decl name='spa_feature' id='33ecb627'>
<underlying-type type-id='9cac1fee'/>
<enumerator name='SPA_FEATURE_NONE' value='-1'/>
@ -5971,6 +5990,8 @@
<enumerator name='SPA_FEATURES' value='41'/>
</enum-decl>
<typedef-decl name='spa_feature_t' type-id='33ecb627' id='d6618c78'/>
<qualified-type-def type-id='80f4b756' const='yes' id='b99c00c9'/>
<pointer-type-def type-id='b99c00c9' size-in-bits='64' id='13956559'/>
<qualified-type-def type-id='22cce67b' const='yes' id='d2816df0'/>
<pointer-type-def type-id='d2816df0' size-in-bits='64' id='3bbfee2e'/>
<qualified-type-def type-id='b96825af' const='yes' id='2b61797f'/>
@ -6063,6 +6084,11 @@
<parameter type-id='37e3bd22'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='lzc_pool_prefetch' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='80f4b756'/>
<parameter type-id='e55ff6bc'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='lzc_set_bootenv' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='80f4b756'/>
<parameter type-id='22cce67b'/>
@ -6208,6 +6234,13 @@
<parameter type-id='9da381c4'/>
<return type-id='48b5725f'/>
</function-decl>
<function-decl name='fnvlist_add_string_array' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='5ce45b60'/>
<parameter type-id='80f4b756'/>
<parameter type-id='13956559'/>
<parameter type-id='3502e3ff'/>
<return type-id='48b5725f'/>
</function-decl>
<function-decl name='fnvlist_add_nvlist_array' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='5ce45b60'/>
<parameter type-id='80f4b756'/>
@ -6396,6 +6429,11 @@
<parameter type-id='4c81de99' name='zhp'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='zpool_prefetch' mangled-name='zpool_prefetch' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prefetch'>
<parameter type-id='4c81de99' name='zhp'/>
<parameter type-id='e55ff6bc' name='type'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='zpool_add' mangled-name='zpool_add' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_add'>
<parameter type-id='4c81de99' name='zhp'/>
<parameter type-id='5ce45b60' name='nvroot'/>
@ -6590,6 +6628,11 @@
<parameter type-id='95e97e5e' name='name_flags'/>
<return type-id='26a90f95'/>
</function-decl>
<function-decl name='zpool_add_propname' mangled-name='zpool_add_propname' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_add_propname'>
<parameter type-id='4c81de99' name='zhp'/>
<parameter type-id='80f4b756' name='propname'/>
<return type-id='48b5725f'/>
</function-decl>
<function-decl name='zpool_get_errlog' mangled-name='zpool_get_errlog' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_get_errlog'>
<parameter type-id='4c81de99' name='zhp'/>
<parameter type-id='857bb57e' name='nverrlistp'/>
@ -8689,7 +8732,6 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='lib/libzutil/zutil_device_path.c' language='LANG_C99'>
<pointer-type-def type-id='b99c00c9' size-in-bits='64' id='13956559'/>
<function-decl name='zpool_default_search_paths' mangled-name='zpool_default_search_paths' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_default_search_paths'>
<parameter type-id='78c01427'/>
<return type-id='13956559'/>
@ -9151,7 +9193,6 @@
<array-type-def dimensions='1' type-id='b99c00c9' size-in-bits='2624' id='5ce15418'>
<subrange length='41' type-id='7359adad' id='cb834f44'/>
</array-type-def>
<qualified-type-def type-id='80f4b756' const='yes' id='b99c00c9'/>
<pointer-type-def type-id='8f92235e' size-in-bits='64' id='90421557'/>
<function-decl name='nvpair_value_uint32' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='dace003f'/>

View File

@ -94,12 +94,15 @@ struct zfs_handle {
* snapshots of volumes.
*/
#define ZFS_IS_VOLUME(zhp) ((zhp)->zfs_head_type == ZFS_TYPE_VOLUME)
#define ZHP_MAX_PROPNAMES 4
struct zpool_handle {
libzfs_handle_t *zpool_hdl;
zpool_handle_t *zpool_next;
char zpool_name[ZFS_MAX_DATASET_NAME_LEN];
int zpool_state;
unsigned int zpool_n_propnames;
const char *zpool_propnames[ZHP_MAX_PROPNAMES];
size_t zpool_config_size;
nvlist_t *zpool_config;
nvlist_t *zpool_old_config;

View File

@ -79,6 +79,13 @@ zpool_get_all_props(zpool_handle_t *zhp)
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if (zhp->zpool_n_propnames > 0) {
nvlist_t *innvl = fnvlist_alloc();
fnvlist_add_string_array(innvl, ZPOOL_GET_PROPS_NAMES,
zhp->zpool_propnames, zhp->zpool_n_propnames);
zcmd_write_src_nvlist(hdl, &zc, innvl);
}
zcmd_alloc_dst_nvlist(hdl, &zc, 0);
while (zfs_ioctl(hdl, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
@ -318,6 +325,15 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
return (0);
}
/*
* ZPOOL_PROP_DEDUPCACHED can be fetched by name only using
* the ZPOOL_GET_PROPS_NAMES mechanism
*/
if (prop == ZPOOL_PROP_DEDUPCACHED) {
zpool_add_propname(zhp, ZPOOL_DEDUPCACHED_PROP_NAME);
(void) zpool_get_all_props(zhp);
}
if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
prop != ZPOOL_PROP_NAME)
return (-1);
@ -361,6 +377,7 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
case ZPOOL_PROP_BCLONESAVED:
case ZPOOL_PROP_BCLONEUSED:
case ZPOOL_PROP_DEDUP_TABLE_SIZE:
case ZPOOL_PROP_DEDUPCACHED:
if (literal)
(void) snprintf(buf, len, "%llu",
(u_longlong_t)intval);
@ -1738,6 +1755,28 @@ zpool_discard_checkpoint(zpool_handle_t *zhp)
return (0);
}
/*
 * Request a prefetch of the given type of data for the pool behind zhp.
 *
 * Returns 0 on success.  On failure the error from lzc_pool_prefetch() is
 * reported through zpool_standard_error() and -1 is returned.
 */
int
zpool_prefetch(zpool_handle_t *zhp, zpool_prefetch_type_t type)
{
	char errbuf[1024];
	const char *typename;
	int rc;

	rc = lzc_pool_prefetch(zhp->zpool_name, type);
	if (rc == 0)
		return (0);

	/* Only the DDT type has a printable name; anything else is blank. */
	if (type == ZPOOL_PREFETCH_DDT)
		typename = "ddt";
	else
		typename = "";

	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
	    "cannot prefetch %s in '%s'"), typename, zhp->zpool_name);
	(void) zpool_standard_error(zhp->zpool_hdl, rc, errbuf);

	return (-1);
}
/*
* Add the given vdevs to the pool. The caller must have already performed the
* necessary verification to ensure that the vdev specification is well-formed.
@ -4401,6 +4440,14 @@ zbookmark_mem_compare(const void *a, const void *b)
return (memcmp(a, b, sizeof (zbookmark_phys_t)));
}
/*
 * Record an additional property name on the pool handle.
 *
 * The string is stored by pointer, not copied, so it must stay valid for
 * as long as the handle is in use.  Adding a name that is already present
 * is a no-op; this keeps repeated requests for the same property from
 * using up the ZHP_MAX_PROPNAMES slots.
 */
void
zpool_add_propname(zpool_handle_t *zhp, const char *propname)
{
	unsigned int i;

	/* Nothing to do if this name has already been registered. */
	for (i = 0; i < zhp->zpool_n_propnames; i++) {
		if (strcmp(zhp->zpool_propnames[i], propname) == 0)
			return;
	}

	assert(zhp->zpool_n_propnames < ZHP_MAX_PROPNAMES);

	/*
	 * With assertions compiled out, refuse to write past the end of
	 * the array rather than corrupting the handle.
	 */
	if (zhp->zpool_n_propnames >= ZHP_MAX_PROPNAMES)
		return;

	zhp->zpool_propnames[zhp->zpool_n_propnames] = propname;
	zhp->zpool_n_propnames++;
}
/*
* Retrieve the persistent error log, uniquify the members, and return to the
* caller.

View File

@ -176,6 +176,7 @@
<elf-symbol name='lzc_load_key' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_pool_checkpoint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_pool_checkpoint_discard' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_pool_prefetch' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_promote' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_receive' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_receive_one' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@ -1428,6 +1429,7 @@
<enumerator name='ZFS_IOC_VDEV_GET_PROPS' value='23125'/>
<enumerator name='ZFS_IOC_VDEV_SET_PROPS' value='23126'/>
<enumerator name='ZFS_IOC_POOL_SCRUB' value='23127'/>
<enumerator name='ZFS_IOC_POOL_PREFETCH' value='23128'/>
<enumerator name='ZFS_IOC_PLATFORM' value='23168'/>
<enumerator name='ZFS_IOC_EVENTS_NEXT' value='23169'/>
<enumerator name='ZFS_IOC_EVENTS_CLEAR' value='23170'/>
@ -1462,6 +1464,12 @@
<enumerator name='ZFS_WAIT_NUM_ACTIVITIES' value='1'/>
</enum-decl>
<typedef-decl name='zfs_wait_activity_t' type-id='527d5dc6' id='3024501a'/>
<enum-decl name='zpool_prefetch_type_t' naming-typedef-id='e55ff6bc' id='0299ab50'>
<underlying-type type-id='9cac1fee'/>
<enumerator name='ZPOOL_PREFETCH_NONE' value='0'/>
<enumerator name='ZPOOL_PREFETCH_DDT' value='1'/>
</enum-decl>
<typedef-decl name='zpool_prefetch_type_t' type-id='0299ab50' id='e55ff6bc'/>
<enum-decl name='data_type_t' naming-typedef-id='8d0687d2' id='aeeae136'>
<underlying-type type-id='9cac1fee'/>
<enumerator name='DATA_TYPE_DONTCARE' value='-1'/>
@ -2892,6 +2900,11 @@
<parameter type-id='80f4b756' name='pool'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='lzc_pool_prefetch' mangled-name='lzc_pool_prefetch' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_pool_prefetch'>
<parameter type-id='80f4b756' name='pool'/>
<parameter type-id='e55ff6bc' name='type'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='lzc_channel_program_nosync' mangled-name='lzc_channel_program_nosync' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_channel_program_nosync'>
<parameter type-id='80f4b756' name='pool'/>
<parameter type-id='80f4b756' name='program'/>

View File

@ -1629,6 +1629,26 @@ lzc_pool_checkpoint_discard(const char *pool)
return (error);
}
/*
 * Issue ZFS_IOC_POOL_PREFETCH for the named pool, passing the requested
 * prefetch type as the ZPOOL_PREFETCH_TYPE argument.
 *
 * Returns whatever lzc_ioctl() returns.
 */
int
lzc_pool_prefetch(const char *pool, zpool_prefetch_type_t type)
{
	nvlist_t *innvl = fnvlist_alloc();
	nvlist_t *outnvl = NULL;
	int rc;

	fnvlist_add_int32(innvl, ZPOOL_PREFETCH_TYPE, type);

	rc = lzc_ioctl(ZFS_IOC_POOL_PREFETCH, pool, innvl, &outnvl);

	/* Neither list is used once the ioctl has returned. */
	fnvlist_free(outnvl);
	fnvlist_free(innvl);

	return (rc);
}
/*
* Executes a read-only channel program.
*

View File

@ -83,6 +83,7 @@ dist_man_MANS = \
%D%/man8/zpool-list.8 \
%D%/man8/zpool-offline.8 \
%D%/man8/zpool-online.8 \
%D%/man8/zpool-prefetch.8 \
%D%/man8/zpool-reguid.8 \
%D%/man8/zpool-remove.8 \
%D%/man8/zpool-reopen.8 \

View File

@ -73,6 +73,10 @@ The amount of storage used by cloned blocks.
Percentage of pool space used.
This property can also be referred to by its shortened column name,
.Sy cap .
.It Sy dedupcached
Total size of the deduplication table currently loaded into the ARC.
See
.Xr zpool-prefetch 8 .
.It Sy dedup_table_size
Total on-disk size of the deduplication table.
.It Sy expandsize

46
man/man8/zpool-prefetch.8 Normal file
View File

@ -0,0 +1,46 @@
.\"
.\" CDDL HEADER START
.\"
.\" The contents of this file are subject to the terms of the
.\" Common Development and Distribution License (the "License").
.\" You may not use this file except in compliance with the License.
.\"
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
.\" or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions
.\" and limitations under the License.
.\"
.\" When distributing Covered Code, include this CDDL HEADER in each
.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
.\"
.\" CDDL HEADER END
.\"
.\"
.\" Copyright (c) 2023, Klara Inc.
.\"
.Dd February 14, 2024
.Dt ZPOOL-PREFETCH 8
.Os
.
.Sh NAME
.Nm zpool-prefetch
.Nd load specific types of data for the given pool
.Sh SYNOPSIS
.Nm zpool
.Cm prefetch
.Fl t Ar type
.Ar pool
.Sh DESCRIPTION
.Bl -tag -width Ds
.It Xo
.Nm zpool
.Cm prefetch
.Fl t Li ddt
.Ar pool
.Xc
Prefetch data of the specified type for the given pool into the ARC.
Currently the only supported type is
.Sy ddt ,
the deduplication table; having the DDT resident in the ARC improves write I/O
performance on pools that use deduplication.
.El

View File

@ -26,7 +26,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
.Dd March 16, 2022
.Dd February 14, 2024
.Dt ZPOOL-STATUS 8
.Os
.
@ -75,6 +75,8 @@ Display a histogram of deduplication statistics, showing the allocated
and referenced
.Pq logically referenced in the pool
block counts and sizes by reference count.
If repeated
.Pq Fl DD ,
also shows statistics on how much of the DDT is resident in the ARC.
.It Fl e
Only show unhealthy vdevs (not-ONLINE or with errors).
.It Fl g

View File

@ -26,7 +26,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
.Dd March 16, 2022
.Dd February 14, 2024
.Dt ZPOOL 8
.Os
.
@ -168,6 +168,8 @@ specified.
.
.Ss Maintenance
.Bl -tag -width Ds
.It Xr zpool-prefetch 8
Prefetches specific types of pool data.
.It Xr zpool-scrub 8
Begins a scrub or resumes a paused scrub.
.It Xr zpool-checkpoint 8
@ -598,6 +600,7 @@ don't wait.
.Xr zpool-list 8 ,
.Xr zpool-offline 8 ,
.Xr zpool-online 8 ,
.Xr zpool-prefetch 8 ,
.Xr zpool-reguid 8 ,
.Xr zpool-remove 8 ,
.Xr zpool-reopen 8 ,

View File

@ -183,6 +183,9 @@ zpool_prop_init(void)
zprop_register_hidden(ZPOOL_PROP_DEDUPDITTO, "dedupditto",
PROP_TYPE_NUMBER, PROP_DEFAULT, ZFS_TYPE_POOL, "DEDUPDITTO",
B_FALSE, sfeatures);
zprop_register_hidden(ZPOOL_PROP_DEDUPCACHED,
ZPOOL_DEDUPCACHED_PROP_NAME, PROP_TYPE_NUMBER, PROP_READONLY,
ZFS_TYPE_POOL, "DEDUPCACHED", B_FALSE, sfeatures);
zfs_mod_list_supported_free(sfeatures);
}

View File

@ -26,7 +26,7 @@
* Copyright (c) 2017, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
* Copyright (c) 2020, George Amanakis. All rights reserved.
* Copyright (c) 2019, Klara Inc.
* Copyright (c) 2019, 2023, Klara Inc.
* Copyright (c) 2019, Allan Jude
* Copyright (c) 2020, The FreeBSD Foundation [1]
*
@ -5471,6 +5471,57 @@ arc_read_done(zio_t *zio)
}
}
/*
 * Report how the block at the DVA in bp is currently cached in the ARC.
 *
 * Returns a bitmask of ARC_CACHED_* flags, or zero when no header for the
 * block is found.  Embedded block pointers are reported as
 * ARC_CACHED_EMBEDDED without consulting the hash table.
 */
int
arc_cached(spa_t *spa, const blkptr_t *bp)
{
	uint64_t guid = spa_load_guid(spa);
	kmutex_t *hash_lock = NULL;
	arc_buf_hdr_t *hdr;
	int cached = 0;

	if (BP_IS_EMBEDDED(bp))
		return (ARC_CACHED_EMBEDDED);

	/* hash_lock is released below once the header has been inspected. */
	hdr = buf_hash_find(guid, bp, &hash_lock);
	if (hdr == NULL)
		return (0);

	if (HDR_HAS_L1HDR(hdr)) {
		/*
		 * A switch (rather than an if-chain) is used on purpose so
		 * that future arc_state_type_t additions get more
		 * compile-time checking.
		 */
		switch (hdr->b_l1hdr.b_state->arcs_state) {
		case ARC_STATE_MRU:
			cached |= ARC_CACHED_IN_L1 | ARC_CACHED_IN_MRU;
			break;
		case ARC_STATE_MFU:
			cached |= ARC_CACHED_IN_L1 | ARC_CACHED_IN_MFU;
			break;
		case ARC_STATE_UNCACHED:
			/* The header is still in L1, probably not for long */
			cached |= ARC_CACHED_IN_L1;
			break;
		case ARC_STATE_ANON:
			break;
		default:
			break;
		}
	}

	if (HDR_HAS_L2HDR(hdr))
		cached |= ARC_CACHED_IN_L2;

	mutex_exit(hash_lock);

	return (cached);
}
/*
* "Read" the block at the specified DVA (in bp) via the
* cache. If the block is found in the cache, invoke the provided

View File

@ -23,7 +23,7 @@
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2022 by Pawel Jakub Dawidek
* Copyright (c) 2023, Klara Inc.
* Copyright (c) 2019, 2023, Klara Inc.
*/
#include <sys/zfs_context.h>
@ -340,6 +340,16 @@ ddt_object_prefetch(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
ddt->ddt_object[type][class], ddk);
}
/*
 * Prefetch every entry of one DDT object (the given type/class pair) by
 * delegating to that type's ddt_op_prefetch_all handler.  Does nothing
 * when the object does not exist.
 */
static void
ddt_object_prefetch_all(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
{
	if (ddt_object_exists(ddt, type, class)) {
		ddt_ops[type]->ddt_op_prefetch_all(ddt->ddt_os,
		    ddt->ddt_object[type][class]);
	}
}
static int
ddt_object_update(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
ddt_entry_t *dde, dmu_tx_t *tx)
@ -652,6 +662,28 @@ ddt_over_quota(spa_t *spa)
return (B_FALSE);
}
/*
 * Prefetch all DDT objects of the pool.
 */
void
ddt_prefetch_all(spa_t *spa)
{
	/*
	 * Walk the DDT of every checksum function and ask each type/class
	 * object to load all of its entries.  This is intended purely as a
	 * prefetch of those blocks, so, for the same reason that
	 * ddt_prefetch isn't locked, this is also not locked.
	 */
	for (enum zio_checksum cksum = 0; cksum < ZIO_CHECKSUM_FUNCTIONS;
	    cksum++) {
		ddt_t *ddt = spa->spa_ddt[cksum];

		if (ddt != NULL) {
			for (ddt_type_t t = 0; t < DDT_TYPES; t++) {
				for (ddt_class_t cl = 0; cl < DDT_CLASSES;
				    cl++) {
					ddt_object_prefetch_all(ddt, t, cl);
				}
			}
		}
	}
}
ddt_entry_t *
ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add)
{

View File

@ -248,3 +248,32 @@ ddt_get_pool_dedup_ratio(spa_t *spa)
return (dds_total.dds_ref_dsize * 100 / dds_total.dds_dsize);
}
/*
 * Sum the cached size of every DDT object in the pool.
 *
 * On success *psize receives the combined size reported by
 * dmu_object_cached_size() (both of its outputs added together) over all
 * checksum/type/class combinations and 0 is returned.  If any lookup
 * fails, that error is returned and *psize is left at zero.
 */
int
ddt_get_pool_dedup_cached(spa_t *spa, uint64_t *psize)
{
	uint64_t total = 0;

	*psize = 0;

	for (enum zio_checksum cksum = 0; cksum < ZIO_CHECKSUM_FUNCTIONS;
	    cksum++) {
		ddt_t *ddt = spa->spa_ddt[cksum];

		if (ddt == NULL)
			continue;

		for (ddt_type_t t = 0; t < DDT_TYPES; t++) {
			for (ddt_class_t cl = 0; cl < DDT_CLASSES; cl++) {
				uint64_t l1, l2;
				int rc;

				rc = dmu_object_cached_size(ddt->ddt_os,
				    ddt->ddt_object[t][cl], &l1, &l2);
				if (rc != 0)
					return (rc);

				total += l1;
				total += l2;
			}
		}
	}

	*psize = total;
	return (0);
}

View File

@ -147,6 +147,12 @@ ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk)
(void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS);
}
/*
 * Prefetch-all handler for ZAP-backed DDT objects: hands the whole object
 * to zap_prefetch_object().
 */
static void
ddt_zap_prefetch_all(objset_t *os, uint64_t object)
{
	/* This handler returns void, so the result is intentionally unused. */
	(void) zap_prefetch_object(os, object);
}
static int
ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk,
const ddt_phys_t *phys, size_t psize, dmu_tx_t *tx)
@ -231,6 +237,7 @@ const ddt_ops_t ddt_zap_ops = {
ddt_zap_lookup,
ddt_zap_contains,
ddt_zap_prefetch,
ddt_zap_prefetch_all,
ddt_zap_update,
ddt_zap_remove,
ddt_zap_walk,

View File

@ -26,7 +26,7 @@
* Copyright (c) 2016, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
* Copyright (c) 2019 Datto Inc.
* Copyright (c) 2019, Klara Inc.
* Copyright (c) 2019, 2023, Klara Inc.
* Copyright (c) 2019, Allan Jude
* Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
* Copyright (c) 2021, 2022 by Pawel Jakub Dawidek
@ -701,7 +701,7 @@ dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, const void *tag)
* Issue prefetch I/Os for the given blocks. If level is greater than 0, the
* indirect blocks prefetched will be those that point to the blocks containing
* the data starting at offset, and continuing to offset + len. If the range
* it too long, prefetch the first dmu_prefetch_max bytes as requested, while
* is too long, prefetch the first dmu_prefetch_max bytes as requested, while
* for the rest only a higher level, also fitting within dmu_prefetch_max. It
* should primarily help random reads, since for long sequential reads there is
* a speculative prefetcher.
@ -777,6 +777,106 @@ dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset,
rw_exit(&dn->dn_struct_rwlock);
}
/*
 * State shared between dmu_prefetch_wait_by_dnode() and its
 * dmu_prefetch_done() completion callback.
 */
typedef struct {
/* protects dpa_pending_io */
kmutex_t dpa_lock;
/* broadcast when dpa_pending_io drops to zero */
kcondvar_t dpa_cv;
/* number of prefetch callbacks that have not run yet */
uint64_t dpa_pending_io;
} dmu_prefetch_arg_t;
/*
 * Completion callback for the prefetches issued by
 * dmu_prefetch_wait_by_dnode().  Drops one pending I/O from the shared
 * dmu_prefetch_arg_t and wakes the waiter when none remain.
 */
static void
dmu_prefetch_done(void *arg, uint64_t level, uint64_t blkid, boolean_t issued)
{
	dmu_prefetch_arg_t *dpa = arg;

	(void) blkid;
	(void) issued;
	(void) level;

	ASSERT0(level);

	mutex_enter(&dpa->dpa_lock);
	ASSERT3U(dpa->dpa_pending_io, >, 0);
	dpa->dpa_pending_io--;
	if (dpa->dpa_pending_io == 0)
		cv_broadcast(&dpa->dpa_cv);
	mutex_exit(&dpa->dpa_lock);
}
/*
 * Issue a level-0 prefetch for every block covering [offset, offset + len)
 * of the dnode and block until the completion callback has run for each
 * of them.
 */
static void
dmu_prefetch_wait_by_dnode(dnode_t *dn, uint64_t offset, uint64_t len)
{
	dmu_prefetch_arg_t dpa;
	uint64_t first, limit, blk;

	mutex_init(&dpa.dpa_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&dpa.dpa_cv, NULL, CV_DEFAULT, NULL);

	rw_enter(&dn->dn_struct_rwlock, RW_READER);

	first = dbuf_whichblock(dn, 0, offset);
	limit = dbuf_whichblock(dn, 0, offset + len - 1) + 1;

	/* One callback is expected per block; set the count before issuing. */
	dpa.dpa_pending_io = limit - first;

	blk = first;
	while (blk < limit) {
		(void) dbuf_prefetch_impl(dn, 0, blk, ZIO_PRIORITY_ASYNC_READ,
		    0, dmu_prefetch_done, &dpa);
		blk++;
	}

	rw_exit(&dn->dn_struct_rwlock);

	/* wait for prefetch L0 reads to finish */
	mutex_enter(&dpa.dpa_lock);
	while (dpa.dpa_pending_io != 0)
		cv_wait(&dpa.dpa_cv, &dpa.dpa_lock);
	mutex_exit(&dpa.dpa_lock);

	mutex_destroy(&dpa.dpa_lock);
	cv_destroy(&dpa.dpa_cv);
}
/*
 * Issue prefetch I/Os for the given L0 block range of an object and wait
 * for the I/O to complete.  This does not enforce dmu_prefetch_max and
 * will prefetch the entire range.  The blocks are read from disk into the
 * ARC but no decompression occurs (i.e., the dbuf cache is not required).
 *
 * Returns 0 on success, the dnode_hold() error if the object cannot be
 * held, or EINTR if a signal is noticed between chunks.
 */
int
dmu_prefetch_wait(objset_t *os, uint64_t object, uint64_t offset, uint64_t size)
{
	dnode_t *dn;
	int err = dnode_hold(os, object, FTAG, &dn);

	if (err != 0)
		return (err);

	/*
	 * Chunk the requests (16 indirects worth) so that we can be
	 * interrupted.  Without indirect blocks, go one data block at a time.
	 */
	uint64_t chunksize = dn->dn_datablksz;
	if (dn->dn_indblkshift != 0) {
		uint64_t nbps = bp_span_in_blocks(dn->dn_indblkshift, 1);

		chunksize = (nbps * 16) << dn->dn_datablkshift;
	}

	uint64_t remaining = size;
	while (remaining != 0) {
		uint64_t chunk = MIN(remaining, chunksize);

		dmu_prefetch_wait_by_dnode(dn, offset, chunk);

		offset += chunk;
		remaining -= chunk;

		if (issig()) {
			err = SET_ERROR(EINTR);
			break;
		}
	}

	dnode_rele(dn, FTAG);

	return (err);
}
/*
* Issue prefetch I/Os for the given object's dnode.
*/
@ -1451,6 +1551,114 @@ dmu_write_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size,
}
#endif /* _KERNEL */
/*
 * Add the logical size of each cached block pointer in bps[0..nbps) to
 * the caller's running totals.
 *
 * Holes and blocks for which arc_cached() returns zero are skipped.  A
 * block that is flagged as in L2 but not in L1 is added to *l2sz; every
 * other cached block is added to *l1sz.  A NULL bps array is a no-op.
 */
static void
dmu_cached_bps(spa_t *spa, blkptr_t *bps, uint_t nbps,
    uint64_t *l1sz, uint64_t *l2sz)
{
	const int level_mask = ARC_CACHED_IN_L1 | ARC_CACHED_IN_L2;

	if (bps == NULL)
		return;

	for (size_t i = 0; i < nbps; i++) {
		blkptr_t *bp = &bps[i];
		uint64_t *bucket;
		int how;

		if (BP_IS_HOLE(bp))
			continue;

		how = arc_cached(spa, bp);
		if (how == 0)
			continue;

		if ((how & level_mask) == ARC_CACHED_IN_L2)
			bucket = l2sz;
		else
			bucket = l1sz;

		*bucket += BP_GET_LSIZE(bp);
	}
}
/*
 * Estimate DMU object cached size.
 *
 * Reads every L1 indirect block of the object and asks the ARC about each
 * block pointer it contains, accumulating the results in *l1sz and *l2sz
 * (both are reset to zero first).
 *
 * Returns 0 on success, including when the object cannot be held or has
 * fewer than two levels (the sizes are then left at zero), or EINTR when
 * a signal is noticed while walking the L1 blocks.
 */
int
dmu_object_cached_size(objset_t *os, uint64_t object,
    uint64_t *l1sz, uint64_t *l2sz)
{
	dmu_object_info_t doi;
	dnode_t *dn;
	int err = 0;

	*l1sz = 0;
	*l2sz = 0;

	if (dnode_hold(os, object, FTAG, &dn) != 0)
		return (0);

	/* No indirect level means there are no L1 blocks to inspect. */
	if (dn->dn_nlevels < 2) {
		dnode_rele(dn, FTAG);
		return (0);
	}

	dmu_object_info_from_dnode(dn, &doi);

	/* dbuf_read doesn't prefetch L1 blocks, so request them up front. */
	uint64_t off = 0;
	while (off < doi.doi_max_offset) {
		dmu_prefetch_by_dnode(dn, 1, off,
		    dmu_prefetch_max, ZIO_PRIORITY_SYNC_READ);
		off += dmu_prefetch_max;
	}

	/*
	 * Hold all valid L1 blocks, asking ARC the status of each BP
	 * contained in each such L1 block.
	 */
	uint_t nbps = bp_span_in_blocks(dn->dn_indblkshift, 1);
	uint64_t l1blks = 1 + (dn->dn_maxblkid / nbps);

	rw_enter(&dn->dn_struct_rwlock, RW_READER);
	for (uint64_t blk = 0; blk < l1blks; blk++) {
		dmu_buf_impl_t *db = NULL;

		/* On interrupt, get out, and bubble up EINTR. */
		if (issig()) {
			err = EINTR;
			break;
		}

		/*
		 * If we get an i/o error here, the L1 can't be read,
		 * and nothing under it could be cached, so we just
		 * continue.  Ignoring the error from dbuf_hold_impl
		 * or from dbuf_read is then a reasonable choice.
		 */
		if (dbuf_hold_impl(dn, 1, blk, B_TRUE, B_FALSE, FTAG,
		    &db) != 0)
			continue;

		if (dbuf_read(db, NULL, DB_RF_CANFAIL) == 0) {
			dmu_cached_bps(dmu_objset_spa(os), db->db.db_data,
			    nbps, l1sz, l2sz);
		}

		dbuf_rele(db, FTAG);
	}
	rw_exit(&dn->dn_struct_rwlock);

	dnode_rele(dn, FTAG);

	return (err);
}
/*
* Allocate a loaned anonymous arc buffer.
*/

View File

@ -34,7 +34,7 @@
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
* Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
* Copyright (c) 2024, Klara Inc.
* Copyright (c) 2023, 2024, Klara Inc.
*/
/*
@ -337,6 +337,55 @@ spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, const char *strval,
nvlist_free(propval);
}
/*
 * Look up one pool property by name and append its value to outnvl.
 *
 * Only ZPOOL_PROP_DEDUPCACHED is handled here; any other name yields
 * EINVAL.  Returns 0 on success, or the error hit while computing the
 * property value.
 */
static int
spa_prop_add(spa_t *spa, const char *propname, nvlist_t *outnvl)
{
	uint64_t value;
	int err;
	zpool_prop_t which = zpool_name_to_prop(propname);

	/*
	 * NB: not every property looked up through this API needs the spa
	 * props lock, so a case that does need it must grab it explicitly.
	 */
	switch (which) {
	case ZPOOL_PROP_DEDUPCACHED:
		err = ddt_get_pool_dedup_cached(spa, &value);
		if (err != 0)
			return (SET_ERROR(err));
		break;
	default:
		return (SET_ERROR(EINVAL));
	}

	spa_prop_add_list(outnvl, which, NULL, value, ZPROP_SRC_NONE);

	return (0);
}
/*
 * Add each of the n_props named properties to *outnvl, allocating the
 * nvlist first if the caller passed in a NULL list.
 *
 * A NULL props array is a no-op.  Processing stops at the first property
 * that fails, and that error is returned; otherwise 0 is returned.
 */
int
spa_prop_get_nvlist(spa_t *spa, char **props, unsigned int n_props,
    nvlist_t **outnvl)
{
	if (props == NULL)
		return (0);

	if (*outnvl == NULL) {
		int rc = nvlist_alloc(outnvl, NV_UNIQUE_NAME, KM_SLEEP);

		if (rc != 0)
			return (rc);
	}

	for (unsigned int idx = 0; idx < n_props; idx++) {
		int rc = spa_prop_add(spa, props[idx], *outnvl);

		if (rc != 0)
			return (rc);
	}

	return (0);
}
/*
* Add a user property (source=src, propname=propval) to an nvlist.
*/
@ -503,9 +552,11 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
dsl_pool_t *dp;
int err;
err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP);
if (err)
return (err);
if (*nvp == NULL) {
err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP);
if (err)
return (err);
}
dp = spa_get_dsl(spa);
dsl_pool_config_enter(dp, FTAG);

View File

@ -1072,6 +1072,21 @@ zap_prefetch(objset_t *os, uint64_t zapobj, const char *name)
return (err);
}
/*
 * Prefetch an entire ZAP object, waiting for the reads to complete.
 *
 * Returns 0 on success, the error from dmu_object_info() if the object
 * cannot be looked up, EINVAL if the object is not a ZAP, or the error
 * from dmu_prefetch_wait() (for example EINTR when the wait is cut short
 * by a signal).
 */
int
zap_prefetch_object(objset_t *os, uint64_t zapobj)
{
	dmu_object_info_t doi;
	int error;

	error = dmu_object_info(os, zapobj, &doi);
	if (error != 0)
		return (error);

	if (DMU_OT_BYTESWAP(doi.doi_type) != DMU_BSWAP_ZAP)
		return (SET_ERROR(EINVAL));

	/*
	 * Propagate the prefetch result rather than dropping it, so that an
	 * interrupted or failed prefetch is not reported as success.
	 */
	return (dmu_prefetch_wait(os, zapobj, 0, doi.doi_max_offset));
}
int
zap_lookup_by_dnode(dnode_t *dn, const char *name,
uint64_t integer_size, uint64_t num_integers, void *buf)
@ -1784,6 +1799,7 @@ EXPORT_SYMBOL(zap_lookup_uint64);
EXPORT_SYMBOL(zap_contains);
EXPORT_SYMBOL(zap_prefetch);
EXPORT_SYMBOL(zap_prefetch_uint64);
EXPORT_SYMBOL(zap_prefetch_object);
EXPORT_SYMBOL(zap_add);
EXPORT_SYMBOL(zap_add_by_dnode);
EXPORT_SYMBOL(zap_add_uint64);

View File

@ -38,7 +38,7 @@
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2019 Datto Inc.
* Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
* Copyright (c) 2019, 2021, 2024, Klara Inc.
* Copyright (c) 2019, 2021, 2023, 2024, Klara Inc.
* Copyright (c) 2019, Allan Jude
* Copyright 2024 Oxide Computer Company
*/
@ -3009,34 +3009,51 @@ zfs_ioc_pool_set_props(zfs_cmd_t *zc)
return (error);
}
static int
zfs_ioc_pool_get_props(zfs_cmd_t *zc)
{
spa_t *spa;
int error;
nvlist_t *nvp = NULL;
/*
* innvl: {
* "get_props_names": [ "prop1", "prop2", ..., "propN" ]
* }
*/
if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
static const zfs_ioc_key_t zfs_keys_get_props[] = {
{ ZPOOL_GET_PROPS_NAMES, DATA_TYPE_STRING_ARRAY, ZK_OPTIONAL },
};
static int
zfs_ioc_pool_get_props(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
{
nvlist_t *nvp = outnvl;
spa_t *spa;
char **props = NULL;
unsigned int n_props = 0;
int error;
if (nvlist_lookup_string_array(innvl, ZPOOL_GET_PROPS_NAMES,
&props, &n_props) != 0) {
props = NULL;
}
if ((error = spa_open(pool, &spa, FTAG)) != 0) {
/*
* If the pool is faulted, there may be properties we can still
* get (such as altroot and cachefile), so attempt to get them
* anyway.
*/
mutex_enter(&spa_namespace_lock);
if ((spa = spa_lookup(zc->zc_name)) != NULL)
if ((spa = spa_lookup(pool)) != NULL) {
error = spa_prop_get(spa, &nvp);
if (error == 0 && props != NULL)
error = spa_prop_get_nvlist(spa, props, n_props,
&nvp);
}
mutex_exit(&spa_namespace_lock);
} else {
error = spa_prop_get(spa, &nvp);
if (error == 0 && props != NULL)
error = spa_prop_get_nvlist(spa, props, n_props, &nvp);
spa_close(spa, FTAG);
}
if (error == 0 && zc->zc_nvlist_dst != 0)
error = put_nvlist(zc, nvp);
else
error = SET_ERROR(EFAULT);
nvlist_free(nvp);
return (error);
}
@ -4031,6 +4048,52 @@ zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
return (spa_checkpoint_discard(poolname));
}
/*
 * Loads specific types of data for the given pool
 *
 * innvl: {
 *     "prefetch_type" -> int32_t
 * }
 *
 * The handler currently accepts only ZPOOL_PREFETCH_DDT as the type and
 * rejects anything else with EINVAL.
 *
 * outnvl: empty
 */
/*
 * Accepted innvl keys for ZFS_IOC_POOL_PREFETCH.
 * NOTE(review): the flags field is 0 (no ZK_OPTIONAL), which presumably
 * makes the key mandatory -- confirm against the zfs_ioc_key_t checker.
 */
static const zfs_ioc_key_t zfs_keys_pool_prefetch[] = {
{ZPOOL_PREFETCH_TYPE, DATA_TYPE_INT32, 0},
};
/*
 * ZFS_IOC_POOL_PREFETCH handler: validate the requested prefetch type,
 * open the pool, prefetch its DDT and log how long that took.
 *
 * Returns EINVAL if the type is missing or is not ZPOOL_PREFETCH_DDT, the
 * spa_open() error if the pool cannot be opened, and 0 otherwise.
 */
static int
zfs_ioc_pool_prefetch(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
{
	(void) outnvl;
	spa_t *spa;
	int32_t type;
	int error;

	/*
	 * Currently, only ZPOOL_PREFETCH_DDT is supported
	 */
	if (nvlist_lookup_int32(innvl, ZPOOL_PREFETCH_TYPE, &type) != 0)
		return (EINVAL);
	if (type != ZPOOL_PREFETCH_DDT)
		return (EINVAL);

	error = spa_open(poolname, &spa, FTAG);
	if (error != 0)
		return (error);

	hrtime_t began = gethrtime();

	ddt_prefetch_all(spa);

	hrtime_t elapsed = gethrtime() - began;
	zfs_dbgmsg("pool '%s': loaded ddt into ARC in %llu ms", spa->spa_name,
	    (u_longlong_t)NSEC2MSEC(elapsed));

	spa_close(spa, FTAG);

	return (error);
}
/*
* inputs:
* zc_name name of dataset to destroy
@ -7283,6 +7346,12 @@ zfs_ioctl_init(void)
zfs_keys_pool_discard_checkpoint,
ARRAY_SIZE(zfs_keys_pool_discard_checkpoint));
zfs_ioctl_register("zpool_prefetch",
ZFS_IOC_POOL_PREFETCH, zfs_ioc_pool_prefetch,
zfs_secpolicy_config, POOL_NAME,
POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
zfs_keys_pool_prefetch, ARRAY_SIZE(zfs_keys_pool_prefetch));
zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
@ -7328,6 +7397,11 @@ zfs_ioctl_init(void)
POOL_CHECK_NONE, B_TRUE, B_TRUE,
zfs_keys_pool_scrub, ARRAY_SIZE(zfs_keys_pool_scrub));
zfs_ioctl_register("get_props", ZFS_IOC_POOL_GET_PROPS,
zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME,
POOL_CHECK_NONE, B_FALSE, B_FALSE,
zfs_keys_get_props, ARRAY_SIZE(zfs_keys_get_props));
/* IOCTLS that use the legacy function signature */
zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
@ -7383,8 +7457,6 @@ zfs_ioctl_init(void)
zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);

View File

@ -208,6 +208,10 @@ tests = ['zfs_create_001_pos', 'zfs_create_002_pos', 'zfs_create_003_pos',
'zfs_create_verbose']
tags = ['functional', 'cli_root', 'zfs_create']
[tests/functional/cli_root/zpool_prefetch]
tests = ['zpool_prefetch_001_pos']
tags = ['functional', 'cli_root', 'zpool_prefetch']
[tests/functional/cli_root/zfs_destroy]
tests = ['zfs_clone_livelist_condense_and_disable',
'zfs_clone_livelist_condense_races', 'zfs_clone_livelist_dedup',

View File

@ -1176,6 +1176,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_online/setup.ksh \
functional/cli_root/zpool_online/zpool_online_001_pos.ksh \
functional/cli_root/zpool_online/zpool_online_002_neg.ksh \
functional/cli_root/zpool_prefetch/cleanup.ksh \
functional/cli_root/zpool_prefetch/setup.ksh \
functional/cli_root/zpool_prefetch/zpool_prefetch_001_pos.ksh \
functional/cli_root/zpool_remove/cleanup.ksh \
functional/cli_root/zpool_remove/setup.ksh \
functional/cli_root/zpool_remove/zpool_remove_001_neg.ksh \

View File

@ -0,0 +1,30 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# Pull in the shared test library, then run its default_cleanup helper.
. $STF_SUITE/include/libtest.shlib
default_cleanup

View File

@ -0,0 +1,32 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# Pull in the shared test library.
. $STF_SUITE/include/libtest.shlib
# Use only the first entry of the space-separated $DISKS list.
DISK=${DISKS%% *}
default_setup $DISK

View File

@ -0,0 +1,128 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2019, 2023 by Klara Inc. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
#
# DESCRIPTION:
# 'zpool prefetch -t ddt <pool>' can successfully load a pool's DDT on demand.
#
# STRATEGY:
# 1. Build up storage pool with deduplicated dataset.
# 2. Export the pool.
# 3. Import the pool, and use zpool prefetch -t ddt to load its table.
# 4. Verify the DDT was loaded successfully using ddt cache stats
#
verify_runnable "both"
log_assert "'zpool prefetch -t ddt <pool>' can successfully load the DDT for a pool."
#
# Collect DDT size statistics for a pool from 'zpool status -DDp'.
#
# $1 - name of the caller's variable; its .ondisk, .incore and .cached
#      members are set from the 6th, 9th and 12th fields of the
#      "dedup:" status line
# $2 - pool name
#
# Any statistic missing from the output is reported as "0".
# Always returns 0.
#
function getddtstats
{
	typeset -n gds=$1
	typeset pool=$2
	# Keep the scratch variable local so it does not leak to the caller.
	typeset out

	out=$(zpool status -DDp $pool | awk '/^ dedup: / {print $6 " " $9 " " $12}')
	log_note "status -DDp output: ${out}"

	gds.ondisk=$(echo $out | cut -d" " -f1)
	gds.incore=$(echo $out | cut -d" " -f2)
	gds.cached=$(echo $out | cut -d" " -f3)

	# In case of missing data, reset to 0.  This should normally be due
	# to a pool without any DDT.
	[ -z "${gds.ondisk}" ] && gds.ondisk="0"
	[ -z "${gds.incore}" ] && gds.incore="0"
	[ -z "${gds.cached}" ] && gds.cached="0"

	# 'return' takes a numeric status; 'return true' only worked because
	# the unset name "true" evaluates to 0 arithmetically.
	return 0
}
# Confirm that nothing happens on a standard pool config.
typeset -A before
log_must getddtstats before $TESTPOOL
log_note "before stats: ${before}"
log_must test "${before.ondisk}" -eq "0"
log_must test "${before.incore}" -eq "0"
log_must test "${before.cached}" -eq "0"
# Prefetching must still succeed on a pool whose DDT stats are all zero.
log_must zpool prefetch -t ddt $TESTPOOL
# Build up the deduplicated dataset. This consists of creating enough files
# to generate a reasonable size DDT for testing purposes.
DATASET=$TESTPOOL/ddt
log_must zfs create -o dedup=on $DATASET
MNTPOINT=$(get_prop mountpoint $TESTPOOL/ddt)
log_note "Generating dataset ..."
typeset -i i=0
while (( i < 16384 )); do
echo -n $i > $MNTPOINT/f.$i
# Create some copies of the original mainly for the purpose of
# having duplicate entries. About half will have no copies, while
# the remainder will have an equal distribution of 1-4 copies,
# depending on the number put into the original.
typeset -i j
# j starts at i % 8, so only files with i % 8 < 4 get copied.
((j = i % 8))
while (( j < 4 )); do
cp $MNTPOINT/f.$i $MNTPOINT/f.$i.$j
((j += 1))
done
((i += 1))
done
log_note "Dataset generation completed."
# Every statistic is expected to have reached at least 1048576 by now.
typeset -A generated
log_must getddtstats generated $TESTPOOL
log_note "generated stats: ${generated}"
log_must test "${generated.ondisk}" -ge "1048576"
log_must test "${generated.incore}" -ge "1048576"
log_must test "${generated.cached}" -ge "1048576"
log_must zpool prefetch -t ddt $TESTPOOL
# Do an export/import series to flush the DDT dataset cache.
typeset -A reimport
log_must zpool export $TESTPOOL
log_must zpool import $TESTPOOL
log_must getddtstats reimport $TESTPOOL
log_note "reimport stats: ${reimport}"
log_must test "${reimport.ondisk}" -ge "1048576"
log_must test "${reimport.incore}" -ge "1048576"
# On reimport, only the first block or two should be cached.
log_must test "${reimport.cached}" -le "65536"
# Finally, reload it and check again.
typeset -A reloaded
log_must zpool prefetch -t ddt $TESTPOOL
log_must getddtstats reloaded $TESTPOOL
log_note "reloaded stats: ${reloaded}"
log_must test "${reloaded.ondisk}" -ge "1048576"
log_must test "${reloaded.incore}" -ge "1048576"
# After the prefetch the cached size must match the in-core size exactly.
log_must test "${reloaded.cached}" -eq "${reloaded.incore}"
log_pass "'zpool prefetch -t ddt <pool>' success."