zio: remove io_cmd and DKIOCFLUSHWRITECACHE

There are no other options, so we can just always assume it's a flush.

Includes some light refactoring where a switch statement was doing
control flow that no longer works.
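
For orientation, a small stand-alone sketch of the pattern being removed. This is an
illustration only, not an excerpt from the tree; cmd_t and issue_flush() are made-up
stand-ins for io_cmd and the per-backend flush call.

    #include <errno.h>
    #include <stdio.h>

    /* Only one command ever existed, so the switch below has one live case. */
    typedef enum { FLUSH_WRITE_CACHE } cmd_t;

    static int issue_flush(void) { return (0); }	/* stand-in for the real flush */

    /* Before: dispatch on the command, with an unreachable default. */
    static int
    ioctl_start_before(cmd_t cmd)
    {
    	int error;

    	switch (cmd) {
    	case FLUSH_WRITE_CACHE:
    		error = issue_flush();
    		break;
    	default:
    		error = ENOTSUP;	/* can never be reached */
    	}
    	return (error);
    }

    /* After: an ioctl zio can only mean "flush", so just issue it. */
    static int
    ioctl_start_after(void)
    {
    	return (issue_flush());
    }

    int
    main(void)
    {
    	printf("before=%d after=%d\n",
    	    ioctl_start_before(FLUSH_WRITE_CACHE), ioctl_start_after());
    	return (0);
    }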

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Closes #16064
Rob Norris 2024-04-04 22:34:54 +11:00 committed by Brian Behlendorf
parent cac416f106
commit c9c838aa1f
10 changed files with 106 additions and 157 deletions


@@ -31,7 +31,6 @@
 /* ZIO macros */
 #define ZIO_TP_STRUCT_ENTRY \
 	__field(zio_type_t, zio_type) \
-	__field(int, zio_cmd) \
 	__field(zio_priority_t, zio_priority) \
 	__field(uint64_t, zio_size) \
 	__field(uint64_t, zio_orig_size) \
@@ -61,7 +60,6 @@
 #define ZIO_TP_FAST_ASSIGN \
 	__entry->zio_type = zio->io_type; \
-	__entry->zio_cmd = zio->io_cmd; \
 	__entry->zio_priority = zio->io_priority; \
 	__entry->zio_size = zio->io_size; \
 	__entry->zio_orig_size = zio->io_orig_size; \
@@ -90,7 +88,7 @@
 	__entry->zp_dedup_verify = zio->io_prop.zp_dedup_verify;
 
 #define ZIO_TP_PRINTK_FMT \
-	"zio { type %u cmd %i prio %u size %llu orig_size %llu " \
+	"zio { type %u prio %u size %llu orig_size %llu " \
 	"offset %llu timestamp %llu delta %llu delay %llu " \
 	"flags 0x%llx stage 0x%x pipeline 0x%x orig_flags 0x%llx " \
 	"orig_stage 0x%x orig_pipeline 0x%x reexecute %u " \
@@ -98,7 +96,7 @@
 	"type %u level %u copies %u dedup %u dedup_verify %u nopwrite %u } }"
 
 #define ZIO_TP_PRINTK_ARGS \
-	__entry->zio_type, __entry->zio_cmd, __entry->zio_priority, \
+	__entry->zio_type, __entry->zio_priority, \
 	__entry->zio_size, __entry->zio_orig_size, __entry->zio_offset, \
 	__entry->zio_timestamp, __entry->zio_delta, __entry->zio_delay, \
 	__entry->zio_flags, __entry->zio_stage, __entry->zio_pipeline, \


@@ -451,7 +451,6 @@ struct zio {
 	zio_type_t io_type;
 	enum zio_child io_child_type;
 	enum trim_flag io_trim_flags;
-	int io_cmd;
 	zio_priority_t io_priority;
 	uint8_t io_reexecute;
 	uint8_t io_state[ZIO_WAIT_TYPES];


@@ -255,14 +255,7 @@ vdev_file_io_start(zio_t *zio)
 			return;
 		}
 
-		switch (zio->io_cmd) {
-		case DKIOCFLUSHWRITECACHE:
-			zio->io_error = zfs_file_fsync(vf->vf_file,
-			    O_SYNC|O_DSYNC);
-			break;
-		default:
-			zio->io_error = SET_ERROR(ENOTSUP);
-		}
+		zio->io_error = zfs_file_fsync(vf->vf_file, O_SYNC|O_DSYNC);
 
 		zio_execute(zio);
 		return;


@@ -1153,42 +1153,31 @@ vdev_geom_io_start(zio_t *zio)
 	vd = zio->io_vd;
 
-	switch (zio->io_type) {
-	case ZIO_TYPE_IOCTL:
+	if (zio->io_type == ZIO_TYPE_IOCTL) {
 		/* XXPOLICY */
 		if (!vdev_readable(vd)) {
 			zio->io_error = SET_ERROR(ENXIO);
 			zio_interrupt(zio);
 			return;
-		} else {
-			switch (zio->io_cmd) {
-			case DKIOCFLUSHWRITECACHE:
-				if (zfs_nocacheflush ||
-				    vdev_geom_bio_flush_disable)
-					break;
-				if (vd->vdev_nowritecache) {
-					zio->io_error = SET_ERROR(ENOTSUP);
-					break;
-				}
-				goto sendreq;
-			default:
-				zio->io_error = SET_ERROR(ENOTSUP);
-			}
 		}
 
-		zio_execute(zio);
-		return;
-	case ZIO_TYPE_TRIM:
-		if (!vdev_geom_bio_delete_disable) {
-			goto sendreq;
+		if (zfs_nocacheflush || vdev_geom_bio_flush_disable) {
+			zio_execute(zio);
+			return;
+		}
+
+		if (vd->vdev_nowritecache) {
+			zio->io_error = SET_ERROR(ENOTSUP);
+			zio_execute(zio);
+			return;
+		}
+	} else if (zio->io_type == ZIO_TYPE_TRIM) {
+		if (vdev_geom_bio_delete_disable) {
+			zio_execute(zio);
+			return;
 		}
-		zio_execute(zio);
-		return;
-	default:
-		;
-		/* PASSTHROUGH --- placate compiler */
 	}
-sendreq:
+
 	ASSERT(zio->io_type == ZIO_TYPE_READ ||
 	    zio->io_type == ZIO_TYPE_WRITE ||
 	    zio->io_type == ZIO_TYPE_TRIM ||


@@ -1403,38 +1403,29 @@ vdev_disk_io_start(zio_t *zio)
 	case ZIO_TYPE_IOCTL:
 		if (!vdev_readable(v)) {
-			rw_exit(&vd->vd_lock);
-			zio->io_error = SET_ERROR(ENXIO);
-			zio_interrupt(zio);
-			return;
-		}
-
-		switch (zio->io_cmd) {
-		case DKIOCFLUSHWRITECACHE:
-
-			if (zfs_nocacheflush)
-				break;
-
-			if (v->vdev_nowritecache) {
-				zio->io_error = SET_ERROR(ENOTSUP);
-				break;
-			}
-
+			/* Drive not there, can't flush */
+			error = SET_ERROR(ENXIO);
+		} else if (zfs_nocacheflush) {
+			/* Flushing disabled by operator, declare success */
+			error = 0;
+		} else if (v->vdev_nowritecache) {
+			/* This vdev not capable of flushing */
+			error = SET_ERROR(ENOTSUP);
+		} else {
+			/*
+			 * Issue the flush. If successful, the response will
+			 * be handled in the completion callback, so we're done.
+			 */
 			error = vdev_disk_io_flush(BDH_BDEV(vd->vd_bdh), zio);
 			if (error == 0) {
 				rw_exit(&vd->vd_lock);
 				return;
 			}
-
-			zio->io_error = error;
-			break;
-
-		default:
-			zio->io_error = SET_ERROR(ENOTSUP);
 		}
 
+		/* Couldn't issue the flush, so set the error and return it */
 		rw_exit(&vd->vd_lock);
+		zio->io_error = error;
 		zio_execute(zio);
 		return;


@@ -250,11 +250,10 @@ vdev_file_io_start(zio_t *zio)
 			return;
 		}
 
-		switch (zio->io_cmd) {
-		case DKIOCFLUSHWRITECACHE:
-
-			if (zfs_nocacheflush)
-				break;
+		if (zfs_nocacheflush) {
+			zio_execute(zio);
+			return;
+		}
 
 		/*
 		 * We cannot safely call vfs_fsync() when PF_FSTRANS
@@ -270,12 +269,7 @@ vdev_file_io_start(zio_t *zio)
 			return;
 		}
 
-			zio->io_error = zfs_file_fsync(vf->vf_file,
-			    O_SYNC | O_DSYNC);
-			break;
-		default:
-			zio->io_error = SET_ERROR(ENOTSUP);
-		}
+		zio->io_error = zfs_file_fsync(vf->vf_file, O_SYNC | O_DSYNC);
 
 		zio_execute(zio);
 		return;


@@ -2557,16 +2557,12 @@ vdev_draid_spare_ioctl(zio_t *zio)
 	vdev_t *vd = zio->io_vd;
 	int error = 0;
 
-	if (zio->io_cmd == DKIOCFLUSHWRITECACHE) {
-		for (int c = 0; c < vd->vdev_children; c++) {
-			zio_nowait(zio_vdev_child_io(zio, NULL,
-			    vd->vdev_child[c], zio->io_offset, zio->io_abd,
-			    zio->io_size, zio->io_type, zio->io_priority, 0,
-			    vdev_draid_spare_child_done, zio));
-		}
-	} else {
-		error = SET_ERROR(ENOTSUP);
+	for (int c = 0; c < vd->vdev_children; c++) {
+		zio_nowait(zio_vdev_child_io(zio, NULL,
+		    vd->vdev_child[c], zio->io_offset, zio->io_abd,
+		    zio->io_size, zio->io_type, zio->io_priority, 0,
+		    vdev_draid_spare_child_done, zio));
 	}
 
 	return (error);
 }


@@ -1096,10 +1096,7 @@ zfs_ereport_is_valid(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio)
 		return (B_FALSE);
 
 	if (zio != NULL) {
-		/*
-		 * If this is not a read or write zio, ignore the error. This
-		 * can occur if the DKIOCFLUSHWRITECACHE ioctl fails.
-		 */
+		/* If this is not a read or write zio, ignore the error */
 		if (zio->io_type != ZIO_TYPE_READ &&
 		    zio->io_type != ZIO_TYPE_WRITE)
 			return (B_FALSE);


@@ -125,10 +125,9 @@ static kstat_t *zil_kstats_global;
 int zil_replay_disable = 0;
 
 /*
- * Disable the DKIOCFLUSHWRITECACHE commands that are normally sent to
- * the disk(s) by the ZIL after an LWB write has completed. Setting this
- * will cause ZIL corruption on power loss if a volatile out-of-order
- * write cache is enabled.
+ * Disable the flush commands that are normally sent to the disk(s) by the ZIL
+ * after an LWB write has completed. Setting this will cause ZIL corruption on
+ * power loss if a volatile out-of-order write cache is enabled.
  */
 static int zil_nocacheflush = 0;
@@ -1406,19 +1405,17 @@ zil_lwb_add_txg(lwb_t *lwb, uint64_t txg)
 }
 
 /*
- * This function is a called after all vdevs associated with a given lwb
- * write have completed their DKIOCFLUSHWRITECACHE command; or as soon
- * as the lwb write completes, if "zil_nocacheflush" is set. Further,
- * all "previous" lwb's will have completed before this function is
- * called; i.e. this function is called for all previous lwbs before
- * it's called for "this" lwb (enforced via zio the dependencies
- * configured in zil_lwb_set_zio_dependency()).
+ * This function is a called after all vdevs associated with a given lwb write
+ * have completed their flush command; or as soon as the lwb write completes,
+ * if "zil_nocacheflush" is set. Further, all "previous" lwb's will have
+ * completed before this function is called; i.e. this function is called for
+ * all previous lwbs before it's called for "this" lwb (enforced via zio the
+ * dependencies configured in zil_lwb_set_zio_dependency()).
  *
- * The intention is for this function to be called as soon as the
- * contents of an lwb are considered "stable" on disk, and will survive
- * any sudden loss of power. At this point, any threads waiting for the
- * lwb to reach this state are signalled, and the "waiter" structures
- * are marked "done".
+ * The intention is for this function to be called as soon as the contents of
+ * an lwb are considered "stable" on disk, and will survive any sudden loss of
+ * power. At this point, any threads waiting for the lwb to reach this state
+ * are signalled, and the "waiter" structures are marked "done".
  */
 static void
 zil_lwb_flush_vdevs_done(zio_t *zio)
@@ -1532,17 +1529,16 @@ zil_lwb_flush_wait_all(zilog_t *zilog, uint64_t txg)
 }
 
 /*
- * This is called when an lwb's write zio completes. The callback's
- * purpose is to issue the DKIOCFLUSHWRITECACHE commands for the vdevs
- * in the lwb's lwb_vdev_tree. The tree will contain the vdevs involved
- * in writing out this specific lwb's data, and in the case that cache
- * flushes have been deferred, vdevs involved in writing the data for
- * previous lwbs. The writes corresponding to all the vdevs in the
- * lwb_vdev_tree will have completed by the time this is called, due to
- * the zio dependencies configured in zil_lwb_set_zio_dependency(),
- * which takes deferred flushes into account. The lwb will be "done"
- * once zil_lwb_flush_vdevs_done() is called, which occurs in the zio
- * completion callback for the lwb's root zio.
+ * This is called when an lwb's write zio completes. The callback's purpose is
+ * to issue the flush commands for the vdevs in the lwb's lwb_vdev_tree. The
+ * tree will contain the vdevs involved in writing out this specific lwb's
+ * data, and in the case that cache flushes have been deferred, vdevs involved
+ * in writing the data for previous lwbs. The writes corresponding to all the
+ * vdevs in the lwb_vdev_tree will have completed by the time this is called,
+ * due to the zio dependencies configured in zil_lwb_set_zio_dependency(),
+ * which takes deferred flushes into account. The lwb will be "done" once
+ * zil_lwb_flush_vdevs_done() is called, which occurs in the zio completion
+ * callback for the lwb's root zio.
  */
 static void
 zil_lwb_write_done(zio_t *zio)
@@ -1601,19 +1597,18 @@ zil_lwb_write_done(zio_t *zio)
 	}
 
 	/*
-	 * If this lwb does not have any threads waiting for it to
-	 * complete, we want to defer issuing the DKIOCFLUSHWRITECACHE
-	 * command to the vdevs written to by "this" lwb, and instead
-	 * rely on the "next" lwb to handle the DKIOCFLUSHWRITECACHE
-	 * command for those vdevs. Thus, we merge the vdev tree of
-	 * "this" lwb with the vdev tree of the "next" lwb in the list,
-	 * and assume the "next" lwb will handle flushing the vdevs (or
-	 * deferring the flush(s) again).
+	 * If this lwb does not have any threads waiting for it to complete, we
+	 * want to defer issuing the flush command to the vdevs written to by
+	 * "this" lwb, and instead rely on the "next" lwb to handle the flush
+	 * command for those vdevs. Thus, we merge the vdev tree of "this" lwb
+	 * with the vdev tree of the "next" lwb in the list, and assume the
+	 * "next" lwb will handle flushing the vdevs (or deferring the flush(s)
+	 * again).
 	 *
-	 * This is a useful performance optimization, especially for
-	 * workloads with lots of async write activity and few sync
-	 * write and/or fsync activity, as it has the potential to
-	 * coalesce multiple flush commands to a vdev into one.
+	 * This is a useful performance optimization, especially for workloads
+	 * with lots of async write activity and few sync write and/or fsync
+	 * activity, as it has the potential to coalesce multiple flush
+	 * commands to a vdev into one.
 	 */
 	if (list_is_empty(&lwb->lwb_waiters) && nlwb != NULL) {
 		zil_lwb_flush_defer(lwb, nlwb);
@@ -1663,16 +1658,16 @@ zil_lwb_set_zio_dependency(zilog_t *zilog, lwb_t *lwb)
 	 * If the previous lwb's write hasn't already completed, we also want
 	 * to order the completion of the lwb write zios (above, we only order
 	 * the completion of the lwb root zios). This is required because of
-	 * how we can defer the DKIOCFLUSHWRITECACHE commands for each lwb.
+	 * how we can defer the flush commands for each lwb.
 	 *
-	 * When the DKIOCFLUSHWRITECACHE commands are deferred, the previous
-	 * lwb will rely on this lwb to flush the vdevs written to by that
-	 * previous lwb. Thus, we need to ensure this lwb doesn't issue the
-	 * flush until after the previous lwb's write completes. We ensure
-	 * this ordering by setting the zio parent/child relationship here.
+	 * When the flush commands are deferred, the previous lwb will rely on
+	 * this lwb to flush the vdevs written to by that previous lwb. Thus,
+	 * we need to ensure this lwb doesn't issue the flush until after the
+	 * previous lwb's write completes. We ensure this ordering by setting
+	 * the zio parent/child relationship here.
 	 *
-	 * Without this relationship on the lwb's write zio, it's possible
-	 * for this lwb's write to complete prior to the previous lwb's write
+	 * Without this relationship on the lwb's write zio, it's possible for
+	 * this lwb's write to complete prior to the previous lwb's write
 	 * completing; and thus, the vdevs for the previous lwb would be
 	 * flushed prior to that lwb's data being written to those vdevs (the
 	 * vdevs are flushed in the lwb write zio's completion handler,
@@ -3499,8 +3494,8 @@ zil_commit_itx_assign(zilog_t *zilog, zil_commit_waiter_t *zcw)
 	 *   callback of the lwb's zio[*].
 	 *
 	 *   * Actually, the waiters are signaled in the zio completion
-	 *     callback of the root zio for the DKIOCFLUSHWRITECACHE commands
-	 *     that are sent to the vdevs upon completion of the lwb zio.
+	 *     callback of the root zio for the flush commands that are sent to
+	 *     the vdevs upon completion of the lwb zio.
 	 *
 	 * 2. When the itxs are inserted into the ZIL's queue of uncommitted
 	 *    itxs, the order in which they are inserted is preserved[*]; as


@@ -1631,11 +1631,9 @@ zio_flush(zio_t *pio, vdev_t *vd)
 		return;
 
 	if (vd->vdev_children == 0) {
-		zio_t *zio = zio_create(pio, vd->vdev_spa, 0, NULL, NULL, 0, 0,
+		zio_nowait(zio_create(pio, vd->vdev_spa, 0, NULL, NULL, 0, 0,
 		    NULL, NULL, ZIO_TYPE_IOCTL, ZIO_PRIORITY_NOW, flags, vd, 0,
-		    NULL, ZIO_STAGE_OPEN, ZIO_IOCTL_PIPELINE);
-		zio->io_cmd = DKIOCFLUSHWRITECACHE;
-		zio_nowait(zio);
+		    NULL, ZIO_STAGE_OPEN, ZIO_IOCTL_PIPELINE));
 	} else {
 		for (uint64_t c = 0; c < vd->vdev_children; c++)
 			zio_flush(pio, vd->vdev_child[c]);
@@ -4241,8 +4239,7 @@ zio_vdev_io_assess(zio_t *zio)
 	 * boolean flag so that we don't bother with it in the future.
 	 */
 	if ((zio->io_error == ENOTSUP || zio->io_error == ENOTTY) &&
-	    zio->io_type == ZIO_TYPE_IOCTL &&
-	    zio->io_cmd == DKIOCFLUSHWRITECACHE && vd != NULL)
+	    zio->io_type == ZIO_TYPE_IOCTL && vd != NULL)
 		vd->vdev_nowritecache = B_TRUE;
 
 	if (zio->io_error)