From d04b5c9e877a4d4b2337e6b2b453c7650aed433d Mon Sep 17 00:00:00 2001
From: Fedor Uporov <60701163+fuporovvStack@users.noreply.github.com>
Date: Thu, 11 Nov 2021 11:26:18 -0800
Subject: [PATCH] zhack: Add repair label option

If all label checksums on a vdev are invalid, the pool becomes
unimportable. The new zhack "label repair" subcommand can be used to
restore the label checksums and make the pool importable again.

Reviewed-by: Brian Behlendorf
Signed-off-by: Fedor Uporov
Closes #2510
Closes #12686
---
 cmd/zhack/zhack.c                             | 228 +++++++++++++++++-
 configure.ac                                  |   1 +
 man/man1/zhack.1                              |   7 +
 tests/runfiles/common.run                     |   6 +
 .../tests/functional/cli_root/Makefile.am     |   1 +
 .../functional/cli_root/zhack/Makefile.am     |   3 +
 .../cli_root/zhack/zhack_label_checksum.ksh   |  64 +++++
 7 files changed, 309 insertions(+), 1 deletion(-)
 create mode 100644 tests/zfs-tests/tests/functional/cli_root/zhack/Makefile.am
 create mode 100755 tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_checksum.ksh

diff --git a/cmd/zhack/zhack.c b/cmd/zhack/zhack.c
index b27423f538..bae242712a 100644
--- a/cmd/zhack/zhack.c
+++ b/cmd/zhack/zhack.c
@@ -33,6 +33,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -41,6 +42,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -76,7 +78,12 @@ usage(void)
 	    " -d decrease instead of increase the refcount\n"
 	    " -m add the feature to the label if increasing refcount\n"
 	    "\n"
-	    " <feature> : should be a feature guid\n");
+	    " <feature> : should be a feature guid\n"
+	    "\n"
+	    " label repair <device>\n"
+	    " repair corrupted label checksums\n"
+	    "\n"
+	    " <device> : path to vdev\n");
 	exit(1);
 }
 
@@ -471,6 +478,223 @@ zhack_do_feature(int argc, char **argv)
 	return (0);
 }
 
+/*
+ * Repair the checksum embedded in each vdev label of a device.
+ *
+ * argv[0] is the "repair" operation name and argv[1] the device path.
+ * Each of the VDEV_LABELS labels is read, its packed nvlist config is
+ * unpacked and checked for the keys in cfg_keys, and the checksum over
+ * the vl_vdev_phys region is recomputed. When the recomputed checksum
+ * differs from the stored one, the region is written back with the new
+ * checksum. Labels that cannot be read, unpacked or validated are left
+ * untouched.
+ *
+ * Returns 0 if at least one label was rewritten and 1 otherwise, which
+ * includes the case where every label checksum was already valid.
+ */
+static int
+zhack_repair_label_cksum(int argc, char **argv)
+{
+	zio_checksum_info_t *ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL];
+	const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION,
+	    ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID };
+	boolean_t labels_repaired[VDEV_LABELS];
+	boolean_t repaired = B_FALSE;
+	vdev_label_t *vl;
+	uint64_t psize;
+	struct stat st;
+	int fd;
+
+	bzero(labels_repaired, sizeof (labels_repaired));
+
+	abd_init();
+
+	argc -= 1;
+	argv += 1;
+
+	if (argc < 1) {
+		(void) fprintf(stderr, "error: missing device\n");
+		usage();
+	}
+
+	if ((fd = open(argv[0], O_RDWR)) == -1)
+		fatal(NULL, FTAG, "cannot open '%s': %s", argv[0],
+		    strerror(errno));
+
+	/* Stat the descriptor that is written to, not the path again. */
+	if (fstat(fd, &st) != 0)
+		fatal(NULL, FTAG, "cannot stat '%s': %s", argv[0],
+		    strerror(errno));
+
+	/*
+	 * Round the size down to a whole number of labels so the offsets of
+	 * the two trailing labels are label-aligned even when the file size
+	 * is not.
+	 */
+	psize = (uint64_t)st.st_size;
+	psize -= psize % sizeof (vdev_label_t);
+
+	/*
+	 * A single heap buffer is reused for every label instead of keeping
+	 * VDEV_LABELS full labels on the stack.
+	 */
+	vl = calloc(1, sizeof (vdev_label_t));
+	if (vl == NULL)
+		fatal(NULL, FTAG, "cannot allocate label buffer");
+
+	for (int l = 0; l < VDEV_LABELS; l++) {
+		uint64_t label_offset, offset;
+		boolean_t keys_missing = B_FALSE;
+		zio_cksum_t expected_cksum;
+		zio_cksum_t actual_cksum;
+		zio_cksum_t verifier;
+		zio_eck_t *eck;
+		nvlist_t *cfg;
+		int byteswap;
+		uint64_t val;
+		ssize_t err;
+
+		label_offset = vdev_label_offset(psize, l, 0);
+		err = pread64(fd, vl, sizeof (vdev_label_t), label_offset);
+		if (err == -1) {
+			(void) fprintf(stderr, "error: cannot read "
+			    "label %d: %s\n", l, strerror(errno));
+			continue;
+		} else if (err != sizeof (vdev_label_t)) {
+			(void) fprintf(stderr, "error: bad label %d read size "
+			    "\n", l);
+			continue;
+		}
+
+		err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist,
+		    VDEV_PHYS_SIZE - sizeof (zio_eck_t), &cfg, 0);
+		if (err) {
+			(void) fprintf(stderr, "error: cannot unpack nvlist "
+			    "label %d\n", l);
+			continue;
+		}
+
+		/* Report every missing key, then skip the whole label. */
+		for (int i = 0; i < ARRAY_SIZE(cfg_keys); i++) {
+			err = nvlist_lookup_uint64(cfg, cfg_keys[i], &val);
+			if (err) {
+				(void) fprintf(stderr, "error: label %d: "
+				    "cannot find nvlist key %s\n",
+				    l, cfg_keys[i]);
+				keys_missing = B_TRUE;
+			}
+		}
+
+		/* The config is only needed for the validation above. */
+		nvlist_free(cfg);
+
+		if (keys_missing)
+			continue;
+
+		void *data = (char *)vl + offsetof(vdev_label_t, vl_vdev_phys);
+		eck = (zio_eck_t *)((char *)(data) + VDEV_PHYS_SIZE) - 1;
+
+		offset = label_offset + offsetof(vdev_label_t, vl_vdev_phys);
+		ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0);
+
+		byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC));
+		if (byteswap)
+			byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));
+
+		/* Compute with the verifier placed in the checksum slot. */
+		expected_cksum = eck->zec_cksum;
+		eck->zec_cksum = verifier;
+
+		abd_t *abd = abd_get_from_buf(data, VDEV_PHYS_SIZE);
+		ci->ci_func[byteswap](abd, VDEV_PHYS_SIZE, NULL, &actual_cksum);
+		abd_free(abd);
+
+		if (byteswap)
+			byteswap_uint64_array(&expected_cksum,
+			    sizeof (zio_cksum_t));
+
+		if (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
+			continue;
+
+		/*
+		 * The comparison above swaps the stored checksum of a
+		 * byteswapped label, so the value stored back must be
+		 * swapped too or the label would never verify.
+		 */
+		if (byteswap)
+			byteswap_uint64_array(&actual_cksum,
+			    sizeof (zio_cksum_t));
+		eck->zec_cksum = actual_cksum;
+
+		err = pwrite64(fd, data, VDEV_PHYS_SIZE, offset);
+		if (err == -1) {
+			(void) fprintf(stderr, "error: cannot write "
+			    "label %d: %s\n", l, strerror(errno));
+			continue;
+		} else if (err != VDEV_PHYS_SIZE) {
+			(void) fprintf(stderr, "error: bad write size "
+			    "label %d\n", l);
+			continue;
+		}
+
+		/* Only count the label as repaired once fsync() succeeds. */
+		if (fsync(fd) != 0) {
+			(void) fprintf(stderr, "error: cannot sync "
+			    "label %d: %s\n", l, strerror(errno));
+			continue;
+		}
+
+		labels_repaired[l] = B_TRUE;
+	}
+
+	free(vl);
+	(void) close(fd);
+
+	abd_fini();
+
+	for (int l = 0; l < VDEV_LABELS; l++) {
+		(void) printf("label %d: %s\n", l,
+		    labels_repaired[l] ? "repaired" : "skipped");
+		repaired |= labels_repaired[l];
+	}
+
+	if (repaired)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * Dispatch "zhack label <operation>"; argv[0] is "label". The only
+ * operation handled is "repair"; anything else prints an error and
+ * the usage text.
+ */
+static int
+zhack_do_label(int argc, char **argv)
+{
+	char *subcommand;
+	int err = 0;
+
+	argc--;
+	argv++;
+	if (argc == 0) {
+		(void) fprintf(stderr,
+		    "error: no label operation specified\n");
+		usage();
+	}
+
+	subcommand = argv[0];
+	if (strcmp(subcommand, "repair") == 0) {
+		err = zhack_repair_label_cksum(argc, argv);
+	} else {
+		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
+		    subcommand);
+		usage();
+	}
+
+	return (err);
+}
+
 #define	MAX_NUM_PATHS 1024
 
 int
@@ -516,6 +740,8 @@ main(int argc, char **argv)
 
 	if (strcmp(subcommand, "feature") == 0) {
 		rv = zhack_do_feature(argc, argv);
+	} else if (strcmp(subcommand, "label") == 0) {
+		return (zhack_do_label(argc, argv));
 	} else {
 		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
 		    subcommand);
diff --git a/configure.ac b/configure.ac
index ebc7b276a6..4ff902cdc2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -288,6 +288,7 @@ AC_CONFIG_FILES([
 	tests/zfs-tests/tests/functional/cli_root/zfs_unshare/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_wait/Makefile + tests/zfs-tests/tests/functional/cli_root/zhack/Makefile tests/zfs-tests/tests/functional/cli_root/zpool/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_add/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_attach/Makefile diff --git a/man/man1/zhack.1 b/man/man1/zhack.1 index 83046ee8f5..b03b87a1bd 100644 --- a/man/man1/zhack.1 +++ b/man/man1/zhack.1 @@ -94,6 +94,13 @@ The flag indicates that the .Ar guid feature is now required to read the pool MOS. +. +.It Xo +.Nm zhack +.Cm label repair +.Ar device +.Xc +Repair corrupted labels by rewriting the checksum using the presumed valid contents of the label. .El . .Sh GLOBAL OPTIONS diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 2207105483..980e25958f 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -317,6 +317,12 @@ tags = ['functional', 'cli_root', 'zfs_upgrade'] tests = ['zfs_wait_deleteq'] tags = ['functional', 'cli_root', 'zfs_wait'] +[tests/functional/cli_root/zhack] +tests = ['zhack_label_checksum'] +pre = +post = +tags = ['functional', 'cli_root', 'zhack'] + [tests/functional/cli_root/zpool] tests = ['zpool_001_neg', 'zpool_002_pos', 'zpool_003_pos', 'zpool_colors'] tags = ['functional', 'cli_root', 'zpool'] diff --git a/tests/zfs-tests/tests/functional/cli_root/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/Makefile.am index c01ecee896..9951f96f31 100644 --- a/tests/zfs-tests/tests/functional/cli_root/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/Makefile.am @@ -35,6 +35,7 @@ SUBDIRS = \ zfs_unshare \ zfs_upgrade \ zfs_wait \ + zhack \ zpool \ zpool_add \ zpool_attach \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zhack/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zhack/Makefile.am new file mode 100644 index 0000000000..931dacde6b --- /dev/null +++ 
b/tests/zfs-tests/tests/functional/cli_root/zhack/Makefile.am
@@ -0,0 +1,3 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zhack
+dist_pkgdata_SCRIPTS = \
+	zhack_label_checksum.ksh
diff --git a/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_checksum.ksh b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_checksum.ksh
new file mode 100755
index 0000000000..67c7e7c448
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_checksum.ksh
@@ -0,0 +1,64 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2021 by vStack. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/include/blkdev.shlib
+
+#
+# Description:
+# "zhack label repair" recomputes and rewrites invalid label checksums
+#
+# Strategy:
+#	1. Create a pool on a single file vdev and export it
+#	2. Confirm zhack reports failure while all labels are still intact
+#	3. Corrupt the checksum of every label
+#	4. Confirm the pool can no longer be imported
+#	5. Repair the label checksums with zhack
+#	6. Confirm the pool can be imported again
+#
+
+log_assert "Verify zhack label repair will repair labels checksums"
+log_onexit cleanup
+
+VIRTUAL_DISK=$TEST_BASE_DIR/disk
+
+function cleanup
+{
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+	[[ -f $VIRTUAL_DISK ]] && log_must rm $VIRTUAL_DISK
+}
+
+log_must truncate -s $(($MINVDEVSIZE * 8)) $VIRTUAL_DISK
+
+log_must zpool create $TESTPOOL $VIRTUAL_DISK
+log_must zpool export $TESTPOOL
+
+log_mustnot zhack label repair $VIRTUAL_DISK
+
+for lbl in 0 1 2 3; do
+	corrupt_label_checksum $lbl $VIRTUAL_DISK
+done
+
+log_mustnot zpool import $TESTPOOL -d $TEST_BASE_DIR
+
+log_must zhack label repair $VIRTUAL_DISK
+
+log_must zpool import $TESTPOOL -d $TEST_BASE_DIR
+
+cleanup
+
+log_pass "zhack label repair works correctly."