From: "Vladimir 'φ-coder/phcoder' Serbinenko" <phcoder@gmail.com>
To: Richard Laager <rlaager@wiktel.com>
Cc: grub-devel@gnu.org, Zachary Bedell <pendorbound@gmail.com>
Subject: Re: [Patch] Robustly search for ZFS labels & uberblocks
Date: Sun, 22 Jan 2012 15:18:37 +0100 [thread overview]
Message-ID: <4F1C1ABD.1010303@gmail.com> (raw)
In-Reply-To: <1326973014.6387.224.camel@watermelon.coderich.net>
[-- Attachment #1: Type: text/plain, Size: 18895 bytes --]
I've cleaned this patch up (attached). Can you test it on these 4K FSes
or tell me how to create one w/o really having such a disk?
On 19.01.2012 12:36, Richard Laager wrote:
> static grub_err_t
> -uberblock_verify (uberblock_phys_t * ub, int offset)
> +uberblock_verify (uberblock_t * uber, int offset, struct grub_zfs_data *data)
> {
> - uberblock_t *uber =&ub->ubp_uberblock;
> grub_err_t err;
> grub_zfs_endian_t endian = UNKNOWN_ENDIAN;
> zio_cksum_t zc;
> + void *osp = NULL;
> + grub_size_t ospsize;
> +
> + if (uber->ub_txg< data->label_txg)
> + return grub_error (GRUB_ERR_BAD_FS,
> + "ignoring partially written label: uber_txg< label_txg");
>
I've left this part out since outside loop already looks for the newest txg.
> if (grub_zfs_to_cpu64 (uber->ub_magic, LITTLE_ENDIAN) == UBERBLOCK_MAGIC
> && grub_zfs_to_cpu64 (uber->ub_version, LITTLE_ENDIAN)> 0
> @@ -361,7 +390,19 @@
>
> zc.zc_word[0] = grub_cpu_to_zfs64 (offset, endian);
> err = zio_checksum_verify (zc, ZIO_CHECKSUM_LABEL, endian,
> - (char *) ub, UBERBLOCK_SIZE);
> + (char *) uber, UBERBLOCK_SIZE(data->vdev_ashift));
> +
> + if (!err)
> + {
> + /* Check that the data pointed by the rootbp is usable. */
> + void *osp = NULL;
> + grub_size_t ospsize;
> + err = zio_read (&uber->ub_rootbp, endian,&osp,&ospsize, data);
> + grub_free (osp);
> +
> + if (!err&& ospsize< OBJSET_PHYS_SIZE_V14)
> + return grub_error (GRUB_ERR_BAD_FS, "uberblock rootbp points to invalid data");
> + }
>
Left this one out: why would it happen that uberblock checksum is valid
but the other data in it isn't?
> return err;
> }
> @@ -372,32 +413,40 @@
> * Success - Pointer to the best uberblock.
> * Failure - NULL
> */
> -static uberblock_phys_t *
> -find_bestub (uberblock_phys_t * ub_array, grub_disk_addr_t sector)
> +static uberblock_t *
> +find_bestub (char *ub_array, struct grub_zfs_data *data)
> {
> - uberblock_phys_t *ubbest = NULL;
> - int i;
> - grub_disk_addr_t offset;
> + const grub_disk_addr_t sector = data->vdev_phys_sector;
> + uberblock_t *ubbest = NULL;
> + uberblock_t *ubnext;
> + unsigned int i, offset, pickedub = 0;
> grub_err_t err = GRUB_ERR_NONE;
>
> - for (i = 0; i< (VDEV_UBERBLOCK_RING>> VDEV_UBERBLOCK_SHIFT); i++)
> + const unsigned int UBCOUNT = UBERBLOCK_COUNT (data->vdev_ashift);
> + const grub_uint64_t UBBYTES = UBERBLOCK_SIZE (data->vdev_ashift);
> +
> + for (i = 0; i< UBCOUNT; i++)
> {
> - offset = (sector<< SPA_MINBLOCKSHIFT) + VDEV_PHYS_SIZE
> - + (i<< VDEV_UBERBLOCK_SHIFT);
> + ubnext = (uberblock_t *) (i * UBBYTES + ub_array);
> + offset = (sector<< SPA_MINBLOCKSHIFT) + VDEV_PHYS_SIZE + (i * UBBYTES);
>
This is wrong with respect to alignment invariants.
> - err = uberblock_verify (&ub_array[i], offset);
> + err = uberblock_verify (ubnext, offset, data);
> if (err)
> {
> grub_errno = GRUB_ERR_NONE;
> continue;
> }
> - if (ubbest == NULL
> - || vdev_uberblock_compare (&(ub_array[i].ubp_uberblock),
> - &(ubbest->ubp_uberblock))> 0)
> - ubbest =&ub_array[i];
> + if (ubbest == NULL || vdev_uberblock_compare (ubnext, ubbest)> 0)
> + {
> + ubbest = ubnext;
> + pickedub = i;
> + }
Skipped, it sets the variable but never uses it.
> }
> if (!ubbest)
> grub_errno = err;
> + else
> + grub_dprintf ("zfs", "Found best uberblock at idx %d, txg %llu\n",
> + pickedub, (unsigned long long) ubbest->ub_txg);
>
Skipped, not needed.
> return (ubbest);
> }
> @@ -515,8 +564,18 @@
> sector = DVA_OFFSET_TO_PHYS_SECTOR (offset);
> err = grub_disk_read (data->disk, sector, 0, psize, buf);
> }
> +
> if (!err)
> - return GRUB_ERR_NONE;
> + {
> + /* Check the underlying checksum before we rule this DVA as "good" */
> + grub_uint32_t checkalgo = (grub_zfs_to_cpu64 ((bp)->blk_prop, endian)>> 40)& 0xff;
> + err = zio_checksum_verify (bp->blk_cksum, checkalgo, endian, buf, psize);
> + if (!err)
> + return GRUB_ERR_NONE;
> + }
> +
> + /* If read failed or checksum bad, reset the error. Hopefully we've got
> + * some more DVA's to try. */
Skipped, already commented on this one.
> grub_errno = GRUB_ERR_NONE;
> }
>
> @@ -539,12 +598,9 @@
> unsigned int comp;
> char *compbuf = NULL;
> grub_err_t err;
> - zio_cksum_t zc = bp->blk_cksum;
> - grub_uint32_t checksum;
>
> *buf = NULL;
>
> - checksum = (grub_zfs_to_cpu64((bp)->blk_prop, endian)>> 40)& 0xff;
> comp = (grub_zfs_to_cpu64((bp)->blk_prop, endian)>>32)& 0xff;
> lsize = (BP_IS_HOLE(bp) ? 0 :
> (((grub_zfs_to_cpu64 ((bp)->blk_prop, endian)& 0xffff) + 1)
> @@ -580,15 +636,6 @@
> return err;
> }
>
> - err = zio_checksum_verify (zc, checksum, endian, compbuf, psize);
> - if (err)
> - {
> - grub_dprintf ("zfs", "incorrect checksum\n");
> - grub_free (compbuf);
> - *buf = NULL;
> - return err;
> - }
> -
> if (comp != ZIO_COMPRESS_OFF)
> {
> *buf = grub_malloc (lsize);
> @@ -1864,11 +1911,9 @@
> static grub_err_t
> check_pool_label (struct grub_zfs_data *data)
> {
> - grub_uint64_t pool_state, txg = 0;
> - char *nvlist;
> -#if 0
> - char *nv;
> -#endif
> + grub_uint64_t pool_state;
> + char *nvlist; /* for the pool */
> + char *vdevnvlist; /* for the vdev */
> grub_uint64_t diskguid;
> grub_uint64_t version;
> int found;
> @@ -1898,7 +1943,9 @@
> }
> grub_dprintf ("zfs", "check 4 passed\n");
>
> - found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_POOL_TXG,&txg);
> + data->label_txg = 0;
> + found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_POOL_TXG,
> + &data->label_txg);
Not needed. Skipped.
> if (!found)
> {
> grub_free (nvlist);
> @@ -1909,7 +1956,7 @@
> grub_dprintf ("zfs", "check 6 passed\n");
>
> /* not an active device */
> - if (txg == 0)
> + if (data->label_txg == 0)
> {
> grub_free (nvlist);
> return grub_error (GRUB_ERR_BAD_FS, "zpool isn't active");
> @@ -1931,20 +1978,32 @@
> {
> grub_free (nvlist);
> return grub_error (GRUB_ERR_NOT_IMPLEMENTED_YET,
> - "too new version %llu> %llu",
> + "SPA version too new %llu> %llu",
> (unsigned long long) version,
> (unsigned long long) SPA_VERSION);
> }
> grub_dprintf ("zfs", "check 9 passed\n");
> -#if 0
> - if (nvlist_lookup_value (nvlist, ZPOOL_CONFIG_VDEV_TREE,&nv,
> - DATA_TYPE_NVLIST, NULL))
> +
> + vdevnvlist = grub_zfs_nvlist_lookup_nvlist (nvlist, ZPOOL_CONFIG_VDEV_TREE);
> + if (!vdevnvlist)
> {
> - grub_free (vdev);
> - return (GRUB_ERR_BAD_FS);
> + grub_free (nvlist);
> + if (!grub_errno)
> + grub_error (GRUB_ERR_BAD_FS, ZPOOL_CONFIG_VDEV_TREE " not found");
> + return grub_errno;
> }
> grub_dprintf ("zfs", "check 10 passed\n");
> -#endif
> +
> + found = grub_zfs_nvlist_lookup_uint64 (vdevnvlist, ZPOOL_CONFIG_ASHIFT,
> + &data->vdev_ashift);
> + grub_free (vdevnvlist);
> + if (!found)
> + {
> + grub_free (nvlist);
> + if (!grub_errno)
> + grub_error (GRUB_ERR_BAD_FS, ZPOOL_CONFIG_ASHIFT " not found");
> + return grub_errno;
> + }
>
> found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_GUID,&diskguid);
> if (! found)
> @@ -1956,11 +2015,42 @@
> }
> grub_dprintf ("zfs", "check 11 passed\n");
>
> + found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_POOL_GUID,&data->pool_guid);
> + if (! found)
> + {
> + grub_free (nvlist);
> + if (! grub_errno)
> + grub_error (GRUB_ERR_BAD_FS, ZPOOL_CONFIG_POOL_GUID " not found");
> + return grub_errno;
> + }
> + grub_dprintf ("zfs", "check 12 passed\n");
> +
> +
This check is already in. Skipped.
> grub_free (nvlist);
>
> + grub_dprintf ("zfs", "ZFS Pool GUID: %llu (%016llx) Label: GUID: %llu (%016llx), txg: %llu, SPA v%llu, ashift: %llu\n",
> + (unsigned long long) data->pool_guid,
> + (unsigned long long) data->pool_guid,
> + (unsigned long long) diskguid,
> + (unsigned long long) diskguid,
> + (unsigned long long) data->label_txg,
> + (unsigned long long) version,
> + (unsigned long long) data->vdev_ashift);
> +
Useless. Skipped.
> return GRUB_ERR_NONE;
> }
>
> +/*
> + * vdev_label_start returns the physical disk offset (in bytes) of
> + * label "l".
> + */
> +static grub_uint64_t vdev_label_start (grub_uint64_t psize, int l)
> +{
> + return (l * sizeof (vdev_label_t) + (l< VDEV_LABELS / 2 ?
> + 0 : psize -
> + VDEV_LABELS * sizeof (vdev_label_t)));
> +}
> +
> static void
> zfs_unmount (struct grub_zfs_data *data)
> {
> @@ -1979,13 +2069,13 @@
> zfs_mount (grub_device_t dev)
> {
> struct grub_zfs_data *data = 0;
> - int label = 0;
> - uberblock_phys_t *ub_array, *ubbest = NULL;
> - vdev_boot_header_t *bh;
> + int label = 0, bestlabel = -1;
> + char *ub_array;
> + uberblock_t *ubbest;
> + uberblock_t *ubcur = NULL;
> void *osp = 0;
> grub_size_t ospsize;
> grub_err_t err;
> - int vdevnum;
>
> if (! dev->disk)
> {
> @@ -1997,11 +2087,6 @@
> if (!data)
> return 0;
> grub_memset (data, 0, sizeof (*data));
> -#if 0
> - /* if it's our first time here, zero the best uberblock out */
> - if (data->best_drive == 0&& data->best_part == 0&& find_best_root)
> - grub_memset (¤t_uberblock, 0, sizeof (uberblock_t));
> -#endif
>
> data->disk = dev->disk;
>
> @@ -2012,100 +2097,129 @@
> return 0;
> }
>
> - bh = grub_malloc (VDEV_BOOT_HEADER_SIZE);
> - if (!bh)
> + ubbest = grub_malloc (sizeof (*ubbest));
> + if (!ubbest)
> {
> zfs_unmount (data);
> - grub_free (ub_array);
> return 0;
> }
> -
> - vdevnum = VDEV_LABELS;
> -
> - /* Don't check back labels on CDROM. */
> - if (grub_disk_get_size (dev->disk) == GRUB_DISK_SIZE_UNKNOWN)
> - vdevnum = VDEV_LABELS / 2;
> -
> - for (label = 0; ubbest == NULL&& label< vdevnum; label++)
> + grub_memset (ubbest, 0, sizeof (*ubbest));
> +
> + /* Establish some constants for where things are on the device: */
> +
> + /*
> + * Some eltorito stacks don't give us a size and we end up setting the
> + * size to MAXUINT, further some of these devices stop working once a single
> + * read past the end has been issued. Checking for a maximum part_length and
> + * skipping the backup labels at the end of the slice/partition/device
> + * avoids breaking down on such devices.
> + */
> + const int vdevnum =
> + grub_disk_get_size (dev->disk) == GRUB_DISK_SIZE_UNKNOWN ?
> + VDEV_LABELS / 2 : VDEV_LABELS;
> +
> + /* Size in bytes of the device (disk or partition) aligned to label size */
> + grub_uint64_t device_size =
> + grub_disk_get_size (dev->disk)<< GRUB_DISK_SECTOR_BITS;
> + const grub_uint64_t alignedbytes =
> + P2ALIGN (device_size, (grub_uint64_t) sizeof (vdev_label_t));
> +
> + for (label = 0; label< vdevnum; label++)
> {
> - grub_zfs_endian_t ub_endian = UNKNOWN_ENDIAN;
> - grub_dprintf ("zfs", "label %d\n", label);
> -
> - data->vdev_phys_sector
> - = label * (sizeof (vdev_label_t)>> SPA_MINBLOCKSHIFT)
> - + ((VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE)>> SPA_MINBLOCKSHIFT)
> - + (label< VDEV_LABELS / 2 ? 0 : grub_disk_get_size (dev->disk)
> - - VDEV_LABELS * (sizeof (vdev_label_t)>> SPA_MINBLOCKSHIFT));
> + grub_uint64_t labelstartbytes = vdev_label_start (alignedbytes, label);
> + grub_uint64_t labelstart = labelstartbytes>> GRUB_DISK_SECTOR_BITS;
> +
> + grub_dprintf ("zfs", "reading label %d at sector %llu (byte %llu)\n",
> + label, (unsigned long long) labelstart,
> + (unsigned long long) labelstartbytes);
> +
> + data->vdev_phys_sector = labelstart +
> + ((VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE)>> GRUB_DISK_SECTOR_BITS);
> +
> + err = check_pool_label (data);
> + if (err)
> + {
> + grub_dprintf ("zfs", "error checking label %d\n", label);
> + grub_errno = GRUB_ERR_NONE;
> + continue;
> + }
>
This seems like a lot of moving around but I don't see any real change
other than aligning the size down.
> /* Read in the uberblock ring (128K). */
> - err = grub_disk_read (data->disk, data->vdev_phys_sector
> - + (VDEV_PHYS_SIZE>> SPA_MINBLOCKSHIFT),
> - 0, VDEV_UBERBLOCK_RING, (char *) ub_array);
> - if (err)
> - {
> - grub_errno = GRUB_ERR_NONE;
> - continue;
> - }
> - grub_dprintf ("zfs", "label ok %d\n", label);
> -
> - ubbest = find_bestub (ub_array, data->vdev_phys_sector);
> - if (!ubbest)
> - {
> - grub_dprintf ("zfs", "No uberblock found\n");
> - grub_errno = GRUB_ERR_NONE;
> - continue;
> - }
> - ub_endian = (grub_zfs_to_cpu64 (ubbest->ubp_uberblock.ub_magic,
> - LITTLE_ENDIAN) == UBERBLOCK_MAGIC
> - ? LITTLE_ENDIAN : BIG_ENDIAN);
> - err = zio_read (&ubbest->ubp_uberblock.ub_rootbp,
> - ub_endian,
> - &osp,&ospsize, data);
> - if (err)
> - {
> - grub_dprintf ("zfs", "couldn't zio_read\n");
> - grub_errno = GRUB_ERR_NONE;
> - continue;
> - }
> -
> - if (ospsize< OBJSET_PHYS_SIZE_V14)
> - {
> - grub_dprintf ("zfs", "osp too small\n");
> - grub_free (osp);
> - continue;
> - }
> - grub_dprintf ("zfs", "ubbest %p\n", ubbest);
> -
> - err = check_pool_label (data);
> - if (err)
> - {
> - grub_errno = GRUB_ERR_NONE;
> - continue;
> - }
> -#if 0
> - if (find_best_root&&
> - vdev_uberblock_compare (&ubbest->ubp_uberblock,
> - &(current_uberblock))<= 0)
> - continue;
> -#endif
> - /* Got the MOS. Save it at the memory addr MOS. */
> - grub_memmove (&(data->mos.dn),&((objset_phys_t *) osp)->os_meta_dnode,
> - DNODE_SIZE);
> - data->mos.endian = (grub_zfs_to_cpu64 (ubbest->ubp_uberblock.ub_rootbp.blk_prop, ub_endian)>> 63)& 1;
> - grub_memmove (&(data->current_uberblock),
> - &ubbest->ubp_uberblock, sizeof (uberblock_t));
> - grub_free (ub_array);
> - grub_free (bh);
> - grub_free (osp);
> - return data;
> + err = grub_disk_read (data->disk,
> + data->vdev_phys_sector +
> + (VDEV_PHYS_SIZE>> GRUB_DISK_SECTOR_BITS),
> + 0, VDEV_UBERBLOCK_RING, ub_array);
> + if (err)
> + {
> + grub_dprintf ("zfs", "error reading uberblock ring for label %d\n", label);
> + grub_errno = GRUB_ERR_NONE;
> + continue;
> + }
> +
> + ubcur = find_bestub (ub_array, data);
> + if (!ubcur)
> + {
> + grub_dprintf ("zfs", "No good uberblocks found in label %d\n", label);
> + grub_errno = GRUB_ERR_NONE;
> + continue;
> + }
> +
> + if (vdev_uberblock_compare (ubcur, ubbest)> 0)
> + {
> + /* Looks like the block is good, so use it. */
> + grub_memcpy (ubbest, ubcur, sizeof (*ubbest));
> + bestlabel = label;
> + grub_dprintf ("zfs", "Current best uberblock found in label %d\n", label);
> + }
> }
> - grub_error (GRUB_ERR_BAD_FS, "couldn't find a valid label");
> - zfs_unmount (data);
> grub_free (ub_array);
> - grub_free (bh);
> +
> + /*
> + * We zero'd the structure to begin with. If we never assigned to it,
> + * magic will still be zero.
> + */
> + if (!ubbest->ub_magic)
> + {
> + grub_error (GRUB_ERR_BAD_FS, "couldn't find a valid ZFS label");
> + zfs_unmount (data);
> + grub_free (ubbest);
> + return 0;
> + }
> +
> + grub_dprintf ("zfs", "ubbest %p in label %d\n", ubbest, bestlabel);
> +
> + grub_zfs_endian_t ub_endian =
> + grub_zfs_to_cpu64 (ubbest->ub_magic, LITTLE_ENDIAN) == UBERBLOCK_MAGIC
> + ? LITTLE_ENDIAN : BIG_ENDIAN;
> + err = zio_read (&ubbest->ub_rootbp, ub_endian,&osp,&ospsize, data);
> +
> + if (err)
> + {
> + grub_error (GRUB_ERR_BAD_FS, "couldn't zio_read object directory");
> + zfs_unmount (data);
> + grub_free (ubbest);
> + return 0;
> + }
> +
> + if (ospsize< OBJSET_PHYS_SIZE_V14)
> + {
> + grub_error (GRUB_ERR_BAD_FS, "osp too small");
> + zfs_unmount (data);
> + grub_free (osp);
> + grub_free (ubbest);
> + return 0;
> + }
> +
> + /* Got the MOS. Save it at the memory addr MOS. */
> + grub_memmove (&(data->mos.dn),&((objset_phys_t *) osp)->os_meta_dnode, DNODE_SIZE);
> + data->mos.endian =
> + (grub_zfs_to_cpu64 (ubbest->ub_rootbp.blk_prop, ub_endian)>> 63)& 1;
> + grub_memmove (&(data->current_uberblock), ubbest, sizeof (uberblock_t));
> +
> grub_free (osp);
> + grub_free (ubbest);
>
> - return 0;
> + return (data);
> }
>
> grub_err_t
> @@ -2149,33 +2263,18 @@
> static grub_err_t
> zfs_uuid (grub_device_t device, char **uuid)
> {
> - char *nvlist;
> - int found;
> struct grub_zfs_data *data;
> - grub_uint64_t guid;
> - grub_err_t err;
> -
> - *uuid = 0;
>
> data = zfs_mount (device);
> if (! data)
> return grub_errno;
>
> - err = zfs_fetch_nvlist (data,&nvlist);
> - if (err)
> - {
> - zfs_unmount (data);
> - return err;
> - }
> -
> - found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_POOL_GUID,&guid);
> - if (! found)
> - return grub_errno;
> - grub_free (nvlist);
> - *uuid = grub_xasprintf ("%016llx", (long long unsigned) guid);
> + *uuid = grub_xasprintf ("%016llx", (long long unsigned) data->pool_guid);
> zfs_unmount (data);
> +
> if (! *uuid)
> return grub_errno;
> +
> return GRUB_ERR_NONE;
> }
>
>
> === modified file 'include/grub/zfs/uberblock_impl.h'
> --- include/grub/zfs/uberblock_impl.h 2010-12-01 21:55:26 +0000
> +++ include/grub/zfs/uberblock_impl.h 2012-01-19 11:28:09 +0000
> @@ -23,6 +23,8 @@
> #ifndef _SYS_UBERBLOCK_IMPL_H
> #define _SYS_UBERBLOCK_IMPL_H
>
> +#define UBMAX(a,b) ((a)> (b) ? (a) : (b))
> +
> /*
> * The uberblock version is incremented whenever an incompatible on-disk
> * format change is made to the SPA, DMU, or ZAP.
> @@ -45,16 +47,10 @@
> blkptr_t ub_rootbp; /* MOS objset_phys_t */
> } uberblock_t;
>
> -#define UBERBLOCK_SIZE (1ULL<< UBERBLOCK_SHIFT)
> -#define VDEV_UBERBLOCK_SHIFT UBERBLOCK_SHIFT
> -
> -/* XXX Uberblock_phys_t is no longer in the kernel zfs */
> -typedef struct uberblock_phys {
> - uberblock_t ubp_uberblock;
> - char ubp_pad[UBERBLOCK_SIZE - sizeof (uberblock_t) -
> - sizeof (zio_eck_t)];
> - zio_eck_t ubp_zec;
> -} uberblock_phys_t;
> -
> +#define VDEV_UBERBLOCK_SHIFT(as) UBMAX(as, UBERBLOCK_SHIFT)
> +#define UBERBLOCK_SIZE(as) (1ULL<< VDEV_UBERBLOCK_SHIFT(as))
> +
> +/* Number of uberblocks that can fit in the ring at a given ashift */
> +#define UBERBLOCK_COUNT(as) (VDEV_UBERBLOCK_RING>> VDEV_UBERBLOCK_SHIFT(as))
>
> #endif /* _SYS_UBERBLOCK_IMPL_H */
>
--
Regards
Vladimir 'φ-coder/phcoder' Serbinenko
[-- Attachment #2: zfs.diff --]
[-- Type: text/x-diff, Size: 6079 bytes --]
=== modified file 'grub-core/fs/zfs/zfs.c'
--- grub-core/fs/zfs/zfs.c 2012-01-14 14:44:34 +0000
+++ grub-core/fs/zfs/zfs.c 2012-01-22 13:44:45 +0000
@@ -192,6 +192,7 @@
enum { DEVICE_LEAF, DEVICE_MIRROR, DEVICE_RAIDZ } type;
grub_uint64_t id;
grub_uint64_t guid;
+ unsigned ashift;
/* Valid only for non-leafs. */
unsigned n_children;
@@ -199,7 +200,6 @@
/* Valid only for RAIDZ. */
unsigned nparity;
- unsigned ashift;
/* Valid only for leaf devices. */
grub_device_t dev;
@@ -466,12 +466,12 @@
* Three pieces of information are needed to verify an uberblock: the magic
* number, the version number, and the checksum.
*
- * Currently Implemented: version number, magic number
- * Need to Implement: checksum
+ * Currently Implemented: version number, magic number, checksum
*
*/
static grub_err_t
-uberblock_verify (uberblock_phys_t * ub, grub_uint64_t offset)
+uberblock_verify (uberblock_phys_t * ub, grub_uint64_t offset,
+ grub_size_t s)
{
uberblock_t *uber = &ub->ubp_uberblock;
grub_err_t err;
@@ -498,7 +498,7 @@
zc.zc_word[0] = grub_cpu_to_zfs64 (offset, endian);
err = zio_checksum_verify (zc, ZIO_CHECKSUM_LABEL, endian,
- (char *) ub, UBERBLOCK_SIZE);
+ (char *) ub, s);
return err;
}
@@ -510,28 +510,37 @@
* Failure - NULL
*/
static uberblock_phys_t *
-find_bestub (uberblock_phys_t * ub_array, grub_disk_addr_t sector)
+find_bestub (uberblock_phys_t * ub_array,
+ const struct grub_zfs_device_desc *desc)
{
- uberblock_phys_t *ubbest = NULL;
+ uberblock_phys_t *ubbest = NULL, *ubptr;
int i;
grub_disk_addr_t offset;
grub_err_t err = GRUB_ERR_NONE;
-
- for (i = 0; i < (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT); i++)
+ int ub_shift;
+
+ ub_shift = desc->ashift;
+ if (ub_shift < VDEV_UBERBLOCK_SHIFT)
+ ub_shift = VDEV_UBERBLOCK_SHIFT;
+
+ for (i = 0; i < (VDEV_UBERBLOCK_RING >> ub_shift); i++)
{
- offset = (sector << SPA_MINBLOCKSHIFT) + VDEV_PHYS_SIZE
- + (i << VDEV_UBERBLOCK_SHIFT);
+ offset = (desc->vdev_phys_sector << SPA_MINBLOCKSHIFT) + VDEV_PHYS_SIZE
+ + (i << ub_shift);
- err = uberblock_verify (&ub_array[i], offset);
+ ubptr = (uberblock_phys_t *) ((grub_properly_aligned_t *) ub_array
+ + ((i << ub_shift)
+ / sizeof (grub_properly_aligned_t)));
+ err = uberblock_verify (ubptr, offset, 1 << ub_shift);
if (err)
{
grub_errno = GRUB_ERR_NONE;
continue;
}
if (ubbest == NULL
- || vdev_uberblock_compare (&(ub_array[i].ubp_uberblock),
+ || vdev_uberblock_compare (&(ubptr->ubp_uberblock),
&(ubbest->ubp_uberblock)) > 0)
- ubbest = &ub_array[i];
+ ubbest = ubptr;
}
if (!ubbest)
grub_errno = err;
@@ -582,7 +591,8 @@
const char *nvlist,
struct grub_zfs_device_desc *fill,
struct grub_zfs_device_desc *insert,
- int *inserted)
+ int *inserted,
+ unsigned ashift)
{
char *type;
@@ -603,6 +613,19 @@
return grub_error (GRUB_ERR_BAD_FS, "couldn't find vdev id");
}
+ {
+ grub_uint64_t par;
+ if (grub_zfs_nvlist_lookup_uint64 (nvlist, "ashift", &par))
+ fill->ashift = par;
+ else if (ashift != 0xffffffff)
+ fill->ashift = ashift;
+ else
+ {
+ grub_free (type);
+ return grub_error (GRUB_ERR_BAD_FS, "couldn't find ashift");
+ }
+ }
+
if (grub_strcmp (type, VDEV_TYPE_DISK) == 0
|| grub_strcmp (type, VDEV_TYPE_FILE) == 0)
{
@@ -616,6 +639,7 @@
fill->original = insert->original;
if (!data->device_original)
data->device_original = fill;
+ insert->ashift = fill->ashift;
*inserted = 1;
}
@@ -641,12 +665,6 @@
return grub_error (GRUB_ERR_BAD_FS, "couldn't find raidz parity");
}
fill->nparity = par;
- if (!grub_zfs_nvlist_lookup_uint64 (nvlist, "ashift", &par))
- {
- grub_free (type);
- return grub_error (GRUB_ERR_BAD_FS, "couldn't find raidz ashift");
- }
- fill->ashift = par;
}
nelm = grub_zfs_nvlist_lookup_nvlist_array_get_nelm (nvlist,
@@ -675,7 +693,7 @@
(nvlist, ZPOOL_CONFIG_CHILDREN, i);
err = fill_vdev_info_real (data, child, &fill->children[i], insert,
- inserted);
+ inserted, fill->ashift);
grub_free (child);
@@ -710,7 +728,7 @@
for (i = 0; i < data->n_devices_attached; i++)
if (data->devices_attached[i].id == id)
return fill_vdev_info_real (data, nvlist, &data->devices_attached[i],
- diskdesc, inserted);
+ diskdesc, inserted, 0xffffffff);
data->n_devices_attached++;
if (data->n_devices_attached > data->n_devices_allocated)
@@ -733,7 +751,7 @@
return fill_vdev_info_real (data, nvlist,
&data->devices_attached[data->n_devices_attached - 1],
- diskdesc, inserted);
+ diskdesc, inserted, 0xffffffff);
}
/*
@@ -908,7 +926,8 @@
desc.vdev_phys_sector
= label * (sizeof (vdev_label_t) >> SPA_MINBLOCKSHIFT)
+ ((VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE) >> SPA_MINBLOCKSHIFT)
- + (label < VDEV_LABELS / 2 ? 0 : grub_disk_get_size (dev->disk)
+ + (label < VDEV_LABELS / 2 ? 0 :
+ ALIGN_DOWN (grub_disk_get_size (dev->disk), sizeof (vdev_label_t))
- VDEV_LABELS * (sizeof (vdev_label_t) >> SPA_MINBLOCKSHIFT));
/* Read in the uberblock ring (128K). */
@@ -922,7 +941,14 @@
}
grub_dprintf ("zfs", "label ok %d\n", label);
- ubbest = find_bestub (ub_array, desc.vdev_phys_sector);
+ err = check_pool_label (data, &desc, inserted);
+ if (err || !*inserted)
+ {
+ grub_errno = GRUB_ERR_NONE;
+ continue;
+ }
+
+ ubbest = find_bestub (ub_array, &desc);
if (!ubbest)
{
grub_dprintf ("zfs", "No uberblock found\n");
@@ -936,12 +962,6 @@
grub_memmove (&(data->current_uberblock),
&ubbest->ubp_uberblock, sizeof (uberblock_t));
- err = check_pool_label (data, &desc, inserted);
- if (err)
- {
- grub_errno = GRUB_ERR_NONE;
- continue;
- }
#if 0
if (find_best_root &&
vdev_uberblock_compare (&ubbest->ubp_uberblock,
next prev parent reply other threads:[~2012-01-22 14:18 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-09-19 18:45 [Patch] Robustly search for ZFS labels & uberblocks Zachary Bedell
2011-09-28 21:20 ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-19 11:36 ` Richard Laager
2012-01-22 14:18 ` Vladimir 'φ-coder/phcoder' Serbinenko [this message]
2012-01-22 20:31 ` Richard Laager
2012-01-24 7:12 ` Richard Laager
2012-01-27 19:04 ` Zachary Bedell
2012-01-27 22:22 ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-28 2:50 ` Richard Laager
2012-01-28 12:51 ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-28 16:50 ` Richard Laager
2012-01-28 17:06 ` Darik Horn
2012-01-28 17:39 ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-28 18:33 ` Richard Laager
2012-01-28 19:21 ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-29 22:42 ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-31 8:45 ` Richard Laager
2012-02-02 11:13 ` Richard Laager
2012-02-03 10:02 ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-02-03 9:52 ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-02-03 11:20 ` Richard Laager
2012-01-28 18:40 ` Darik Horn
2012-01-28 19:27 ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-30 1:22 ` Richard Laager
2012-01-30 1:43 ` Vladimir 'φ-coder/phcoder' Serbinenko
2011-11-03 14:45 ` Vladimir 'φ-coder/phcoder' Serbinenko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4F1C1ABD.1010303@gmail.com \
--to=phcoder@gmail.com \
--cc=grub-devel@gnu.org \
--cc=pendorbound@gmail.com \
--cc=rlaager@wiktel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).