grub-devel.gnu.org archive mirror
 help / color / mirror / Atom feed
From: "Vladimir 'φ-coder/phcoder' Serbinenko" <phcoder@gmail.com>
To: Richard Laager <rlaager@wiktel.com>
Cc: grub-devel@gnu.org, Zachary Bedell <pendorbound@gmail.com>
Subject: Re: [Patch] Robustly search for ZFS labels & uberblocks
Date: Sun, 22 Jan 2012 15:18:37 +0100	[thread overview]
Message-ID: <4F1C1ABD.1010303@gmail.com> (raw)
In-Reply-To: <1326973014.6387.224.camel@watermelon.coderich.net>

[-- Attachment #1: Type: text/plain, Size: 18895 bytes --]

I've cleaned this patch up (attached). Can you test it on these 4K FSes 
or tell me how to create one w/o really having such a disk?
On 19.01.2012 12:36, Richard Laager wrote:
>   static grub_err_t
> -uberblock_verify (uberblock_phys_t * ub, int offset)
> +uberblock_verify (uberblock_t * uber, int offset, struct grub_zfs_data *data)
>   {
> -  uberblock_t *uber =&ub->ubp_uberblock;
>     grub_err_t err;
>     grub_zfs_endian_t endian = UNKNOWN_ENDIAN;
>     zio_cksum_t zc;
> +  void *osp = NULL;
> +  grub_size_t ospsize;
> +
> +  if (uber->ub_txg<  data->label_txg)
> +    return grub_error (GRUB_ERR_BAD_FS,
> +	"ignoring partially written label: uber_txg<  label_txg");
>   
I've left this part out since outside loop already looks for the newest txg.
>     if (grub_zfs_to_cpu64 (uber->ub_magic, LITTLE_ENDIAN) == UBERBLOCK_MAGIC
>         &&  grub_zfs_to_cpu64 (uber->ub_version, LITTLE_ENDIAN)>  0
> @@ -361,7 +390,19 @@
>
>     zc.zc_word[0] = grub_cpu_to_zfs64 (offset, endian);
>     err = zio_checksum_verify (zc, ZIO_CHECKSUM_LABEL, endian,
> -			     (char *) ub, UBERBLOCK_SIZE);
> +			     (char *) uber, UBERBLOCK_SIZE(data->vdev_ashift));
> +
> +  if (!err)
> +    {
> +      /* Check that the data pointed by the rootbp is usable. */
> +      void *osp = NULL;
> +      grub_size_t ospsize;
> +      err = zio_read (&uber->ub_rootbp, endian,&osp,&ospsize, data);
> +      grub_free (osp);
> +
> +      if (!err&&  ospsize<  OBJSET_PHYS_SIZE_V14)
> +	return grub_error (GRUB_ERR_BAD_FS, "uberblock rootbp points to invalid data");
> +    }
>   
Left this one out: why would it happen that uberblock checksum is valid 
but the other data in it isn't?
>     return err;
>   }
> @@ -372,32 +413,40 @@
>    *    Success - Pointer to the best uberblock.
>    *    Failure - NULL
>    */
> -static uberblock_phys_t *
> -find_bestub (uberblock_phys_t * ub_array, grub_disk_addr_t sector)
> +static uberblock_t *
> +find_bestub (char *ub_array, struct grub_zfs_data *data)
>   {
> -  uberblock_phys_t *ubbest = NULL;
> -  int i;
> -  grub_disk_addr_t offset;
> +  const grub_disk_addr_t sector = data->vdev_phys_sector;
> +  uberblock_t *ubbest = NULL;
> +  uberblock_t *ubnext;
> +  unsigned int i, offset, pickedub = 0;
>     grub_err_t err = GRUB_ERR_NONE;
>
> -  for (i = 0; i<  (VDEV_UBERBLOCK_RING>>  VDEV_UBERBLOCK_SHIFT); i++)
> +  const unsigned int UBCOUNT = UBERBLOCK_COUNT (data->vdev_ashift);
> +  const grub_uint64_t UBBYTES = UBERBLOCK_SIZE (data->vdev_ashift);
> +
> +  for (i = 0; i<  UBCOUNT; i++)
>       {
> -      offset = (sector<<  SPA_MINBLOCKSHIFT) + VDEV_PHYS_SIZE
> -	+ (i<<  VDEV_UBERBLOCK_SHIFT);
> +      ubnext = (uberblock_t *) (i * UBBYTES + ub_array);
> +      offset = (sector<<  SPA_MINBLOCKSHIFT) + VDEV_PHYS_SIZE + (i * UBBYTES);
>   
This is wrong with respect to alignment invariants.
> -      err = uberblock_verify (&ub_array[i], offset);
> +      err = uberblock_verify (ubnext, offset, data);
>         if (err)
>   	{
>   	  grub_errno = GRUB_ERR_NONE;
>   	  continue;
>   	}
> -      if (ubbest == NULL
> -	  || vdev_uberblock_compare (&(ub_array[i].ubp_uberblock),
> -				&(ubbest->ubp_uberblock))>  0)
> -	ubbest =&ub_array[i];
> +      if (ubbest == NULL || vdev_uberblock_compare (ubnext, ubbest)>  0)
> +	{
> +	  ubbest = ubnext;
> +	  pickedub = i;
> +	}
Skipped, it sets the variable but never uses it.
>       }
>     if (!ubbest)
>       grub_errno = err;
> +  else
> +    grub_dprintf ("zfs", "Found best uberblock at idx %d, txg %llu\n",
> +		  pickedub, (unsigned long long) ubbest->ub_txg);
>   
Skipped, not needed.
>     return (ubbest);
>   }
> @@ -515,8 +564,18 @@
>   	  sector = DVA_OFFSET_TO_PHYS_SECTOR (offset);
>   	  err = grub_disk_read (data->disk, sector, 0, psize, buf);
>   	}
> +
>         if (!err)
> -	return GRUB_ERR_NONE;
> +	{
> +	  /* Check the underlying checksum before we rule this DVA as "good" */
> +	  grub_uint32_t checkalgo = (grub_zfs_to_cpu64 ((bp)->blk_prop, endian)>>  40)&  0xff;
> +	  err = zio_checksum_verify (bp->blk_cksum, checkalgo, endian, buf, psize);
> +	  if (!err)
> +	    return GRUB_ERR_NONE;
> +	}
> +
> +      /* If read failed or checksum bad, reset the error.  Hopefully we've got
> +       * some more DVA's to try. */
Skipped, already commented on this one.
>         grub_errno = GRUB_ERR_NONE;
>       }
>
> @@ -539,12 +598,9 @@
>     unsigned int comp;
>     char *compbuf = NULL;
>     grub_err_t err;
> -  zio_cksum_t zc = bp->blk_cksum;
> -  grub_uint32_t checksum;
>
>     *buf = NULL;
>
> -  checksum = (grub_zfs_to_cpu64((bp)->blk_prop, endian)>>  40)&  0xff;
>     comp = (grub_zfs_to_cpu64((bp)->blk_prop, endian)>>32)&  0xff;
>     lsize = (BP_IS_HOLE(bp) ? 0 :
>   	   (((grub_zfs_to_cpu64 ((bp)->blk_prop, endian)&  0xffff) + 1)
> @@ -580,15 +636,6 @@
>         return err;
>       }
>
> -  err = zio_checksum_verify (zc, checksum, endian, compbuf, psize);
> -  if (err)
> -    {
> -      grub_dprintf ("zfs", "incorrect checksum\n");
> -      grub_free (compbuf);
> -      *buf = NULL;
> -      return err;
> -    }
> -
>     if (comp != ZIO_COMPRESS_OFF)
>       {
>         *buf = grub_malloc (lsize);
> @@ -1864,11 +1911,9 @@
>   static grub_err_t
>   check_pool_label (struct grub_zfs_data *data)
>   {
> -  grub_uint64_t pool_state, txg = 0;
> -  char *nvlist;
> -#if 0
> -  char *nv;
> -#endif
> +  grub_uint64_t pool_state;
> +  char *nvlist;			/* for the pool */
> +  char *vdevnvlist;		/* for the vdev */
>     grub_uint64_t diskguid;
>     grub_uint64_t version;
>     int found;
> @@ -1898,7 +1943,9 @@
>       }
>     grub_dprintf ("zfs", "check 4 passed\n");
>
> -  found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_POOL_TXG,&txg);
> +  data->label_txg = 0;
> +  found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_POOL_TXG,
> +					&data->label_txg);
Not needed. Skipped.
>     if (!found)
>       {
>         grub_free (nvlist);
> @@ -1909,7 +1956,7 @@
>     grub_dprintf ("zfs", "check 6 passed\n");
>
>     /* not an active device */
> -  if (txg == 0)
> +  if (data->label_txg == 0)
>       {
>         grub_free (nvlist);
>         return grub_error (GRUB_ERR_BAD_FS, "zpool isn't active");
> @@ -1931,20 +1978,32 @@
>       {
>         grub_free (nvlist);
>         return grub_error (GRUB_ERR_NOT_IMPLEMENTED_YET,
> -			 "too new version %llu>  %llu",
> +			 "SPA version too new %llu>  %llu",
>   			 (unsigned long long) version,
>   			 (unsigned long long) SPA_VERSION);
>       }
>     grub_dprintf ("zfs", "check 9 passed\n");
> -#if 0
> -  if (nvlist_lookup_value (nvlist, ZPOOL_CONFIG_VDEV_TREE,&nv,
> -			   DATA_TYPE_NVLIST, NULL))
> +
> +  vdevnvlist = grub_zfs_nvlist_lookup_nvlist (nvlist, ZPOOL_CONFIG_VDEV_TREE);
> +  if (!vdevnvlist)
>       {
> -      grub_free (vdev);
> -      return (GRUB_ERR_BAD_FS);
> +      grub_free (nvlist);
> +      if (!grub_errno)
> +	grub_error (GRUB_ERR_BAD_FS, ZPOOL_CONFIG_VDEV_TREE " not found");
> +      return grub_errno;
>       }
>     grub_dprintf ("zfs", "check 10 passed\n");
> -#endif
> +
> +  found = grub_zfs_nvlist_lookup_uint64 (vdevnvlist, ZPOOL_CONFIG_ASHIFT,
> +					&data->vdev_ashift);
> +  grub_free (vdevnvlist);
> +  if (!found)
> +    {
> +      grub_free (nvlist);
> +      if (!grub_errno)
> +	grub_error (GRUB_ERR_BAD_FS, ZPOOL_CONFIG_ASHIFT " not found");
> +      return grub_errno;
> +    }
>
>     found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_GUID,&diskguid);
>     if (! found)
> @@ -1956,11 +2015,42 @@
>       }
>     grub_dprintf ("zfs", "check 11 passed\n");
>
> +  found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_POOL_GUID,&data->pool_guid);
> +  if (! found)
> +    {
> +      grub_free (nvlist);
> +      if (! grub_errno)
> +        grub_error (GRUB_ERR_BAD_FS, ZPOOL_CONFIG_POOL_GUID " not found");
> +      return grub_errno;
> +    }
> +   grub_dprintf ("zfs", "check 12 passed\n");
> +
> +
This check is already in. Skipped.
>     grub_free (nvlist);
>
> +  grub_dprintf ("zfs", "ZFS Pool GUID: %llu (%016llx) Label: GUID: %llu (%016llx), txg: %llu, SPA v%llu, ashift: %llu\n",
> +    (unsigned long long) data->pool_guid,
> +    (unsigned long long) data->pool_guid,
> +    (unsigned long long) diskguid,
> +    (unsigned long long) diskguid,
> +    (unsigned long long) data->label_txg,
> +    (unsigned long long) version,
> +    (unsigned long long) data->vdev_ashift);
> +
Useless. Skipped.
>     return GRUB_ERR_NONE;
>   }
>
> +/*
> + * vdev_label_start returns the physical disk offset (in bytes) of
> + * label "l".
> + */
> +static grub_uint64_t vdev_label_start (grub_uint64_t psize, int l)
> +{
> +  return (l * sizeof (vdev_label_t) + (l<  VDEV_LABELS / 2 ?
> +				       0 : psize -
> +				       VDEV_LABELS * sizeof (vdev_label_t)));
> +}
> +
>   static void
>   zfs_unmount (struct grub_zfs_data *data)
>   {
> @@ -1979,13 +2069,13 @@
>   zfs_mount (grub_device_t dev)
>   {
>     struct grub_zfs_data *data = 0;
> -  int label = 0;
> -  uberblock_phys_t *ub_array, *ubbest = NULL;
> -  vdev_boot_header_t *bh;
> +  int label = 0, bestlabel = -1;
> +  char *ub_array;
> +  uberblock_t *ubbest;
> +  uberblock_t *ubcur = NULL;
>     void *osp = 0;
>     grub_size_t ospsize;
>     grub_err_t err;
> -  int vdevnum;
>
>     if (! dev->disk)
>       {
> @@ -1997,11 +2087,6 @@
>     if (!data)
>       return 0;
>     grub_memset (data, 0, sizeof (*data));
> -#if 0
> -  /* if it's our first time here, zero the best uberblock out */
> -  if (data->best_drive == 0&&  data->best_part == 0&&  find_best_root)
> -    grub_memset (&current_uberblock, 0, sizeof (uberblock_t));
> -#endif
>
>     data->disk = dev->disk;
>
> @@ -2012,100 +2097,129 @@
>         return 0;
>       }
>
> -  bh = grub_malloc (VDEV_BOOT_HEADER_SIZE);
> -  if (!bh)
> +  ubbest = grub_malloc (sizeof (*ubbest));
> +  if (!ubbest)
>       {
>         zfs_unmount (data);
> -      grub_free (ub_array);
>         return 0;
>       }
> -
> -  vdevnum = VDEV_LABELS;
> -
> -  /* Don't check back labels on CDROM.  */
> -  if (grub_disk_get_size (dev->disk) == GRUB_DISK_SIZE_UNKNOWN)
> -    vdevnum = VDEV_LABELS / 2;
> -
> -  for (label = 0; ubbest == NULL&&  label<  vdevnum; label++)
> +  grub_memset (ubbest, 0, sizeof (*ubbest));
> +
> +  /* Establish some constants for where things are on the device: */
> +
> +  /*
> +   *  Some eltorito stacks don't give us a size and we end up setting the
> +   * size to MAXUINT, further some of these devices stop working once a single
> +   * read past the end has been issued. Checking for a maximum part_length and
> +   * skipping the backup labels at the end of the slice/partition/device
> +   * avoids breaking down on such devices.
> +   */
> +  const int vdevnum =
> +    grub_disk_get_size (dev->disk) == GRUB_DISK_SIZE_UNKNOWN ?
> +    VDEV_LABELS / 2 : VDEV_LABELS;
> +
> +  /* Size in bytes of the device (disk or partition) aligned to label size */
> +  grub_uint64_t device_size =
> +	grub_disk_get_size (dev->disk)<<  GRUB_DISK_SECTOR_BITS;
> +  const grub_uint64_t alignedbytes =
> +    P2ALIGN (device_size, (grub_uint64_t) sizeof (vdev_label_t));
> +
> +  for (label = 0; label<  vdevnum; label++)
>       {
> -      grub_zfs_endian_t ub_endian = UNKNOWN_ENDIAN;
> -      grub_dprintf ("zfs", "label %d\n", label);
> -
> -      data->vdev_phys_sector
> -	= label * (sizeof (vdev_label_t)>>  SPA_MINBLOCKSHIFT)
> -	+ ((VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE)>>  SPA_MINBLOCKSHIFT)
> -	+ (label<  VDEV_LABELS / 2 ? 0 : grub_disk_get_size (dev->disk)
> -	   - VDEV_LABELS * (sizeof (vdev_label_t)>>  SPA_MINBLOCKSHIFT));
> +      grub_uint64_t labelstartbytes = vdev_label_start (alignedbytes, label);
> +      grub_uint64_t labelstart = labelstartbytes>>  GRUB_DISK_SECTOR_BITS;
> +
> +      grub_dprintf ("zfs", "reading label %d at sector %llu (byte %llu)\n",
> +		    label, (unsigned long long) labelstart,
> +		    (unsigned long long) labelstartbytes);
> +
> +      data->vdev_phys_sector = labelstart +
> +		((VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE)>>  GRUB_DISK_SECTOR_BITS);
> +
> +      err = check_pool_label (data);
> +      if (err)
> +	{
> +	  grub_dprintf ("zfs", "error checking label %d\n", label);
> +	  grub_errno = GRUB_ERR_NONE;
> +	  continue;
> +	}
>   
This seems like a lot of moving around but I don't see any real change 
other than aligning the size down.
>         /* Read in the uberblock ring (128K). */
> -      err = grub_disk_read (data->disk, data->vdev_phys_sector
> -			    + (VDEV_PHYS_SIZE>>  SPA_MINBLOCKSHIFT),
> -			    0, VDEV_UBERBLOCK_RING, (char *) ub_array);
> -      if (err)
> -	{
> -	  grub_errno = GRUB_ERR_NONE;
> -	  continue;
> -	}
> -      grub_dprintf ("zfs", "label ok %d\n", label);
> -
> -      ubbest = find_bestub (ub_array, data->vdev_phys_sector);
> -      if (!ubbest)
> -	{
> -	  grub_dprintf ("zfs", "No uberblock found\n");
> -	  grub_errno = GRUB_ERR_NONE;
> -	  continue;
> -	}
> -      ub_endian = (grub_zfs_to_cpu64 (ubbest->ubp_uberblock.ub_magic,
> -				     LITTLE_ENDIAN) == UBERBLOCK_MAGIC
> -		   ? LITTLE_ENDIAN : BIG_ENDIAN);
> -      err = zio_read (&ubbest->ubp_uberblock.ub_rootbp,
> -		      ub_endian,
> -		&osp,&ospsize, data);
> -      if (err)
> -	{
> -	  grub_dprintf ("zfs", "couldn't zio_read\n");
> -	  grub_errno = GRUB_ERR_NONE;
> -	  continue;
> -	}
> -
> -      if (ospsize<  OBJSET_PHYS_SIZE_V14)
> -	{
> -	  grub_dprintf ("zfs", "osp too small\n");
> -	  grub_free (osp);
> -	  continue;
> -	}
> -      grub_dprintf ("zfs", "ubbest %p\n", ubbest);
> -
> -      err = check_pool_label (data);
> -      if (err)
> -	{
> -	  grub_errno = GRUB_ERR_NONE;
> -	  continue;
> -	}
> -#if 0
> -      if (find_best_root&&
> -	  vdev_uberblock_compare (&ubbest->ubp_uberblock,
> -				&(current_uberblock))<= 0)
> -	continue;
> -#endif
> -      /* Got the MOS. Save it at the memory addr MOS. */
> -      grub_memmove (&(data->mos.dn),&((objset_phys_t *) osp)->os_meta_dnode,
> -		    DNODE_SIZE);
> -      data->mos.endian = (grub_zfs_to_cpu64 (ubbest->ubp_uberblock.ub_rootbp.blk_prop, ub_endian)>>  63)&  1;
> -      grub_memmove (&(data->current_uberblock),
> -		&ubbest->ubp_uberblock, sizeof (uberblock_t));
> -      grub_free (ub_array);
> -      grub_free (bh);
> -      grub_free (osp);
> -      return data;
> +      err = grub_disk_read (data->disk,
> +			    data->vdev_phys_sector +
> +			    (VDEV_PHYS_SIZE>>  GRUB_DISK_SECTOR_BITS),
> +			    0, VDEV_UBERBLOCK_RING, ub_array);
> +      if (err)
> +	{
> +	  grub_dprintf ("zfs", "error reading uberblock ring for label %d\n", label);
> +	  grub_errno = GRUB_ERR_NONE;
> +	  continue;
> +	}
> +
> +      ubcur = find_bestub (ub_array, data);
> +      if (!ubcur)
> +	{
> +	  grub_dprintf ("zfs", "No good uberblocks found in label %d\n", label);
> +	  grub_errno = GRUB_ERR_NONE;
> +	  continue;
> +	}
> +
> +      if (vdev_uberblock_compare (ubcur, ubbest)>  0)
> +	{
> +	  /* Looks like the block is good, so use it. */
> +	  grub_memcpy (ubbest, ubcur, sizeof (*ubbest));
> +	  bestlabel = label;
> +	  grub_dprintf ("zfs", "Current best uberblock found in label %d\n", label);
> +	}
>       }
> -  grub_error (GRUB_ERR_BAD_FS, "couldn't find a valid label");
> -  zfs_unmount (data);
>     grub_free (ub_array);
> -  grub_free (bh);
> +
> +  /*
> +   * We zero'd the structure to begin with.  If we never assigned to it,
> +   * magic will still be zero.
> +   */
> +  if (!ubbest->ub_magic)
> +    {
> +      grub_error (GRUB_ERR_BAD_FS, "couldn't find a valid ZFS label");
> +      zfs_unmount (data);
> +      grub_free (ubbest);
> +      return 0;
> +    }
> +
> +  grub_dprintf ("zfs", "ubbest %p in label %d\n", ubbest, bestlabel);
> +
> +  grub_zfs_endian_t ub_endian =
> +    grub_zfs_to_cpu64 (ubbest->ub_magic, LITTLE_ENDIAN) == UBERBLOCK_MAGIC
> +    ? LITTLE_ENDIAN : BIG_ENDIAN;
> +  err = zio_read (&ubbest->ub_rootbp, ub_endian,&osp,&ospsize, data);
> +
> +  if (err)
> +    {
> +      grub_error (GRUB_ERR_BAD_FS, "couldn't zio_read object directory");
> +      zfs_unmount (data);
> +      grub_free (ubbest);
> +      return 0;
> +    }
> +
> +  if (ospsize<  OBJSET_PHYS_SIZE_V14)
> +    {
> +      grub_error (GRUB_ERR_BAD_FS, "osp too small");
> +      zfs_unmount (data);
> +      grub_free (osp);
> +      grub_free (ubbest);
> +      return 0;
> +    }
> +
> +  /* Got the MOS. Save it at the memory addr MOS. */
> +  grub_memmove (&(data->mos.dn),&((objset_phys_t *) osp)->os_meta_dnode, DNODE_SIZE);
> +  data->mos.endian =
> +		(grub_zfs_to_cpu64 (ubbest->ub_rootbp.blk_prop, ub_endian)>>  63)&  1;
> +  grub_memmove (&(data->current_uberblock), ubbest, sizeof (uberblock_t));
> +
>     grub_free (osp);
> +  grub_free (ubbest);
>
> -  return 0;
> +  return (data);
>   }
>
>   grub_err_t
> @@ -2149,33 +2263,18 @@
>   static grub_err_t
>   zfs_uuid (grub_device_t device, char **uuid)
>   {
> -  char *nvlist;
> -  int found;
>     struct grub_zfs_data *data;
> -  grub_uint64_t guid;
> -  grub_err_t err;
> -
> -  *uuid = 0;
>
>     data = zfs_mount (device);
>     if (! data)
>       return grub_errno;
>
> -  err = zfs_fetch_nvlist (data,&nvlist);
> -  if (err)
> -    {
> -      zfs_unmount (data);
> -      return err;
> -    }
> -
> -  found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_POOL_GUID,&guid);
> -  if (! found)
> -    return grub_errno;
> -  grub_free (nvlist);
> -  *uuid = grub_xasprintf ("%016llx", (long long unsigned) guid);
> +  *uuid = grub_xasprintf ("%016llx", (long long unsigned) data->pool_guid);
>     zfs_unmount (data);
> +
>     if (! *uuid)
>       return grub_errno;
> +
>     return GRUB_ERR_NONE;
>   }
>
>
> === modified file 'include/grub/zfs/uberblock_impl.h'
> --- include/grub/zfs/uberblock_impl.h	2010-12-01 21:55:26 +0000
> +++ include/grub/zfs/uberblock_impl.h	2012-01-19 11:28:09 +0000
> @@ -23,6 +23,8 @@
>   #ifndef _SYS_UBERBLOCK_IMPL_H
>   #define	_SYS_UBERBLOCK_IMPL_H
>
> +#define UBMAX(a,b) ((a)>  (b) ? (a) : (b))
> +
>   /*
>    * The uberblock version is incremented whenever an incompatible on-disk
>    * format change is made to the SPA, DMU, or ZAP.
> @@ -45,16 +47,10 @@
>   	blkptr_t	ub_rootbp;	/* MOS objset_phys_t		*/
>   } uberblock_t;
>
> -#define	UBERBLOCK_SIZE		(1ULL<<  UBERBLOCK_SHIFT)
> -#define	VDEV_UBERBLOCK_SHIFT	UBERBLOCK_SHIFT
> -
> -/* XXX Uberblock_phys_t is no longer in the kernel zfs */
> -typedef struct uberblock_phys {
> -	uberblock_t	ubp_uberblock;
> -	char		ubp_pad[UBERBLOCK_SIZE - sizeof (uberblock_t) -
> -				sizeof (zio_eck_t)];
> -	zio_eck_t	ubp_zec;
> -} uberblock_phys_t;
> -
> +#define	VDEV_UBERBLOCK_SHIFT(as)	UBMAX(as, UBERBLOCK_SHIFT)
> +#define	UBERBLOCK_SIZE(as)		(1ULL<<  VDEV_UBERBLOCK_SHIFT(as))
> +
> +/* Number of uberblocks that can fit in the ring at a given ashift */
> +#define UBERBLOCK_COUNT(as) (VDEV_UBERBLOCK_RING>>  VDEV_UBERBLOCK_SHIFT(as))
>
>   #endif	/* _SYS_UBERBLOCK_IMPL_H */
>


-- 
Regards
Vladimir 'φ-coder/phcoder' Serbinenko


[-- Attachment #2: zfs.diff --]
[-- Type: text/x-diff, Size: 6079 bytes --]

=== modified file 'grub-core/fs/zfs/zfs.c'
--- grub-core/fs/zfs/zfs.c	2012-01-14 14:44:34 +0000
+++ grub-core/fs/zfs/zfs.c	2012-01-22 13:44:45 +0000
@@ -192,6 +192,7 @@
   enum { DEVICE_LEAF, DEVICE_MIRROR, DEVICE_RAIDZ } type;
   grub_uint64_t id;
   grub_uint64_t guid;
+  unsigned ashift;
 
   /* Valid only for non-leafs.  */
   unsigned n_children;
@@ -199,7 +200,6 @@
 
   /* Valid only for RAIDZ.  */
   unsigned nparity;
-  unsigned ashift;
 
   /* Valid only for leaf devices.  */
   grub_device_t dev;
@@ -466,12 +466,12 @@
  * Three pieces of information are needed to verify an uberblock: the magic
  * number, the version number, and the checksum.
  *
- * Currently Implemented: version number, magic number
- * Need to Implement: checksum
+ * Currently Implemented: version number, magic number, checksum
  *
  */
 static grub_err_t
-uberblock_verify (uberblock_phys_t * ub, grub_uint64_t offset)
+uberblock_verify (uberblock_phys_t * ub, grub_uint64_t offset,
+		  grub_size_t s)
 {
   uberblock_t *uber = &ub->ubp_uberblock;
   grub_err_t err;
@@ -498,7 +498,7 @@
 
   zc.zc_word[0] = grub_cpu_to_zfs64 (offset, endian);
   err = zio_checksum_verify (zc, ZIO_CHECKSUM_LABEL, endian,
-			     (char *) ub, UBERBLOCK_SIZE);
+			     (char *) ub, s);
 
   return err;
 }
@@ -510,28 +510,37 @@
  *    Failure - NULL
  */
 static uberblock_phys_t *
-find_bestub (uberblock_phys_t * ub_array, grub_disk_addr_t sector)
+find_bestub (uberblock_phys_t * ub_array,
+	     const struct grub_zfs_device_desc *desc)
 {
-  uberblock_phys_t *ubbest = NULL;
+  uberblock_phys_t *ubbest = NULL, *ubptr;
   int i;
   grub_disk_addr_t offset;
   grub_err_t err = GRUB_ERR_NONE;
-
-  for (i = 0; i < (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT); i++)
+  int ub_shift;
+
+  ub_shift = desc->ashift;
+  if (ub_shift < VDEV_UBERBLOCK_SHIFT)
+    ub_shift = VDEV_UBERBLOCK_SHIFT;
+
+  for (i = 0; i < (VDEV_UBERBLOCK_RING >> ub_shift); i++)
     {
-      offset = (sector << SPA_MINBLOCKSHIFT) + VDEV_PHYS_SIZE
-	+ (i << VDEV_UBERBLOCK_SHIFT);
+      offset = (desc->vdev_phys_sector << SPA_MINBLOCKSHIFT) + VDEV_PHYS_SIZE
+	+ (i << ub_shift);
 
-      err = uberblock_verify (&ub_array[i], offset);
+      ubptr = (uberblock_phys_t *) ((grub_properly_aligned_t *) ub_array
+				    + ((i << ub_shift)
+				       / sizeof (grub_properly_aligned_t)));
+      err = uberblock_verify (ubptr, offset, 1 << ub_shift);
       if (err)
 	{
 	  grub_errno = GRUB_ERR_NONE;
 	  continue;
 	}
       if (ubbest == NULL 
-	  || vdev_uberblock_compare (&(ub_array[i].ubp_uberblock),
+	  || vdev_uberblock_compare (&(ubptr->ubp_uberblock),
 				     &(ubbest->ubp_uberblock)) > 0)
-	ubbest = &ub_array[i];
+	ubbest = ubptr;
     }
   if (!ubbest)
     grub_errno = err;
@@ -582,7 +591,8 @@
 		     const char *nvlist,
 		     struct grub_zfs_device_desc *fill,
 		     struct grub_zfs_device_desc *insert,
-		     int *inserted)
+		     int *inserted,
+		     unsigned ashift)
 {
   char *type;
 
@@ -603,6 +613,19 @@
       return grub_error (GRUB_ERR_BAD_FS, "couldn't find vdev id");
     }
 
+  {
+    grub_uint64_t par;
+    if (grub_zfs_nvlist_lookup_uint64 (nvlist, "ashift", &par))
+      fill->ashift = par;
+    else if (ashift != 0xffffffff)
+      fill->ashift = ashift;
+    else
+      {
+	grub_free (type);
+	return grub_error (GRUB_ERR_BAD_FS, "couldn't find ashift");
+      }
+  }
+
   if (grub_strcmp (type, VDEV_TYPE_DISK) == 0
       || grub_strcmp (type, VDEV_TYPE_FILE) == 0)
     {
@@ -616,6 +639,7 @@
 	  fill->original = insert->original;
 	  if (!data->device_original)
 	    data->device_original = fill;
+	  insert->ashift = fill->ashift;
 	  *inserted = 1;
 	}
 
@@ -641,12 +665,6 @@
 	      return grub_error (GRUB_ERR_BAD_FS, "couldn't find raidz parity");
 	    }
 	  fill->nparity = par;
-	  if (!grub_zfs_nvlist_lookup_uint64 (nvlist, "ashift", &par))
-	    {
-	      grub_free (type);
-	      return grub_error (GRUB_ERR_BAD_FS, "couldn't find raidz ashift");
-	    }
-	  fill->ashift = par;
 	}
 
       nelm = grub_zfs_nvlist_lookup_nvlist_array_get_nelm (nvlist,
@@ -675,7 +693,7 @@
 	    (nvlist, ZPOOL_CONFIG_CHILDREN, i);
 
 	  err = fill_vdev_info_real (data, child, &fill->children[i], insert,
-				     inserted);
+				     inserted, fill->ashift);
 
 	  grub_free (child);
 
@@ -710,7 +728,7 @@
   for (i = 0; i < data->n_devices_attached; i++)
     if (data->devices_attached[i].id == id)
       return fill_vdev_info_real (data, nvlist, &data->devices_attached[i],
-				  diskdesc, inserted);
+				  diskdesc, inserted, 0xffffffff);
 
   data->n_devices_attached++;
   if (data->n_devices_attached > data->n_devices_allocated)
@@ -733,7 +751,7 @@
 
   return fill_vdev_info_real (data, nvlist,
 			      &data->devices_attached[data->n_devices_attached - 1],
-			      diskdesc, inserted);
+			      diskdesc, inserted, 0xffffffff);
 }
 
 /*
@@ -908,7 +926,8 @@
       desc.vdev_phys_sector
 	= label * (sizeof (vdev_label_t) >> SPA_MINBLOCKSHIFT)
 	+ ((VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE) >> SPA_MINBLOCKSHIFT)
-	+ (label < VDEV_LABELS / 2 ? 0 : grub_disk_get_size (dev->disk)
+	+ (label < VDEV_LABELS / 2 ? 0 : 
+	   ALIGN_DOWN (grub_disk_get_size (dev->disk), sizeof (vdev_label_t))
 	   - VDEV_LABELS * (sizeof (vdev_label_t) >> SPA_MINBLOCKSHIFT));
 
       /* Read in the uberblock ring (128K). */
@@ -922,7 +941,14 @@
 	}
       grub_dprintf ("zfs", "label ok %d\n", label);
 
-      ubbest = find_bestub (ub_array, desc.vdev_phys_sector);
+      err = check_pool_label (data, &desc, inserted);
+      if (err || !*inserted)
+	{
+	  grub_errno = GRUB_ERR_NONE;
+	  continue;
+	}
+
+      ubbest = find_bestub (ub_array, &desc);
       if (!ubbest)
 	{
 	  grub_dprintf ("zfs", "No uberblock found\n");
@@ -936,12 +962,6 @@
 	grub_memmove (&(data->current_uberblock),
 		      &ubbest->ubp_uberblock, sizeof (uberblock_t));
 
-      err = check_pool_label (data, &desc, inserted);
-      if (err)
-	{
-	  grub_errno = GRUB_ERR_NONE;
-	  continue;
-	}
 #if 0
       if (find_best_root &&
 	  vdev_uberblock_compare (&ubbest->ubp_uberblock,


  reply	other threads:[~2012-01-22 14:18 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-09-19 18:45 [Patch] Robustly search for ZFS labels & uberblocks Zachary Bedell
2011-09-28 21:20 ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-19 11:36   ` Richard Laager
2012-01-22 14:18     ` Vladimir 'φ-coder/phcoder' Serbinenko [this message]
2012-01-22 20:31       ` Richard Laager
2012-01-24  7:12       ` Richard Laager
2012-01-27 19:04       ` Zachary Bedell
2012-01-27 22:22         ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-28  2:50         ` Richard Laager
2012-01-28 12:51           ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-28 16:50             ` Richard Laager
2012-01-28 17:06               ` Darik Horn
2012-01-28 17:39                 ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-28 18:33             ` Richard Laager
2012-01-28 19:21               ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-29 22:42               ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-31  8:45                 ` Richard Laager
2012-02-02 11:13                   ` Richard Laager
2012-02-03 10:02                     ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-02-03  9:52                   ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-02-03 11:20                     ` Richard Laager
2012-01-28 18:40             ` Darik Horn
2012-01-28 19:27               ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-30  1:22             ` Richard Laager
2012-01-30  1:43               ` Vladimir 'φ-coder/phcoder' Serbinenko
2011-11-03 14:45 ` Vladimir 'φ-coder/phcoder' Serbinenko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4F1C1ABD.1010303@gmail.com \
    --to=phcoder@gmail.com \
    --cc=grub-devel@gnu.org \
    --cc=pendorbound@gmail.com \
    --cc=rlaager@wiktel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).