All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Vladimir 'φ-coder/phcoder' Serbinenko" <phcoder@gmail.com>
To: Richard Laager <rlaager@wiktel.com>
Cc: grub-devel@gnu.org, Zachary Bedell <pendorbound@gmail.com>
Subject: Re: [Patch] Robustly search for ZFS labels & uberblocks
Date: Sun, 22 Jan 2012 15:18:37 +0100	[thread overview]
Message-ID: <4F1C1ABD.1010303@gmail.com> (raw)
In-Reply-To: <1326973014.6387.224.camel@watermelon.coderich.net>

[-- Attachment #1: Type: text/plain, Size: 18895 bytes --]

I've cleaned this patch up (attached). Can you test it on these 4K FSes 
or tell me how to create one w/o really having such a disk?
On 19.01.2012 12:36, Richard Laager wrote:
>   static grub_err_t
> -uberblock_verify (uberblock_phys_t * ub, int offset)
> +uberblock_verify (uberblock_t * uber, int offset, struct grub_zfs_data *data)
>   {
> -  uberblock_t *uber =&ub->ubp_uberblock;
>     grub_err_t err;
>     grub_zfs_endian_t endian = UNKNOWN_ENDIAN;
>     zio_cksum_t zc;
> +  void *osp = NULL;
> +  grub_size_t ospsize;
> +
> +  if (uber->ub_txg<  data->label_txg)
> +    return grub_error (GRUB_ERR_BAD_FS,
> +	"ignoring partially written label: uber_txg<  label_txg");
>   
I've left this part out since outside loop already looks for the newest txg.
>     if (grub_zfs_to_cpu64 (uber->ub_magic, LITTLE_ENDIAN) == UBERBLOCK_MAGIC
>         &&  grub_zfs_to_cpu64 (uber->ub_version, LITTLE_ENDIAN)>  0
> @@ -361,7 +390,19 @@
>
>     zc.zc_word[0] = grub_cpu_to_zfs64 (offset, endian);
>     err = zio_checksum_verify (zc, ZIO_CHECKSUM_LABEL, endian,
> -			     (char *) ub, UBERBLOCK_SIZE);
> +			     (char *) uber, UBERBLOCK_SIZE(data->vdev_ashift));
> +
> +  if (!err)
> +    {
> +      /* Check that the data pointed by the rootbp is usable. */
> +      void *osp = NULL;
> +      grub_size_t ospsize;
> +      err = zio_read (&uber->ub_rootbp, endian,&osp,&ospsize, data);
> +      grub_free (osp);
> +
> +      if (!err&&  ospsize<  OBJSET_PHYS_SIZE_V14)
> +	return grub_error (GRUB_ERR_BAD_FS, "uberblock rootbp points to invalid data");
> +    }
>   
Left this one out: why would it happen that uberblock checksum is valid 
but the other data in it isn't?
>     return err;
>   }
> @@ -372,32 +413,40 @@
>    *    Success - Pointer to the best uberblock.
>    *    Failure - NULL
>    */
> -static uberblock_phys_t *
> -find_bestub (uberblock_phys_t * ub_array, grub_disk_addr_t sector)
> +static uberblock_t *
> +find_bestub (char *ub_array, struct grub_zfs_data *data)
>   {
> -  uberblock_phys_t *ubbest = NULL;
> -  int i;
> -  grub_disk_addr_t offset;
> +  const grub_disk_addr_t sector = data->vdev_phys_sector;
> +  uberblock_t *ubbest = NULL;
> +  uberblock_t *ubnext;
> +  unsigned int i, offset, pickedub = 0;
>     grub_err_t err = GRUB_ERR_NONE;
>
> -  for (i = 0; i<  (VDEV_UBERBLOCK_RING>>  VDEV_UBERBLOCK_SHIFT); i++)
> +  const unsigned int UBCOUNT = UBERBLOCK_COUNT (data->vdev_ashift);
> +  const grub_uint64_t UBBYTES = UBERBLOCK_SIZE (data->vdev_ashift);
> +
> +  for (i = 0; i<  UBCOUNT; i++)
>       {
> -      offset = (sector<<  SPA_MINBLOCKSHIFT) + VDEV_PHYS_SIZE
> -	+ (i<<  VDEV_UBERBLOCK_SHIFT);
> +      ubnext = (uberblock_t *) (i * UBBYTES + ub_array);
> +      offset = (sector<<  SPA_MINBLOCKSHIFT) + VDEV_PHYS_SIZE + (i * UBBYTES);
>   
This is wrong with respect to alignment invariants.
> -      err = uberblock_verify (&ub_array[i], offset);
> +      err = uberblock_verify (ubnext, offset, data);
>         if (err)
>   	{
>   	  grub_errno = GRUB_ERR_NONE;
>   	  continue;
>   	}
> -      if (ubbest == NULL
> -	  || vdev_uberblock_compare (&(ub_array[i].ubp_uberblock),
> -				&(ubbest->ubp_uberblock))>  0)
> -	ubbest =&ub_array[i];
> +      if (ubbest == NULL || vdev_uberblock_compare (ubnext, ubbest)>  0)
> +	{
> +	  ubbest = ubnext;
> +	  pickedub = i;
> +	}
Skipped, it sets the variable but never uses it.
>       }
>     if (!ubbest)
>       grub_errno = err;
> +  else
> +    grub_dprintf ("zfs", "Found best uberblock at idx %d, txg %llu\n",
> +		  pickedub, (unsigned long long) ubbest->ub_txg);
>   
Skipped, not needed.
>     return (ubbest);
>   }
> @@ -515,8 +564,18 @@
>   	  sector = DVA_OFFSET_TO_PHYS_SECTOR (offset);
>   	  err = grub_disk_read (data->disk, sector, 0, psize, buf);
>   	}
> +
>         if (!err)
> -	return GRUB_ERR_NONE;
> +	{
> +	  /* Check the underlying checksum before we rule this DVA as "good" */
> +	  grub_uint32_t checkalgo = (grub_zfs_to_cpu64 ((bp)->blk_prop, endian)>>  40)&  0xff;
> +	  err = zio_checksum_verify (bp->blk_cksum, checkalgo, endian, buf, psize);
> +	  if (!err)
> +	    return GRUB_ERR_NONE;
> +	}
> +
> +      /* If read failed or checksum bad, reset the error.  Hopefully we've got
> +       * some more DVA's to try. */
Skipped, already commented on this one.
>         grub_errno = GRUB_ERR_NONE;
>       }
>
> @@ -539,12 +598,9 @@
>     unsigned int comp;
>     char *compbuf = NULL;
>     grub_err_t err;
> -  zio_cksum_t zc = bp->blk_cksum;
> -  grub_uint32_t checksum;
>
>     *buf = NULL;
>
> -  checksum = (grub_zfs_to_cpu64((bp)->blk_prop, endian)>>  40)&  0xff;
>     comp = (grub_zfs_to_cpu64((bp)->blk_prop, endian)>>32)&  0xff;
>     lsize = (BP_IS_HOLE(bp) ? 0 :
>   	   (((grub_zfs_to_cpu64 ((bp)->blk_prop, endian)&  0xffff) + 1)
> @@ -580,15 +636,6 @@
>         return err;
>       }
>
> -  err = zio_checksum_verify (zc, checksum, endian, compbuf, psize);
> -  if (err)
> -    {
> -      grub_dprintf ("zfs", "incorrect checksum\n");
> -      grub_free (compbuf);
> -      *buf = NULL;
> -      return err;
> -    }
> -
>     if (comp != ZIO_COMPRESS_OFF)
>       {
>         *buf = grub_malloc (lsize);
> @@ -1864,11 +1911,9 @@
>   static grub_err_t
>   check_pool_label (struct grub_zfs_data *data)
>   {
> -  grub_uint64_t pool_state, txg = 0;
> -  char *nvlist;
> -#if 0
> -  char *nv;
> -#endif
> +  grub_uint64_t pool_state;
> +  char *nvlist;			/* for the pool */
> +  char *vdevnvlist;		/* for the vdev */
>     grub_uint64_t diskguid;
>     grub_uint64_t version;
>     int found;
> @@ -1898,7 +1943,9 @@
>       }
>     grub_dprintf ("zfs", "check 4 passed\n");
>
> -  found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_POOL_TXG,&txg);
> +  data->label_txg = 0;
> +  found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_POOL_TXG,
> +					&data->label_txg);
Not needed. Skipped.
>     if (!found)
>       {
>         grub_free (nvlist);
> @@ -1909,7 +1956,7 @@
>     grub_dprintf ("zfs", "check 6 passed\n");
>
>     /* not an active device */
> -  if (txg == 0)
> +  if (data->label_txg == 0)
>       {
>         grub_free (nvlist);
>         return grub_error (GRUB_ERR_BAD_FS, "zpool isn't active");
> @@ -1931,20 +1978,32 @@
>       {
>         grub_free (nvlist);
>         return grub_error (GRUB_ERR_NOT_IMPLEMENTED_YET,
> -			 "too new version %llu>  %llu",
> +			 "SPA version too new %llu>  %llu",
>   			 (unsigned long long) version,
>   			 (unsigned long long) SPA_VERSION);
>       }
>     grub_dprintf ("zfs", "check 9 passed\n");
> -#if 0
> -  if (nvlist_lookup_value (nvlist, ZPOOL_CONFIG_VDEV_TREE,&nv,
> -			   DATA_TYPE_NVLIST, NULL))
> +
> +  vdevnvlist = grub_zfs_nvlist_lookup_nvlist (nvlist, ZPOOL_CONFIG_VDEV_TREE);
> +  if (!vdevnvlist)
>       {
> -      grub_free (vdev);
> -      return (GRUB_ERR_BAD_FS);
> +      grub_free (nvlist);
> +      if (!grub_errno)
> +	grub_error (GRUB_ERR_BAD_FS, ZPOOL_CONFIG_VDEV_TREE " not found");
> +      return grub_errno;
>       }
>     grub_dprintf ("zfs", "check 10 passed\n");
> -#endif
> +
> +  found = grub_zfs_nvlist_lookup_uint64 (vdevnvlist, ZPOOL_CONFIG_ASHIFT,
> +					&data->vdev_ashift);
> +  grub_free (vdevnvlist);
> +  if (!found)
> +    {
> +      grub_free (nvlist);
> +      if (!grub_errno)
> +	grub_error (GRUB_ERR_BAD_FS, ZPOOL_CONFIG_ASHIFT " not found");
> +      return grub_errno;
> +    }
>
>     found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_GUID,&diskguid);
>     if (! found)
> @@ -1956,11 +2015,42 @@
>       }
>     grub_dprintf ("zfs", "check 11 passed\n");
>
> +  found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_POOL_GUID,&data->pool_guid);
> +  if (! found)
> +    {
> +      grub_free (nvlist);
> +      if (! grub_errno)
> +        grub_error (GRUB_ERR_BAD_FS, ZPOOL_CONFIG_POOL_GUID " not found");
> +      return grub_errno;
> +    }
> +   grub_dprintf ("zfs", "check 12 passed\n");
> +
> +
This check is already in. Skipped.
>     grub_free (nvlist);
>
> +  grub_dprintf ("zfs", "ZFS Pool GUID: %llu (%016llx) Label: GUID: %llu (%016llx), txg: %llu, SPA v%llu, ashift: %llu\n",
> +    (unsigned long long) data->pool_guid,
> +    (unsigned long long) data->pool_guid,
> +    (unsigned long long) diskguid,
> +    (unsigned long long) diskguid,
> +    (unsigned long long) data->label_txg,
> +    (unsigned long long) version,
> +    (unsigned long long) data->vdev_ashift);
> +
Useless. Skipped.
>     return GRUB_ERR_NONE;
>   }
>
> +/*
> + * vdev_label_start returns the physical disk offset (in bytes) of
> + * label "l".
> + */
> +static grub_uint64_t vdev_label_start (grub_uint64_t psize, int l)
> +{
> +  return (l * sizeof (vdev_label_t) + (l<  VDEV_LABELS / 2 ?
> +				       0 : psize -
> +				       VDEV_LABELS * sizeof (vdev_label_t)));
> +}
> +
>   static void
>   zfs_unmount (struct grub_zfs_data *data)
>   {
> @@ -1979,13 +2069,13 @@
>   zfs_mount (grub_device_t dev)
>   {
>     struct grub_zfs_data *data = 0;
> -  int label = 0;
> -  uberblock_phys_t *ub_array, *ubbest = NULL;
> -  vdev_boot_header_t *bh;
> +  int label = 0, bestlabel = -1;
> +  char *ub_array;
> +  uberblock_t *ubbest;
> +  uberblock_t *ubcur = NULL;
>     void *osp = 0;
>     grub_size_t ospsize;
>     grub_err_t err;
> -  int vdevnum;
>
>     if (! dev->disk)
>       {
> @@ -1997,11 +2087,6 @@
>     if (!data)
>       return 0;
>     grub_memset (data, 0, sizeof (*data));
> -#if 0
> -  /* if it's our first time here, zero the best uberblock out */
> -  if (data->best_drive == 0&&  data->best_part == 0&&  find_best_root)
> -    grub_memset (&current_uberblock, 0, sizeof (uberblock_t));
> -#endif
>
>     data->disk = dev->disk;
>
> @@ -2012,100 +2097,129 @@
>         return 0;
>       }
>
> -  bh = grub_malloc (VDEV_BOOT_HEADER_SIZE);
> -  if (!bh)
> +  ubbest = grub_malloc (sizeof (*ubbest));
> +  if (!ubbest)
>       {
>         zfs_unmount (data);
> -      grub_free (ub_array);
>         return 0;
>       }
> -
> -  vdevnum = VDEV_LABELS;
> -
> -  /* Don't check back labels on CDROM.  */
> -  if (grub_disk_get_size (dev->disk) == GRUB_DISK_SIZE_UNKNOWN)
> -    vdevnum = VDEV_LABELS / 2;
> -
> -  for (label = 0; ubbest == NULL&&  label<  vdevnum; label++)
> +  grub_memset (ubbest, 0, sizeof (*ubbest));
> +
> +  /* Establish some constants for where things are on the device: */
> +
> +  /*
> +   *  Some eltorito stacks don't give us a size and we end up setting the
> +   * size to MAXUINT, further some of these devices stop working once a single
> +   * read past the end has been issued. Checking for a maximum part_length and
> +   * skipping the backup labels at the end of the slice/partition/device
> +   * avoids breaking down on such devices.
> +   */
> +  const int vdevnum =
> +    grub_disk_get_size (dev->disk) == GRUB_DISK_SIZE_UNKNOWN ?
> +    VDEV_LABELS / 2 : VDEV_LABELS;
> +
> +  /* Size in bytes of the device (disk or partition) aligned to label size */
> +  grub_uint64_t device_size =
> +	grub_disk_get_size (dev->disk)<<  GRUB_DISK_SECTOR_BITS;
> +  const grub_uint64_t alignedbytes =
> +    P2ALIGN (device_size, (grub_uint64_t) sizeof (vdev_label_t));
> +
> +  for (label = 0; label<  vdevnum; label++)
>       {
> -      grub_zfs_endian_t ub_endian = UNKNOWN_ENDIAN;
> -      grub_dprintf ("zfs", "label %d\n", label);
> -
> -      data->vdev_phys_sector
> -	= label * (sizeof (vdev_label_t)>>  SPA_MINBLOCKSHIFT)
> -	+ ((VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE)>>  SPA_MINBLOCKSHIFT)
> -	+ (label<  VDEV_LABELS / 2 ? 0 : grub_disk_get_size (dev->disk)
> -	   - VDEV_LABELS * (sizeof (vdev_label_t)>>  SPA_MINBLOCKSHIFT));
> +      grub_uint64_t labelstartbytes = vdev_label_start (alignedbytes, label);
> +      grub_uint64_t labelstart = labelstartbytes>>  GRUB_DISK_SECTOR_BITS;
> +
> +      grub_dprintf ("zfs", "reading label %d at sector %llu (byte %llu)\n",
> +		    label, (unsigned long long) labelstart,
> +		    (unsigned long long) labelstartbytes);
> +
> +      data->vdev_phys_sector = labelstart +
> +		((VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE)>>  GRUB_DISK_SECTOR_BITS);
> +
> +      err = check_pool_label (data);
> +      if (err)
> +	{
> +	  grub_dprintf ("zfs", "error checking label %d\n", label);
> +	  grub_errno = GRUB_ERR_NONE;
> +	  continue;
> +	}
>   
This seems like a lot of moving around but I don't see any real change 
other than aligning the size down.
>         /* Read in the uberblock ring (128K). */
> -      err = grub_disk_read (data->disk, data->vdev_phys_sector
> -			    + (VDEV_PHYS_SIZE>>  SPA_MINBLOCKSHIFT),
> -			    0, VDEV_UBERBLOCK_RING, (char *) ub_array);
> -      if (err)
> -	{
> -	  grub_errno = GRUB_ERR_NONE;
> -	  continue;
> -	}
> -      grub_dprintf ("zfs", "label ok %d\n", label);
> -
> -      ubbest = find_bestub (ub_array, data->vdev_phys_sector);
> -      if (!ubbest)
> -	{
> -	  grub_dprintf ("zfs", "No uberblock found\n");
> -	  grub_errno = GRUB_ERR_NONE;
> -	  continue;
> -	}
> -      ub_endian = (grub_zfs_to_cpu64 (ubbest->ubp_uberblock.ub_magic,
> -				     LITTLE_ENDIAN) == UBERBLOCK_MAGIC
> -		   ? LITTLE_ENDIAN : BIG_ENDIAN);
> -      err = zio_read (&ubbest->ubp_uberblock.ub_rootbp,
> -		      ub_endian,
> -		&osp,&ospsize, data);
> -      if (err)
> -	{
> -	  grub_dprintf ("zfs", "couldn't zio_read\n");
> -	  grub_errno = GRUB_ERR_NONE;
> -	  continue;
> -	}
> -
> -      if (ospsize<  OBJSET_PHYS_SIZE_V14)
> -	{
> -	  grub_dprintf ("zfs", "osp too small\n");
> -	  grub_free (osp);
> -	  continue;
> -	}
> -      grub_dprintf ("zfs", "ubbest %p\n", ubbest);
> -
> -      err = check_pool_label (data);
> -      if (err)
> -	{
> -	  grub_errno = GRUB_ERR_NONE;
> -	  continue;
> -	}
> -#if 0
> -      if (find_best_root&&
> -	  vdev_uberblock_compare (&ubbest->ubp_uberblock,
> -				&(current_uberblock))<= 0)
> -	continue;
> -#endif
> -      /* Got the MOS. Save it at the memory addr MOS. */
> -      grub_memmove (&(data->mos.dn),&((objset_phys_t *) osp)->os_meta_dnode,
> -		    DNODE_SIZE);
> -      data->mos.endian = (grub_zfs_to_cpu64 (ubbest->ubp_uberblock.ub_rootbp.blk_prop, ub_endian)>>  63)&  1;
> -      grub_memmove (&(data->current_uberblock),
> -		&ubbest->ubp_uberblock, sizeof (uberblock_t));
> -      grub_free (ub_array);
> -      grub_free (bh);
> -      grub_free (osp);
> -      return data;
> +      err = grub_disk_read (data->disk,
> +			    data->vdev_phys_sector +
> +			    (VDEV_PHYS_SIZE>>  GRUB_DISK_SECTOR_BITS),
> +			    0, VDEV_UBERBLOCK_RING, ub_array);
> +      if (err)
> +	{
> +	  grub_dprintf ("zfs", "error reading uberblock ring for label %d\n", label);
> +	  grub_errno = GRUB_ERR_NONE;
> +	  continue;
> +	}
> +
> +      ubcur = find_bestub (ub_array, data);
> +      if (!ubcur)
> +	{
> +	  grub_dprintf ("zfs", "No good uberblocks found in label %d\n", label);
> +	  grub_errno = GRUB_ERR_NONE;
> +	  continue;
> +	}
> +
> +      if (vdev_uberblock_compare (ubcur, ubbest)>  0)
> +	{
> +	  /* Looks like the block is good, so use it. */
> +	  grub_memcpy (ubbest, ubcur, sizeof (*ubbest));
> +	  bestlabel = label;
> +	  grub_dprintf ("zfs", "Current best uberblock found in label %d\n", label);
> +	}
>       }
> -  grub_error (GRUB_ERR_BAD_FS, "couldn't find a valid label");
> -  zfs_unmount (data);
>     grub_free (ub_array);
> -  grub_free (bh);
> +
> +  /*
> +   * We zero'd the structure to begin with.  If we never assigned to it,
> +   * magic will still be zero.
> +   */
> +  if (!ubbest->ub_magic)
> +    {
> +      grub_error (GRUB_ERR_BAD_FS, "couldn't find a valid ZFS label");
> +      zfs_unmount (data);
> +      grub_free (ubbest);
> +      return 0;
> +    }
> +
> +  grub_dprintf ("zfs", "ubbest %p in label %d\n", ubbest, bestlabel);
> +
> +  grub_zfs_endian_t ub_endian =
> +    grub_zfs_to_cpu64 (ubbest->ub_magic, LITTLE_ENDIAN) == UBERBLOCK_MAGIC
> +    ? LITTLE_ENDIAN : BIG_ENDIAN;
> +  err = zio_read (&ubbest->ub_rootbp, ub_endian,&osp,&ospsize, data);
> +
> +  if (err)
> +    {
> +      grub_error (GRUB_ERR_BAD_FS, "couldn't zio_read object directory");
> +      zfs_unmount (data);
> +      grub_free (ubbest);
> +      return 0;
> +    }
> +
> +  if (ospsize<  OBJSET_PHYS_SIZE_V14)
> +    {
> +      grub_error (GRUB_ERR_BAD_FS, "osp too small");
> +      zfs_unmount (data);
> +      grub_free (osp);
> +      grub_free (ubbest);
> +      return 0;
> +    }
> +
> +  /* Got the MOS. Save it at the memory addr MOS. */
> +  grub_memmove (&(data->mos.dn),&((objset_phys_t *) osp)->os_meta_dnode, DNODE_SIZE);
> +  data->mos.endian =
> +		(grub_zfs_to_cpu64 (ubbest->ub_rootbp.blk_prop, ub_endian)>>  63)&  1;
> +  grub_memmove (&(data->current_uberblock), ubbest, sizeof (uberblock_t));
> +
>     grub_free (osp);
> +  grub_free (ubbest);
>
> -  return 0;
> +  return (data);
>   }
>
>   grub_err_t
> @@ -2149,33 +2263,18 @@
>   static grub_err_t
>   zfs_uuid (grub_device_t device, char **uuid)
>   {
> -  char *nvlist;
> -  int found;
>     struct grub_zfs_data *data;
> -  grub_uint64_t guid;
> -  grub_err_t err;
> -
> -  *uuid = 0;
>
>     data = zfs_mount (device);
>     if (! data)
>       return grub_errno;
>
> -  err = zfs_fetch_nvlist (data,&nvlist);
> -  if (err)
> -    {
> -      zfs_unmount (data);
> -      return err;
> -    }
> -
> -  found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_POOL_GUID,&guid);
> -  if (! found)
> -    return grub_errno;
> -  grub_free (nvlist);
> -  *uuid = grub_xasprintf ("%016llx", (long long unsigned) guid);
> +  *uuid = grub_xasprintf ("%016llx", (long long unsigned) data->pool_guid);
>     zfs_unmount (data);
> +
>     if (! *uuid)
>       return grub_errno;
> +
>     return GRUB_ERR_NONE;
>   }
>
>
> === modified file 'include/grub/zfs/uberblock_impl.h'
> --- include/grub/zfs/uberblock_impl.h	2010-12-01 21:55:26 +0000
> +++ include/grub/zfs/uberblock_impl.h	2012-01-19 11:28:09 +0000
> @@ -23,6 +23,8 @@
>   #ifndef _SYS_UBERBLOCK_IMPL_H
>   #define	_SYS_UBERBLOCK_IMPL_H
>
> +#define UBMAX(a,b) ((a)>  (b) ? (a) : (b))
> +
>   /*
>    * The uberblock version is incremented whenever an incompatible on-disk
>    * format change is made to the SPA, DMU, or ZAP.
> @@ -45,16 +47,10 @@
>   	blkptr_t	ub_rootbp;	/* MOS objset_phys_t		*/
>   } uberblock_t;
>
> -#define	UBERBLOCK_SIZE		(1ULL<<  UBERBLOCK_SHIFT)
> -#define	VDEV_UBERBLOCK_SHIFT	UBERBLOCK_SHIFT
> -
> -/* XXX Uberblock_phys_t is no longer in the kernel zfs */
> -typedef struct uberblock_phys {
> -	uberblock_t	ubp_uberblock;
> -	char		ubp_pad[UBERBLOCK_SIZE - sizeof (uberblock_t) -
> -				sizeof (zio_eck_t)];
> -	zio_eck_t	ubp_zec;
> -} uberblock_phys_t;
> -
> +#define	VDEV_UBERBLOCK_SHIFT(as)	UBMAX(as, UBERBLOCK_SHIFT)
> +#define	UBERBLOCK_SIZE(as)		(1ULL<<  VDEV_UBERBLOCK_SHIFT(as))
> +
> +/* Number of uberblocks that can fit in the ring at a given ashift */
> +#define UBERBLOCK_COUNT(as) (VDEV_UBERBLOCK_RING>>  VDEV_UBERBLOCK_SHIFT(as))
>
>   #endif	/* _SYS_UBERBLOCK_IMPL_H */
>


-- 
Regards
Vladimir 'φ-coder/phcoder' Serbinenko


[-- Attachment #2: zfs.diff --]
[-- Type: text/x-diff, Size: 6079 bytes --]

=== modified file 'grub-core/fs/zfs/zfs.c'
--- grub-core/fs/zfs/zfs.c	2012-01-14 14:44:34 +0000
+++ grub-core/fs/zfs/zfs.c	2012-01-22 13:44:45 +0000
@@ -192,6 +192,7 @@
   enum { DEVICE_LEAF, DEVICE_MIRROR, DEVICE_RAIDZ } type;
   grub_uint64_t id;
   grub_uint64_t guid;
+  unsigned ashift;
 
   /* Valid only for non-leafs.  */
   unsigned n_children;
@@ -199,7 +200,6 @@
 
   /* Valid only for RAIDZ.  */
   unsigned nparity;
-  unsigned ashift;
 
   /* Valid only for leaf devices.  */
   grub_device_t dev;
@@ -466,12 +466,12 @@
  * Three pieces of information are needed to verify an uberblock: the magic
  * number, the version number, and the checksum.
  *
- * Currently Implemented: version number, magic number
- * Need to Implement: checksum
+ * Currently Implemented: version number, magic number, checksum
  *
  */
 static grub_err_t
-uberblock_verify (uberblock_phys_t * ub, grub_uint64_t offset)
+uberblock_verify (uberblock_phys_t * ub, grub_uint64_t offset,
+		  grub_size_t s)
 {
   uberblock_t *uber = &ub->ubp_uberblock;
   grub_err_t err;
@@ -498,7 +498,7 @@
 
   zc.zc_word[0] = grub_cpu_to_zfs64 (offset, endian);
   err = zio_checksum_verify (zc, ZIO_CHECKSUM_LABEL, endian,
-			     (char *) ub, UBERBLOCK_SIZE);
+			     (char *) ub, s);
 
   return err;
 }
@@ -510,28 +510,37 @@
  *    Failure - NULL
  */
 static uberblock_phys_t *
-find_bestub (uberblock_phys_t * ub_array, grub_disk_addr_t sector)
+find_bestub (uberblock_phys_t * ub_array,
+	     const struct grub_zfs_device_desc *desc)
 {
-  uberblock_phys_t *ubbest = NULL;
+  uberblock_phys_t *ubbest = NULL, *ubptr;
   int i;
   grub_disk_addr_t offset;
   grub_err_t err = GRUB_ERR_NONE;
-
-  for (i = 0; i < (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT); i++)
+  int ub_shift;
+
+  ub_shift = desc->ashift;
+  if (ub_shift < VDEV_UBERBLOCK_SHIFT)
+    ub_shift = VDEV_UBERBLOCK_SHIFT;
+
+  for (i = 0; i < (VDEV_UBERBLOCK_RING >> ub_shift); i++)
     {
-      offset = (sector << SPA_MINBLOCKSHIFT) + VDEV_PHYS_SIZE
-	+ (i << VDEV_UBERBLOCK_SHIFT);
+      offset = (desc->vdev_phys_sector << SPA_MINBLOCKSHIFT) + VDEV_PHYS_SIZE
+	+ (i << ub_shift);
 
-      err = uberblock_verify (&ub_array[i], offset);
+      ubptr = (uberblock_phys_t *) ((grub_properly_aligned_t *) ub_array
+				    + ((i << ub_shift)
+				       / sizeof (grub_properly_aligned_t)));
+      err = uberblock_verify (ubptr, offset, 1 << ub_shift);
       if (err)
 	{
 	  grub_errno = GRUB_ERR_NONE;
 	  continue;
 	}
       if (ubbest == NULL 
-	  || vdev_uberblock_compare (&(ub_array[i].ubp_uberblock),
+	  || vdev_uberblock_compare (&(ubptr->ubp_uberblock),
 				     &(ubbest->ubp_uberblock)) > 0)
-	ubbest = &ub_array[i];
+	ubbest = ubptr;
     }
   if (!ubbest)
     grub_errno = err;
@@ -582,7 +591,8 @@
 		     const char *nvlist,
 		     struct grub_zfs_device_desc *fill,
 		     struct grub_zfs_device_desc *insert,
-		     int *inserted)
+		     int *inserted,
+		     unsigned ashift)
 {
   char *type;
 
@@ -603,6 +613,19 @@
       return grub_error (GRUB_ERR_BAD_FS, "couldn't find vdev id");
     }
 
+  {
+    grub_uint64_t par;
+    if (grub_zfs_nvlist_lookup_uint64 (nvlist, "ashift", &par))
+      fill->ashift = par;
+    else if (ashift != 0xffffffff)
+      fill->ashift = ashift;
+    else
+      {
+	grub_free (type);
+	return grub_error (GRUB_ERR_BAD_FS, "couldn't find ashift");
+      }
+  }
+
   if (grub_strcmp (type, VDEV_TYPE_DISK) == 0
       || grub_strcmp (type, VDEV_TYPE_FILE) == 0)
     {
@@ -616,6 +639,7 @@
 	  fill->original = insert->original;
 	  if (!data->device_original)
 	    data->device_original = fill;
+	  insert->ashift = fill->ashift;
 	  *inserted = 1;
 	}
 
@@ -641,12 +665,6 @@
 	      return grub_error (GRUB_ERR_BAD_FS, "couldn't find raidz parity");
 	    }
 	  fill->nparity = par;
-	  if (!grub_zfs_nvlist_lookup_uint64 (nvlist, "ashift", &par))
-	    {
-	      grub_free (type);
-	      return grub_error (GRUB_ERR_BAD_FS, "couldn't find raidz ashift");
-	    }
-	  fill->ashift = par;
 	}
 
       nelm = grub_zfs_nvlist_lookup_nvlist_array_get_nelm (nvlist,
@@ -675,7 +693,7 @@
 	    (nvlist, ZPOOL_CONFIG_CHILDREN, i);
 
 	  err = fill_vdev_info_real (data, child, &fill->children[i], insert,
-				     inserted);
+				     inserted, fill->ashift);
 
 	  grub_free (child);
 
@@ -710,7 +728,7 @@
   for (i = 0; i < data->n_devices_attached; i++)
     if (data->devices_attached[i].id == id)
       return fill_vdev_info_real (data, nvlist, &data->devices_attached[i],
-				  diskdesc, inserted);
+				  diskdesc, inserted, 0xffffffff);
 
   data->n_devices_attached++;
   if (data->n_devices_attached > data->n_devices_allocated)
@@ -733,7 +751,7 @@
 
   return fill_vdev_info_real (data, nvlist,
 			      &data->devices_attached[data->n_devices_attached - 1],
-			      diskdesc, inserted);
+			      diskdesc, inserted, 0xffffffff);
 }
 
 /*
@@ -908,7 +926,8 @@
       desc.vdev_phys_sector
 	= label * (sizeof (vdev_label_t) >> SPA_MINBLOCKSHIFT)
 	+ ((VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE) >> SPA_MINBLOCKSHIFT)
-	+ (label < VDEV_LABELS / 2 ? 0 : grub_disk_get_size (dev->disk)
+	+ (label < VDEV_LABELS / 2 ? 0 : 
+	   ALIGN_DOWN (grub_disk_get_size (dev->disk), sizeof (vdev_label_t))
 	   - VDEV_LABELS * (sizeof (vdev_label_t) >> SPA_MINBLOCKSHIFT));
 
       /* Read in the uberblock ring (128K). */
@@ -922,7 +941,14 @@
 	}
       grub_dprintf ("zfs", "label ok %d\n", label);
 
-      ubbest = find_bestub (ub_array, desc.vdev_phys_sector);
+      err = check_pool_label (data, &desc, inserted);
+      if (err || !*inserted)
+	{
+	  grub_errno = GRUB_ERR_NONE;
+	  continue;
+	}
+
+      ubbest = find_bestub (ub_array, &desc);
       if (!ubbest)
 	{
 	  grub_dprintf ("zfs", "No uberblock found\n");
@@ -936,12 +962,6 @@
 	grub_memmove (&(data->current_uberblock),
 		      &ubbest->ubp_uberblock, sizeof (uberblock_t));
 
-      err = check_pool_label (data, &desc, inserted);
-      if (err)
-	{
-	  grub_errno = GRUB_ERR_NONE;
-	  continue;
-	}
 #if 0
       if (find_best_root &&
 	  vdev_uberblock_compare (&ubbest->ubp_uberblock,


  reply	other threads:[~2012-01-22 14:18 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-09-19 18:45 [Patch] Robustly search for ZFS labels & uberblocks Zachary Bedell
2011-09-28 21:20 ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-19 11:36   ` Richard Laager
2012-01-22 14:18     ` Vladimir 'φ-coder/phcoder' Serbinenko [this message]
2012-01-22 20:31       ` Richard Laager
2012-01-24  7:12       ` Richard Laager
2012-01-27 19:04       ` Zachary Bedell
2012-01-27 22:22         ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-28  2:50         ` Richard Laager
2012-01-28 12:51           ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-28 16:50             ` Richard Laager
2012-01-28 17:06               ` Darik Horn
2012-01-28 17:39                 ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-28 18:33             ` Richard Laager
2012-01-28 19:21               ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-29 22:42               ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-31  8:45                 ` Richard Laager
2012-02-02 11:13                   ` Richard Laager
2012-02-03 10:02                     ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-02-03  9:52                   ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-02-03 11:20                     ` Richard Laager
2012-01-28 18:40             ` Darik Horn
2012-01-28 19:27               ` Vladimir 'φ-coder/phcoder' Serbinenko
2012-01-30  1:22             ` Richard Laager
2012-01-30  1:43               ` Vladimir 'φ-coder/phcoder' Serbinenko
2011-11-03 14:45 ` Vladimir 'φ-coder/phcoder' Serbinenko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4F1C1ABD.1010303@gmail.com \
    --to=phcoder@gmail.com \
    --cc=grub-devel@gnu.org \
    --cc=pendorbound@gmail.com \
    --cc=rlaager@wiktel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.