All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Roger Pau Monné" <roger.pau@citrix.com>
To: Demi Marie Obenour <demi@invisiblethingslab.com>
Cc: "Jens Axboe" <axboe@kernel.dk>,
	"Mike Snitzer" <snitzer@kernel.org>,
	"Marek Marczykowski-Górecki" <marmarek@invisiblethingslab.com>,
	linux-kernel@vger.kernel.org, linux-block@vger.kernel.org,
	dm-devel@redhat.com, xen-devel@lists.xenproject.org,
	"Alasdair Kergon" <agk@redhat.com>
Subject: Re: [dm-devel] [PATCH v2 13/16] xen-blkback: Implement diskseq checks
Date: Tue, 6 Jun 2023 10:25:47 +0200	[thread overview]
Message-ID: <ZH7tizoYl8YVFN9B@Air-de-Roger> (raw)
In-Reply-To: <20230530203116.2008-14-demi@invisiblethingslab.com>

On Tue, May 30, 2023 at 04:31:13PM -0400, Demi Marie Obenour wrote:
> This allows specifying a disk sequence number in XenStore.  If it does
> not match the disk sequence number of the underlying device, the device
> will not be exported and a warning will be logged.  Userspace can use
> this to eliminate race conditions due to major/minor number reuse.
> Old kernels do not support the new syntax, but a later patch will allow
> userspace to discover that the new syntax is supported.
> 
> Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
> ---
>  drivers/block/xen-blkback/xenbus.c | 112 +++++++++++++++++++++++------
>  1 file changed, 89 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
> index 4807af1d58059394d7a992335dabaf2bc3901721..9c3eb148fbd802c74e626c3d7bcd69dcb09bd921 100644
> --- a/drivers/block/xen-blkback/xenbus.c
> +++ b/drivers/block/xen-blkback/xenbus.c
> @@ -24,6 +24,7 @@ struct backend_info {
>  	struct xenbus_watch	backend_watch;
>  	unsigned		major;
>  	unsigned		minor;
> +	unsigned long long	diskseq;

Since diskseq is declared as u64 in gendisk, better use the same type
here too?

>  	char			*mode;
>  };
>  
> @@ -479,7 +480,7 @@ static void xen_vbd_free(struct xen_vbd *vbd)
>  
>  static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
>  			  unsigned major, unsigned minor, int readonly,
> -			  int cdrom)
> +			  bool cdrom, u64 diskseq)
>  {
>  	struct xen_vbd *vbd;
>  	struct block_device *bdev;
> @@ -507,6 +508,26 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
>  		xen_vbd_free(vbd);
>  		return -ENOENT;
>  	}
> +
> +	if (diskseq) {
> +		struct gendisk *disk = bdev->bd_disk;

const.

> +
> +		if (unlikely(disk == NULL)) {
> +			pr_err("%s: device %08x has no gendisk\n",
> +			       __func__, vbd->pdevice);
> +			xen_vbd_free(vbd);
> +			return -EFAULT;

ENODEV or ENOENT might be more accurate IMO.

> +		}
> +
> +		if (unlikely(disk->diskseq != diskseq)) {
> +			pr_warn("%s: device %08x has incorrect sequence "
> +				"number 0x%llx (expected 0x%llx)\n",

I prefer %#llx, and likely pr_err like above.  Also I think it's now
preferred to not split printed lines, so that `grep "has incorrect
sequence number" ...` can find the instance.

> +				__func__, vbd->pdevice, disk->diskseq, diskseq);
> +			xen_vbd_free(vbd);
> +			return -ENODEV;
> +		}
> +	}
> +
>  	vbd->size = vbd_sz(vbd);
>  
>  	if (cdrom || disk_to_cdi(vbd->bdev->bd_disk))
> @@ -707,6 +728,9 @@ static void backend_changed(struct xenbus_watch *watch,
>  	int cdrom = 0;
>  	unsigned long handle;
>  	char *device_type;
> +	char *diskseq_str = NULL;

const, and I think there's no need to init to NULL.

> +	int diskseq_len;

unsigned int

> +	unsigned long long diskseq;

u64

>  
>  	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
>  
> @@ -725,10 +749,46 @@ static void backend_changed(struct xenbus_watch *watch,
>  		return;
>  	}
>  
> -	if (be->major | be->minor) {
> -		if (be->major != major || be->minor != minor)
> -			pr_warn("changing physical device (from %x:%x to %x:%x) not supported.\n",
> -				be->major, be->minor, major, minor);
> +	diskseq_str = xenbus_read(XBT_NIL, dev->nodename, "diskseq", &diskseq_len);
> +	if (IS_ERR(diskseq_str)) {
> +		int err = PTR_ERR(diskseq_str);
> +		diskseq_str = NULL;
> +
> +		/*
> +		 * If this does not exist, it means legacy userspace that does not
> +		 * support diskseq.
> +		 */
> +		if (unlikely(!XENBUS_EXIST_ERR(err))) {
> +			xenbus_dev_fatal(dev, err, "reading diskseq");
> +			return;
> +		}
> +		diskseq = 0;
> +	} else if (diskseq_len <= 0) {
> +		xenbus_dev_fatal(dev, -EFAULT, "diskseq must not be empty");
> +		goto fail;
> +	} else if (diskseq_len > 16) {
> +		xenbus_dev_fatal(dev, -ERANGE, "diskseq too long: got %d but limit is 16",
> +				 diskseq_len);
> +		goto fail;
> +	} else if (diskseq_str[0] == '0') {
> +		xenbus_dev_fatal(dev, -ERANGE, "diskseq must not start with '0'");
> +		goto fail;
> +	} else {
> +		char *diskseq_end;
> +		diskseq = simple_strtoull(diskseq_str, &diskseq_end, 16);
> +		if (diskseq_end != diskseq_str + diskseq_len) {
> +			xenbus_dev_fatal(dev, -EINVAL, "invalid diskseq");
> +			goto fail;
> +		}
> +		kfree(diskseq_str);
> +		diskseq_str = NULL;
> +	}

Won't it be simpler to use xenbus_scanf() with %llx formatter?

Also, we might want to fetch "physical-device" and "diskseq" inside
the same xenstore transaction.

Also, you tie this logic to the "physical-device" watch, which
strictly implies that the "diskseq" node must be written to xenstore
before the "physical-device" node.  This seems fragile, but I don't
see much better optiono since the "diskseq" is optional.

The node and its behaviour should be documented in blkif.h.

> +	if (be->major | be->minor | be->diskseq) {
> +		if (be->major != major || be->minor != minor || be->diskseq != diskseq)
> +			pr_warn("changing physical device (from %x:%x:%llx to %x:%x:%llx)"
> +				" not supported.\n",
> +				be->major, be->minor, be->diskseq, major, minor, diskseq);
>  		return;

You are leaking diskseq_str here, and in all the error cases between
here and up to the call to xen_vbd_create().

It might be better to simnply free diskseq_str once you are done with
the processing, and have set diskseq.

Otherwise see my suggestion of using xenbus_scanf().

Thanks, Roger.

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel


WARNING: multiple messages have this Message-ID (diff)
From: "Roger Pau Monné" <roger.pau@citrix.com>
To: Demi Marie Obenour <demi@invisiblethingslab.com>
Cc: "Jens Axboe" <axboe@kernel.dk>,
	"Alasdair Kergon" <agk@redhat.com>,
	"Mike Snitzer" <snitzer@kernel.org>,
	dm-devel@redhat.com,
	"Marek Marczykowski-Górecki" <marmarek@invisiblethingslab.com>,
	linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
	xen-devel@lists.xenproject.org
Subject: Re: [PATCH v2 13/16] xen-blkback: Implement diskseq checks
Date: Tue, 6 Jun 2023 10:25:47 +0200	[thread overview]
Message-ID: <ZH7tizoYl8YVFN9B@Air-de-Roger> (raw)
In-Reply-To: <20230530203116.2008-14-demi@invisiblethingslab.com>

On Tue, May 30, 2023 at 04:31:13PM -0400, Demi Marie Obenour wrote:
> This allows specifying a disk sequence number in XenStore.  If it does
> not match the disk sequence number of the underlying device, the device
> will not be exported and a warning will be logged.  Userspace can use
> this to eliminate race conditions due to major/minor number reuse.
> Old kernels do not support the new syntax, but a later patch will allow
> userspace to discover that the new syntax is supported.
> 
> Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
> ---
>  drivers/block/xen-blkback/xenbus.c | 112 +++++++++++++++++++++++------
>  1 file changed, 89 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
> index 4807af1d58059394d7a992335dabaf2bc3901721..9c3eb148fbd802c74e626c3d7bcd69dcb09bd921 100644
> --- a/drivers/block/xen-blkback/xenbus.c
> +++ b/drivers/block/xen-blkback/xenbus.c
> @@ -24,6 +24,7 @@ struct backend_info {
>  	struct xenbus_watch	backend_watch;
>  	unsigned		major;
>  	unsigned		minor;
> +	unsigned long long	diskseq;

Since diskseq is declared as u64 in gendisk, better use the same type
here too?

>  	char			*mode;
>  };
>  
> @@ -479,7 +480,7 @@ static void xen_vbd_free(struct xen_vbd *vbd)
>  
>  static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
>  			  unsigned major, unsigned minor, int readonly,
> -			  int cdrom)
> +			  bool cdrom, u64 diskseq)
>  {
>  	struct xen_vbd *vbd;
>  	struct block_device *bdev;
> @@ -507,6 +508,26 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
>  		xen_vbd_free(vbd);
>  		return -ENOENT;
>  	}
> +
> +	if (diskseq) {
> +		struct gendisk *disk = bdev->bd_disk;

const.

> +
> +		if (unlikely(disk == NULL)) {
> +			pr_err("%s: device %08x has no gendisk\n",
> +			       __func__, vbd->pdevice);
> +			xen_vbd_free(vbd);
> +			return -EFAULT;

ENODEV or ENOENT might be more accurate IMO.

> +		}
> +
> +		if (unlikely(disk->diskseq != diskseq)) {
> +			pr_warn("%s: device %08x has incorrect sequence "
> +				"number 0x%llx (expected 0x%llx)\n",

I prefer %#llx, and likely pr_err like above.  Also I think it's now
preferred to not split printed lines, so that `grep "has incorrect
sequence number" ...` can find the instance.

> +				__func__, vbd->pdevice, disk->diskseq, diskseq);
> +			xen_vbd_free(vbd);
> +			return -ENODEV;
> +		}
> +	}
> +
>  	vbd->size = vbd_sz(vbd);
>  
>  	if (cdrom || disk_to_cdi(vbd->bdev->bd_disk))
> @@ -707,6 +728,9 @@ static void backend_changed(struct xenbus_watch *watch,
>  	int cdrom = 0;
>  	unsigned long handle;
>  	char *device_type;
> +	char *diskseq_str = NULL;

const, and I think there's no need to init to NULL.

> +	int diskseq_len;

unsigned int

> +	unsigned long long diskseq;

u64

>  
>  	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
>  
> @@ -725,10 +749,46 @@ static void backend_changed(struct xenbus_watch *watch,
>  		return;
>  	}
>  
> -	if (be->major | be->minor) {
> -		if (be->major != major || be->minor != minor)
> -			pr_warn("changing physical device (from %x:%x to %x:%x) not supported.\n",
> -				be->major, be->minor, major, minor);
> +	diskseq_str = xenbus_read(XBT_NIL, dev->nodename, "diskseq", &diskseq_len);
> +	if (IS_ERR(diskseq_str)) {
> +		int err = PTR_ERR(diskseq_str);
> +		diskseq_str = NULL;
> +
> +		/*
> +		 * If this does not exist, it means legacy userspace that does not
> +		 * support diskseq.
> +		 */
> +		if (unlikely(!XENBUS_EXIST_ERR(err))) {
> +			xenbus_dev_fatal(dev, err, "reading diskseq");
> +			return;
> +		}
> +		diskseq = 0;
> +	} else if (diskseq_len <= 0) {
> +		xenbus_dev_fatal(dev, -EFAULT, "diskseq must not be empty");
> +		goto fail;
> +	} else if (diskseq_len > 16) {
> +		xenbus_dev_fatal(dev, -ERANGE, "diskseq too long: got %d but limit is 16",
> +				 diskseq_len);
> +		goto fail;
> +	} else if (diskseq_str[0] == '0') {
> +		xenbus_dev_fatal(dev, -ERANGE, "diskseq must not start with '0'");
> +		goto fail;
> +	} else {
> +		char *diskseq_end;
> +		diskseq = simple_strtoull(diskseq_str, &diskseq_end, 16);
> +		if (diskseq_end != diskseq_str + diskseq_len) {
> +			xenbus_dev_fatal(dev, -EINVAL, "invalid diskseq");
> +			goto fail;
> +		}
> +		kfree(diskseq_str);
> +		diskseq_str = NULL;
> +	}

Won't it be simpler to use xenbus_scanf() with %llx formatter?

Also, we might want to fetch "physical-device" and "diskseq" inside
the same xenstore transaction.

Also, you tie this logic to the "physical-device" watch, which
strictly implies that the "diskseq" node must be written to xenstore
before the "physical-device" node.  This seems fragile, but I don't
see much better optiono since the "diskseq" is optional.

The node and its behaviour should be documented in blkif.h.

> +	if (be->major | be->minor | be->diskseq) {
> +		if (be->major != major || be->minor != minor || be->diskseq != diskseq)
> +			pr_warn("changing physical device (from %x:%x:%llx to %x:%x:%llx)"
> +				" not supported.\n",
> +				be->major, be->minor, be->diskseq, major, minor, diskseq);
>  		return;

You are leaking diskseq_str here, and in all the error cases between
here and up to the call to xen_vbd_create().

It might be better to simnply free diskseq_str once you are done with
the processing, and have set diskseq.

Otherwise see my suggestion of using xenbus_scanf().

Thanks, Roger.

  reply	other threads:[~2023-06-07  6:49 UTC|newest]

Thread overview: 76+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-30 20:31 [dm-devel] [PATCH v2 00/16] Diskseq support in loop, device-mapper, and blkback Demi Marie Obenour
2023-05-30 20:31 ` Demi Marie Obenour
2023-05-30 20:31 ` [dm-devel] [PATCH v2 01/16] device-mapper: Check that target specs are sufficiently aligned Demi Marie Obenour
2023-05-30 20:31   ` Demi Marie Obenour
2023-05-30 20:31 ` [dm-devel] [PATCH v2 02/16] device-mapper: Avoid pointer arithmetic overflow Demi Marie Obenour
2023-05-30 20:31   ` Demi Marie Obenour
2023-05-30 20:31 ` [dm-devel] [PATCH v2 03/16] device-mapper: do not allow targets to overlap 'struct dm_ioctl' Demi Marie Obenour
2023-05-30 20:31   ` Demi Marie Obenour
2023-05-30 20:31 ` [dm-devel] [PATCH v2 04/16] device-mapper: Better error message for too-short target spec Demi Marie Obenour
2023-05-30 20:31   ` Demi Marie Obenour
2023-05-30 20:31 ` [dm-devel] [PATCH v2 05/16] device-mapper: Target parameters must not overlap next " Demi Marie Obenour
2023-05-30 20:31   ` Demi Marie Obenour
2023-05-30 20:31 ` [dm-devel] [PATCH v2 06/16] device-mapper: Avoid double-fetch of version Demi Marie Obenour
2023-05-30 20:31   ` Demi Marie Obenour
2023-05-30 20:31 ` [dm-devel] [PATCH v2 07/16] device-mapper: Allow userspace to opt-in to strict parameter checks Demi Marie Obenour
2023-05-30 20:31   ` Demi Marie Obenour
2023-05-30 20:31 ` [dm-devel] [PATCH v2 08/16] device-mapper: Allow userspace to provide expected diskseq Demi Marie Obenour
2023-05-30 20:31   ` Demi Marie Obenour
2023-05-30 20:31 ` [dm-devel] [PATCH v2 09/16] device-mapper: Allow userspace to suppress uevent generation Demi Marie Obenour
2023-05-30 20:31   ` Demi Marie Obenour
2023-05-30 20:31 ` [dm-devel] [PATCH v2 10/16] device-mapper: Refuse to create device named "control" Demi Marie Obenour
2023-05-30 20:31   ` Demi Marie Obenour
2023-05-30 20:31 ` [dm-devel] [PATCH v2 11/16] device-mapper: "." and ".." are not valid symlink names Demi Marie Obenour
2023-05-30 20:31   ` Demi Marie Obenour
2023-05-30 20:31 ` [dm-devel] [PATCH v2 12/16] device-mapper: inform caller about already-existing device Demi Marie Obenour
2023-05-30 20:31   ` Demi Marie Obenour
2023-05-30 20:31 ` [dm-devel] [PATCH v2 13/16] xen-blkback: Implement diskseq checks Demi Marie Obenour
2023-05-30 20:31   ` Demi Marie Obenour
2023-06-06  8:25   ` Roger Pau Monné [this message]
2023-06-06  8:25     ` Roger Pau Monné
2023-06-06 17:01     ` [dm-devel] " Demi Marie Obenour
2023-06-06 17:01       ` Demi Marie Obenour
2023-06-07  8:20       ` [dm-devel] " Roger Pau Monné
2023-06-07  8:20         ` Roger Pau Monné
2023-06-07 16:14         ` [dm-devel] " Demi Marie Obenour
2023-06-07 16:14           ` Demi Marie Obenour
2023-06-08  8:29           ` [dm-devel] " Roger Pau Monné
2023-06-08  8:29             ` Roger Pau Monné
2023-06-08 15:33             ` [dm-devel] " Demi Marie Obenour
2023-06-08 15:33               ` Demi Marie Obenour
2023-06-09 15:13               ` [dm-devel] " Roger Pau Monné
2023-06-09 15:13                 ` Roger Pau Monné
2023-06-09 16:55                 ` [dm-devel] " Demi Marie Obenour
2023-06-09 16:55                   ` Demi Marie Obenour
2023-06-12  8:09                   ` [dm-devel] " Roger Pau Monné
2023-06-12  8:09                     ` Roger Pau Monné
2023-06-21  1:14                     ` [dm-devel] " Demi Marie Obenour
2023-06-21  1:14                       ` Demi Marie Obenour
2023-06-21 10:07                       ` [dm-devel] " Roger Pau Monné
2023-06-21 10:07                         ` Roger Pau Monné
2023-05-30 20:31 ` [dm-devel] [PATCH v2 14/16] block, loop: Increment diskseq when releasing a loop device Demi Marie Obenour
2023-05-30 20:31   ` Demi Marie Obenour
2023-05-30 20:31 ` [dm-devel] [PATCH v2 15/16] xen-blkback: Minor cleanups Demi Marie Obenour
2023-05-30 20:31   ` Demi Marie Obenour
2023-06-06  8:36   ` [dm-devel] " Roger Pau Monné
2023-06-06  8:36     ` Roger Pau Monné
2023-05-30 20:31 ` [dm-devel] [PATCH v2 16/16] xen-blkback: Inform userspace that device has been opened Demi Marie Obenour
2023-05-30 20:31   ` Demi Marie Obenour
2023-06-06  9:15   ` [dm-devel] " Roger Pau Monné
2023-06-06  9:15     ` Roger Pau Monné
2023-06-06 17:31     ` [dm-devel] " Demi Marie Obenour
2023-06-06 17:31       ` Demi Marie Obenour
2023-06-07  8:44       ` [dm-devel] " Roger Pau Monné
2023-06-07  8:44         ` Roger Pau Monné
2023-06-07 16:29         ` [dm-devel] " Demi Marie Obenour
2023-06-07 16:29           ` Demi Marie Obenour
2023-06-08  9:11           ` [dm-devel] " Roger Pau Monné
2023-06-08  9:11             ` Roger Pau Monné
2023-06-08 15:23             ` [dm-devel] " Demi Marie Obenour
2023-06-08 15:23               ` Demi Marie Obenour
2023-06-08 10:08   ` [dm-devel] " Roger Pau Monné
2023-06-08 10:08     ` Roger Pau Monné
2023-06-08 15:24     ` [dm-devel] " Demi Marie Obenour
2023-06-08 15:24       ` Demi Marie Obenour
2023-05-31 13:06 ` [dm-devel] [PATCH v2 00/16] Diskseq support in loop, device-mapper, and blkback Christoph Hellwig
2023-05-31 13:06   ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZH7tizoYl8YVFN9B@Air-de-Roger \
    --to=roger.pau@citrix.com \
    --cc=agk@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=demi@invisiblethingslab.com \
    --cc=dm-devel@redhat.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=marmarek@invisiblethingslab.com \
    --cc=snitzer@kernel.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.