linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Ming Lei <ming.lei@redhat.com>
To: Andreas Hindborg <nmi@metaspace.dk>
Cc: linux-block@vger.kernel.org,
	Andreas Hindborg <a.hindborg@samsung.com>,
	Hans Holmberg <Hans.Holmberg@wdc.com>,
	Matias Bjorling <Matias.Bjorling@wdc.com>,
	Niklas Cassel <Niklas.Cassel@wdc.com>,
	kernel test robot <lkp@intel.com>, Jens Axboe <axboe@kernel.dk>,
	open list <linux-kernel@vger.kernel.org>,
	Damien Le Moal <damien.lemoal@opensource.wdc.com>
Subject: Re: [PATCH v2] block: ublk: enable zoned storage support
Date: Thu, 2 Mar 2023 10:50:44 +0800	[thread overview]
Message-ID: <ZAAPBFfqP671N4ue@T590> (raw)
In-Reply-To: <20230224200502.391570-1-nmi@metaspace.dk>

On Fri, Feb 24, 2023 at 09:05:01PM +0100, Andreas Hindborg wrote:
> Add zoned storage support to ublk: report_zones and operations:
>  - REQ_OP_ZONE_OPEN
>  - REQ_OP_ZONE_CLOSE
>  - REQ_OP_ZONE_FINISH
>  - REQ_OP_ZONE_RESET
> 
> This allows implementation of zoned storage devices in user space. An
> example user space implementation based on ubdsrv is available [1].
> 
> [1] https://github.com/metaspace/ubdsrv/commit/14a2b708f74f70cfecb076d92e680dc718cc1f6d

As I suggested, please write one simple & clean zoned target for verifying
the interface, and better to not add to tgt_null.c.

> 
> Signed-off-by: Andreas Hindborg <a.hindborg@samsung.com>
> ---
> Changes since v1:
>  - Fixed conditional compilation bug
>  - Refactored to collect conditional code additions together
>  - Fixed style errors
>  - Zero stack allocated value used for zone report
> 
> Reported-by: Niklas Cassel <Niklas.Cassel@wdc.com>
> Reported-by: kernel test robot <lkp@intel.com>
> Link: https://lore.kernel.org/oe-kbuild-all/202302250222.XOrw9j6z-lkp@intel.com/
> v1: https://lore.kernel.org/linux-block/20230224125950.214779-1-nmi@metaspace.dk/
> 
>  drivers/block/ublk_drv.c      | 150 ++++++++++++++++++++++++++++++++--
>  include/uapi/linux/ublk_cmd.h |  18 ++++
>  2 files changed, 162 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index 6368b56eacf1..37e516903867 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -20,6 +20,7 @@
>  #include <linux/major.h>
>  #include <linux/wait.h>
>  #include <linux/blkdev.h>
> +#include <linux/blkzoned.h>
>  #include <linux/init.h>
>  #include <linux/swap.h>
>  #include <linux/slab.h>
> @@ -51,10 +52,12 @@
>  		| UBLK_F_URING_CMD_COMP_IN_TASK \
>  		| UBLK_F_NEED_GET_DATA \
>  		| UBLK_F_USER_RECOVERY \
> -		| UBLK_F_USER_RECOVERY_REISSUE)
> +		| UBLK_F_USER_RECOVERY_REISSUE \
> +		| UBLK_F_ZONED)
>  
>  /* All UBLK_PARAM_TYPE_* should be included here */
> -#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD)
> +#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD \
> +			     | UBLK_PARAM_TYPE_ZONED)
>  
>  struct ublk_rq_data {
>  	struct llist_node node;
> @@ -187,6 +190,98 @@ static DEFINE_MUTEX(ublk_ctl_mutex);
>  
>  static struct miscdevice ublk_misc;
>  
> +#ifdef CONFIG_BLK_DEV_ZONED
> +static void ublk_set_nr_zones(struct ublk_device *ub)
> +{
> +	const struct ublk_param_basic *p = &ub->params.basic;
> +
> +	if (ub->dev_info.flags & UBLK_F_ZONED && p->chunk_sectors)
> +		ub->ub_disk->nr_zones = p->dev_sectors / p->chunk_sectors;
> +}
> +
> +static void ublk_dev_param_zoned_apply(struct ublk_device *ub)
> +{
> +	const struct ublk_param_zoned *p = &ub->params.zoned;
> +
> +	if (ub->dev_info.flags & UBLK_F_ZONED) {
> +		disk_set_max_active_zones(ub->ub_disk, p->max_active_zones);
> +		disk_set_max_open_zones(ub->ub_disk, p->max_open_zones);
> +	}
> +}
> +
> +static int ublk_revalidate_disk_zones(struct gendisk *disk)
> +{
> +	return blk_revalidate_disk_zones(disk, NULL);
> +}
> +
> +static int ublk_report_zones(struct gendisk *disk, sector_t sector,
> +			     unsigned int nr_zones, report_zones_cb cb,
> +			     void *data)
> +{
> +	struct ublk_device *ub;
> +	unsigned int zone_size;
> +	unsigned int first_zone;
> +	int ret = 0;
> +
> +	ub = disk->private_data;
> +
> +	if (!(ub->dev_info.flags & UBLK_F_ZONED))
> +		return -EINVAL;
> +
> +	zone_size = disk->queue->limits.chunk_sectors;
> +	first_zone = sector >> ilog2(zone_size);
> +	nr_zones = min(ub->ub_disk->nr_zones - first_zone, nr_zones);
> +
> +	for (unsigned int i = 0; i < nr_zones; i++) {

The local variable 'i' needs to be declared in the front part
of this function body.

> +		struct request *req;
> +		blk_status_t status;
> +		struct blk_zone info = {0};
> +
> +		req = blk_mq_alloc_request(disk->queue, REQ_OP_DRV_IN, 0);
> +
> +		if (IS_ERR(req)) {
> +			ret = PTR_ERR(req);
> +			goto out;
> +		}
> +
> +		req->__sector = sector;

Why is req->__sector set?

> +
> +		ret = blk_rq_map_kern(disk->queue, req, &info, sizeof(info),
> +				      GFP_KERNEL);
> +
> +		if (ret)
> +			goto out;
> +
> +		status = blk_execute_rq(req, 0);
> +		ret = blk_status_to_errno(status);
> +		if (ret)
> +			goto out;
> +
> +		blk_mq_free_request(req);
> +
> +		ret = cb(&info, i, data);
> +		if (ret)
> +			goto out;
> +
> +		/* A zero length zone means don't ask for more zones */
> +		if (!info.len) {
> +			nr_zones = i;
> +			break;
> +		}
> +
> +		sector += zone_size;
> +	}

I'd suggest to report as many as possible zones in one command, and
the dev_info.max_io_buf_bytes is the max allowed buffer size for one
command, please refer to nvme_ns_report_zones().

Also we are going to extend ublk in the multiple LUN/NS style, and I
guess that won't be one issue since ->report_zones() is always done on
disk level, right?

> +	ret = nr_zones;
> +
> + out:
> +	return ret;
> +}
> +#else
> +void ublk_set_nr_zones(struct ublk_device *ub);
> +void ublk_dev_param_zoned_apply(struct ublk_device *ub);
> +int ublk_revalidate_disk_zones(struct gendisk *disk);
> +#endif
> +
>  static void ublk_dev_param_basic_apply(struct ublk_device *ub)
>  {
>  	struct request_queue *q = ub->ub_disk->queue;
> @@ -212,6 +307,9 @@ static void ublk_dev_param_basic_apply(struct ublk_device *ub)
>  		set_disk_ro(ub->ub_disk, true);
>  
>  	set_capacity(ub->ub_disk, p->dev_sectors);
> +
> +	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
> +		ublk_set_nr_zones(ub);
>  }
>  
>  static void ublk_dev_param_discard_apply(struct ublk_device *ub)
> @@ -268,6 +366,9 @@ static int ublk_apply_params(struct ublk_device *ub)
>  	if (ub->params.types & UBLK_PARAM_TYPE_DISCARD)
>  		ublk_dev_param_discard_apply(ub);
>  
> +	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && (ub->params.types & UBLK_PARAM_TYPE_ZONED))
> +		ublk_dev_param_zoned_apply(ub);
> +
>  	return 0;
>  }
>  
> @@ -361,9 +462,13 @@ static void ublk_free_disk(struct gendisk *disk)
>  	put_device(&ub->cdev_dev);
>  }
>  
> +
>  static const struct block_device_operations ub_fops = {
> -	.owner =	THIS_MODULE,
> -	.free_disk =	ublk_free_disk,
> +	.owner = THIS_MODULE,
> +	.free_disk = ublk_free_disk,
> +#ifdef CONFIG_BLK_DEV_ZONED
> +	.report_zones = ublk_report_zones,
> +#endif

Define one null ublk_report_zones in #else branch of the above #ifdef, then we
can save one #ifdef.

>  };
>  
>  #define UBLK_MAX_PIN_PAGES	32
> @@ -499,7 +604,7 @@ static int ublk_unmap_io(const struct ublk_queue *ubq,
>  {
>  	const unsigned int rq_bytes = blk_rq_bytes(req);
>  
> -	if (req_op(req) == REQ_OP_READ && ublk_rq_has_data(req)) {
> +	if ((req_op(req) == REQ_OP_READ || req_op(req) == REQ_OP_DRV_IN) && ublk_rq_has_data(req)) {
>  		struct ublk_map_data data = {
>  			.ubq	=	ubq,
>  			.rq	=	req,
> @@ -566,6 +671,26 @@ static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req)
>  	case REQ_OP_WRITE_ZEROES:
>  		ublk_op = UBLK_IO_OP_WRITE_ZEROES;
>  		break;
> +#ifdef CONFIG_BLK_DEV_ZONED
> +	case REQ_OP_ZONE_OPEN:
> +		ublk_op = UBLK_IO_OP_ZONE_OPEN;
> +		break;
> +	case REQ_OP_ZONE_CLOSE:
> +		ublk_op = UBLK_IO_OP_ZONE_CLOSE;
> +		break;
> +	case REQ_OP_ZONE_FINISH:
> +		ublk_op = UBLK_IO_OP_ZONE_FINISH;
> +		break;
> +	case REQ_OP_ZONE_RESET:
> +		ublk_op = UBLK_IO_OP_ZONE_RESET;
> +		break;
> +	case REQ_OP_DRV_IN:
> +		ublk_op = UBLK_IO_OP_DRV_IN;
> +		break;
> +	case REQ_OP_ZONE_APPEND:
> +		/* We do not support zone append yet */
> +		fallthrough;
> +#endif

The above '#ifdef' is needn't, since  OP_ZONE should be defined no
matter if CONFIG_BLK_DEV_ZONED is enabled or not.

>  	default:
>  		return BLK_STS_IOERR;
>  	}
> @@ -612,7 +737,8 @@ static void ublk_complete_rq(struct request *req)
>  	 *
>  	 * Both the two needn't unmap.
>  	 */
> -	if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE) {
> +	if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE &&
> +	    req_op(req) != REQ_OP_DRV_IN) {
>  		blk_mq_end_request(req, BLK_STS_OK);
>  		return;
>  	}
> @@ -1535,6 +1661,15 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
>  	if (ret)
>  		goto out_put_disk;
>  
> +	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
> +	    ub->dev_info.flags & UBLK_F_ZONED) {
> +		disk_set_zoned(disk, BLK_ZONED_HM);
> +		blk_queue_required_elevator_features(disk->queue, ELEVATOR_F_ZBD_SEQ_WRITE);
> +		ret = ublk_revalidate_disk_zones(disk);
> +		if (ret)
> +			goto out_put_disk;
> +	}
> +
>  	get_device(&ub->cdev_dev);
>  	ret = add_disk(disk);
>  	if (ret) {
> @@ -1673,6 +1808,9 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
>  	if (!IS_BUILTIN(CONFIG_BLK_DEV_UBLK))
>  		ub->dev_info.flags |= UBLK_F_URING_CMD_COMP_IN_TASK;
>  
> +	if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
> +		ub->dev_info.flags &= ~UBLK_F_ZONED;
> +
>  	/* We are not ready to support zero copy */
>  	ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
>  
> diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
> index 8f88e3a29998..074b97821575 100644
> --- a/include/uapi/linux/ublk_cmd.h
> +++ b/include/uapi/linux/ublk_cmd.h
> @@ -78,6 +78,10 @@
>  #define UBLK_F_USER_RECOVERY	(1UL << 3)
>  
>  #define UBLK_F_USER_RECOVERY_REISSUE	(1UL << 4)
> +/*
> + * Enable zoned device support
> + */
> +#define UBLK_F_ZONED (1ULL << 5)
>  
>  /* device state */
>  #define UBLK_S_DEV_DEAD	0
> @@ -129,6 +133,12 @@ struct ublksrv_ctrl_dev_info {
>  #define		UBLK_IO_OP_DISCARD	3
>  #define		UBLK_IO_OP_WRITE_SAME	4
>  #define		UBLK_IO_OP_WRITE_ZEROES	5
> +#define		UBLK_IO_OP_ZONE_OPEN		10
> +#define		UBLK_IO_OP_ZONE_CLOSE		11
> +#define		UBLK_IO_OP_ZONE_FINISH		12
> +#define		UBLK_IO_OP_ZONE_APPEND		13
> +#define		UBLK_IO_OP_ZONE_RESET		15
> +#define		UBLK_IO_OP_DRV_IN		34
>  
>  #define		UBLK_IO_F_FAILFAST_DEV		(1U << 8)
>  #define		UBLK_IO_F_FAILFAST_TRANSPORT	(1U << 9)
> @@ -214,6 +224,12 @@ struct ublk_param_discard {
>  	__u16	reserved0;
>  };
>  
> +struct ublk_param_zoned {
> +	__u64	max_open_zones;
> +	__u64	max_active_zones;
> +	__u64	max_append_size;
> +};

Is the above zoned parameter enough for future extension?
Does ZNS need extra parameter? Or some zoned new(important) features?

I highly suggest to reserve some fields for extension, given
it is one ABI interface, which is supposed to be defined well
enough from the beginning.


Thanks, 
Ming


  parent reply	other threads:[~2023-03-02  2:51 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-24 20:05 [PATCH v2] block: ublk: enable zoned storage support Andreas Hindborg
2023-02-27 10:20 ` Niklas Cassel
2023-02-27 11:59   ` Andreas Hindborg
2023-02-27 14:29     ` Niklas Cassel
2023-02-27 14:41       ` Andreas Hindborg
2023-02-27 15:20 ` Minwoo Im
2023-02-27 15:36   ` Andreas Hindborg
2023-02-28  9:52 ` Hans Holmberg
2023-03-01  7:28   ` Andreas Hindborg
2023-03-02  2:50 ` Ming Lei [this message]
2023-03-02  7:31   ` Andreas Hindborg
2023-03-02  8:19     ` Damien Le Moal
2023-03-02  8:32     ` Ming Lei
2023-03-02  9:01       ` Ming Lei
2023-03-02  9:14         ` Ming Lei
2023-03-02 10:07           ` Andreas Hindborg
2023-03-02 13:16             ` Ming Lei
2023-03-02 13:28               ` Andreas Hindborg
2023-03-03  2:59                 ` Ming Lei
2023-03-03  8:27                   ` Andreas Hindborg
2023-03-03 11:47                     ` Ming Lei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZAAPBFfqP671N4ue@T590 \
    --to=ming.lei@redhat.com \
    --cc=Hans.Holmberg@wdc.com \
    --cc=Matias.Bjorling@wdc.com \
    --cc=Niklas.Cassel@wdc.com \
    --cc=a.hindborg@samsung.com \
    --cc=axboe@kernel.dk \
    --cc=damien.lemoal@opensource.wdc.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lkp@intel.com \
    --cc=nmi@metaspace.dk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).