public inbox for linux-block@vger.kernel.org
 help / color / mirror / Atom feed
From: Pankaj Raghav <pankydev8@gmail.com>
To: Dmitry Fomichev <dmitry.fomichev@wdc.com>
Cc: "Jens Axboe" <axboe@kernel.dk>,
	"Damien Le Moal" <damien.lemoal@opensource.wdc.com>,
	"Stefan Hajnoczi" <stefanha@gmail.com>,
	"Hannes Reinecke" <hare@suse.de>,
	"Sam Li" <faithilikerun@gmail.com>,
	linux-block@vger.kernel.org, virtio-dev@lists.oasis-open.org,
	"Pankaj Raghav" <p.raghav@samsung.com>,
	"Javier González" <javier.gonz@samsung.com>,
	k.jensen@samsung.com
Subject: Re: [PATCH 3/3] virtio-blk: add support for zoned block devices
Date: Mon, 19 Sep 2022 15:41:33 +0200	[thread overview]
Message-ID: <20220919133853.2xsamyvr66qeogko@quentin> (raw)
In-Reply-To: <20220919022921.946344-4-dmitry.fomichev@wdc.com>

Hi Dmitry,

On Sun, Sep 18, 2022 at 10:29:21PM -0400, Dmitry Fomichev wrote:
> The zone-specific code in the patch is heavily influenced by NVMe ZNS
> code in drivers/nvme/host/zns.c, but it is simpler because the proposed
> virtio ZBD draft only covers the zoned device features that are
> relevant to the zoned functionality provided by Linux block layer.
> 
There is a parallel work going on to support non-po2 zone sizes in Linux
block layer and drivers[1]. I don't see any reason why we shouldn't make
the calculations generic here instead of putting the constraint on zone
sectors to be po2 as the virtio spec also supports it.

I took a quick look, and changing the calculations from po2 specific to
generic will not be in the hot path and can be trivially changed. I have
suggested the changes inline to make the virtio blk driver zone size 
agnostic. I haven't tested the changes but it is very
similar to the changes I did in the drivers/nvme/host/zns.c in my patch
series[2].

[1] https://lore.kernel.org/linux-block/20220912082204.51189-1-p.raghav@samsung.com/
[2] https://lore.kernel.org/linux-block/20220912082204.51189-6-p.raghav@samsung.com/

> Co-developed-by: Stefan Hajnoczi <stefanha@gmail.com>
> Signed-off-by: Stefan Hajnoczi <stefanha@gmail.com>
> Signed-off-by: Dmitry Fomichev <dmitry.fomichev@wdc.com>
> ---
>  drivers/block/virtio_blk.c      | 381 ++++++++++++++++++++++++++++++--
>  include/uapi/linux/virtio_blk.h | 106 +++++++++
>  2 files changed, 469 insertions(+), 18 deletions(-)
> 
<snip>
> +#ifdef CONFIG_BLK_DEV_ZONED
> +static void *virtblk_alloc_report_buffer(struct virtio_blk *vblk,
> +					  unsigned int nr_zones,
> +					  unsigned int zone_sectors,
> +					  size_t *buflen)
> +{
> +	struct request_queue *q = vblk->disk->queue;
> +	size_t bufsize;
> +	void *buf;
> +
-	nr_zones = min_t(unsigned int, nr_zones,
-			 get_capacity(vblk->disk) >> ilog2(zone_sectors));

+	nr_zones = min_t(unsigned int, nr_zones,
+			 div64_u64(get_capacity(vblk->disk), zone_sectors));

> +
> +	bufsize = sizeof(struct virtio_blk_zone_report) +
> +		nr_zones * sizeof(struct virtio_blk_zone_descriptor);
> +	bufsize = min_t(size_t, bufsize,
> +			queue_max_hw_sectors(q) << SECTOR_SHIFT);
> +	bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
> +
> +	while (bufsize >= sizeof(struct virtio_blk_zone_report)) {
> +		buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
> +		if (buf) {
> +			*buflen = bufsize;
> +			return buf;
> +		}
> +		bufsize >>= 1;
> +	}
> +
> +	return NULL;
> +}
> +
> +static int virtblk_submit_zone_report(struct virtio_blk *vblk,
> +				       char *report_buf, size_t report_len,
> +				       sector_t sector)
> +{
> +	struct request_queue *q = vblk->disk->queue;
> +	struct request *req;
> +	struct virtblk_req *vbr;
> +	int err;
> +
> +	req = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
> +	if (IS_ERR(req))
> +		return PTR_ERR(req);
> +
> +	vbr = blk_mq_rq_to_pdu(req);
> +	vbr->in_hdr_len = sizeof(vbr->status);
> +	vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_ZONE_REPORT);
> +	vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, sector);
> +
> +	err = blk_rq_map_kern(q, req, report_buf, report_len, GFP_KERNEL);
> +	if (err)
> +		goto out;
> +
> +	blk_execute_rq(req, false);
> +	err = blk_status_to_errno(virtblk_result(vbr->status));
> +out:
> +	blk_mq_free_request(req);
> +	return err;
> +}
> +
> +static int virtblk_parse_zone(struct virtio_blk *vblk,
> +			       struct virtio_blk_zone_descriptor *entry,
> +			       unsigned int idx, unsigned int zone_sectors,
> +			       report_zones_cb cb, void *data)
> +{
> +	struct blk_zone zone = { };
> +
> +	if (entry->z_type != VIRTIO_BLK_ZT_SWR &&
> +	    entry->z_type != VIRTIO_BLK_ZT_SWP &&
> +	    entry->z_type != VIRTIO_BLK_ZT_CONV) {
> +		dev_err(&vblk->vdev->dev, "invalid zone type %#x\n",
> +			entry->z_type);
> +		return -EINVAL;
> +	}
> +
> +	zone.type = entry->z_type;
> +	zone.cond = entry->z_state;
> +	zone.len = zone_sectors;
> +	zone.capacity = le64_to_cpu(entry->z_cap);
> +	zone.start = le64_to_cpu(entry->z_start);
> +	if (zone.cond == BLK_ZONE_COND_FULL)
> +		zone.wp = zone.start + zone.len;
> +	else
> +		zone.wp = le64_to_cpu(entry->z_wp);
> +
> +	return cb(&zone, idx, data);
> +}
> +
> +static int virtblk_report_zones(struct gendisk *disk, sector_t sector,
> +				 unsigned int nr_zones, report_zones_cb cb,
> +				 void *data)
> +{
> +	struct virtio_blk *vblk = disk->private_data;
> +	struct virtio_blk_zone_report *report;
> +	unsigned int zone_sectors = vblk->zone_sectors;
> +	unsigned int nz, i;
> +	int ret, zone_idx = 0;
> +	size_t buflen;
+	u64 remainder;
> +
> +	if (WARN_ON_ONCE(!vblk->zone_sectors))
> +		return -EOPNOTSUPP;
> +
> +	report = virtblk_alloc_report_buffer(vblk, nr_zones,
> +					     zone_sectors, &buflen);
> +	if (!report)
> +		return -ENOMEM;
> +
-	sector &= ~(zone_sectors - 1);

+	div64_u64_rem(sector, zone_sectors, &remainder);
+	sector -= remainder;
> +	while (zone_idx < nr_zones && sector < get_capacity(vblk->disk)) {
> +		memset(report, 0, buflen);
> +
> +		ret = virtblk_submit_zone_report(vblk, (char *)report,
> +						 buflen, sector);
> +		if (ret) {
> +			if (ret > 0)
> +				ret = -EIO;
> +			goto out_free;
> +		}
> +
> +		nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones);
> +		if (!nz)
> +			break;
> +
> +		for (i = 0; i < nz && zone_idx < nr_zones; i++) {
> +			ret = virtblk_parse_zone(vblk, &report->zones[i],
> +						 zone_idx, zone_sectors, cb, data);
> +			if (ret)
> +				goto out_free;
> +			zone_idx++;
> +		}
> +
> +		sector += zone_sectors * nz;
> +	}
> +
> +	if (zone_idx > 0)
> +		ret = zone_idx;
> +	else
> +		ret = -EINVAL;
> +out_free:
> +	kvfree(report);
> +	return ret;
> +}
> +
<snip>
> +static int virtblk_probe_zoned_device(struct virtio_device *vdev,
> +				       struct virtio_blk *vblk,
> +				       struct request_queue *q)
> +{
<snip>
> +	blk_queue_physical_block_size(q, le32_to_cpu(v));
> +	blk_queue_io_min(q, le32_to_cpu(v));
> +
> +	dev_dbg(&vdev->dev, "write granularity = %u\n", le32_to_cpu(v));
> +
-	/*
-	 * virtio ZBD specification doesn't require zones to be a power of
-	 * two sectors in size, but the code in this driver expects that.
-	 */
-	virtio_cread(vdev, struct virtio_blk_config, zoned.zone_sectors, &v);
-	if (v == 0 || !is_power_of_2(v)) {
-		dev_err(&vdev->dev,
-			"zoned device with non power of two zone size %u\n", v);
-		return -ENODEV;
-	}
> +
> +	dev_dbg(&vdev->dev, "zone sectors = %u\n", le32_to_cpu(v));
> +	vblk->zone_sectors = le32_to_cpu(v);
> +
> +	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
> +		dev_warn(&vblk->vdev->dev,
> +			 "ignoring negotiated F_DISCARD for zoned device\n");
> +		blk_queue_max_discard_sectors(q, 0);
> +	}
> +
> +	ret = blk_revalidate_disk_zones(vblk->disk, NULL);
> +	if (!ret) {
> +		virtio_cread(vdev, struct virtio_blk_config,
> +			     zoned.max_append_sectors, &v);
> +		if (!v) {
> +			dev_warn(&vdev->dev, "zero max_append_sectors reported\n");
> +			return -ENODEV;
> +		}
> +		blk_queue_max_zone_append_sectors(q, le32_to_cpu(v));
> +		dev_dbg(&vdev->dev, "max append sectors = %u\n", le32_to_cpu(v));
> +
> +	}
> +
> +	return ret;
> +}
> +

-- 
Pankaj Raghav

  reply	other threads:[~2022-09-19 13:41 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-19  2:29 [PATCH 0/3] virtio-blk: support zoned block devices Dmitry Fomichev
2022-09-19  2:29 ` [PATCH 1/3] virtio-blk: use a helper to handle request queuing errors Dmitry Fomichev
2022-09-19  2:29 ` [PATCH 2/3] virtio-blk: add a placeholder for secure erase config Dmitry Fomichev
2022-09-19  2:29 ` [PATCH 3/3] virtio-blk: add support for zoned block devices Dmitry Fomichev
2022-09-19 13:41   ` Pankaj Raghav [this message]
2022-09-19 23:59     ` Damien Le Moal
2022-09-20  1:51       ` Dmitry Fomichev
2022-09-20  8:04         ` Pankaj Raghav
2022-09-20  7:43 ` [PATCH 0/3] virtio-blk: support " Christoph Hellwig
2022-09-20 10:41   ` Stefan Hajnoczi
2022-09-21  2:33     ` Dmitry Fomichev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220919133853.2xsamyvr66qeogko@quentin \
    --to=pankydev8@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=damien.lemoal@opensource.wdc.com \
    --cc=dmitry.fomichev@wdc.com \
    --cc=faithilikerun@gmail.com \
    --cc=hare@suse.de \
    --cc=javier.gonz@samsung.com \
    --cc=k.jensen@samsung.com \
    --cc=linux-block@vger.kernel.org \
    --cc=p.raghav@samsung.com \
    --cc=stefanha@gmail.com \
    --cc=virtio-dev@lists.oasis-open.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox