public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 0/8] ZBC host-managed device support
@ 2015-07-31 13:36 Hannes Reinecke
  2015-07-31 13:36 ` [PATCH 1/8] blk-sysfs: Add 'chunk_sectors' to sysfs attributes Hannes Reinecke
                   ` (7 more replies)
  0 siblings, 8 replies; 9+ messages in thread
From: Hannes Reinecke @ 2015-07-31 13:36 UTC (permalink / raw)
  To: James Bottomley
  Cc: Christoph Hellwig, linux-scsi, Jens Axboe, linux-kernel,
	Hannes Reinecke

Hi all,

Here is a first stab at supporting ZBC host-managed devices.
For the uninitiated: ZBC host-managed devices (aka shingled or SMR drives)
have partitioned the drive into several zones, most of which require
sequential writes.
To handle this the patchset implements an rbtree for the zone information,
which will be filled in during device detection.
The rbtree is hooked to the request queue, as it's envisioned that other
layers might want to access it, too.
(Like the device-mapper target I'm working on :-).

Additionally the devices have the habit of spewing out read errors for
uninitialized areas, making working with those devices awkward as the
libata error handling takes ages to recover. To handle that I've implemented
a new BLKPREP return value 'BLKPREP_DONE', which allows short-circuiting
those commands (as they won't return any value anyway).

Oh, this patchset is based on my earlier patchset
'libata: ZAC host-aware device support'.
The entire patchset(s) can be found at

git.kernel.org:/pub/scm/linux/kernel/git/hare/scsi-devel branch zbc.v1

As usual, comments and reviews are welcome.

Hannes Reinecke (8):
  blk-sysfs: Add 'chunk_sectors' to sysfs attributes
  block: update chunk_sectors in blk_stack_limits()
  sd: Set chunk_sectors to zone size
  sd: Implement new RESET_WP provisioning mode
  block: Implement support for zoned block devices
  block: Add 'zoned' sysfs queue attribute
  block: Introduce BLKPREP_DONE
  sd: Implement support for ZBC devices

 block/Kconfig           |   9 ++
 block/Makefile          |   1 +
 block/blk-core.c        |  13 ++
 block/blk-settings.c    |   3 +
 block/blk-sysfs.c       |  74 +++++++++
 block/blk-zoned.c       |  70 +++++++++
 drivers/scsi/Kconfig    |   8 +
 drivers/scsi/Makefile   |   1 +
 drivers/scsi/scsi_lib.c |   3 +-
 drivers/scsi/sd.c       | 245 ++++++++++++++++++++++++++++--
 drivers/scsi/sd.h       |  36 +++++
 drivers/scsi/sd_zbc.c   | 390 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/blkdev.h  |  48 ++++++
 13 files changed, 886 insertions(+), 15 deletions(-)
 create mode 100644 block/blk-zoned.c
 create mode 100644 drivers/scsi/sd_zbc.c

-- 
1.8.5.2


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/8] blk-sysfs: Add 'chunk_sectors' to sysfs attributes
  2015-07-31 13:36 [RFC PATCH 0/8] ZBC host-managed device support Hannes Reinecke
@ 2015-07-31 13:36 ` Hannes Reinecke
  2015-07-31 13:36 ` [PATCH 2/8] block: update chunk_sectors in blk_stack_limits() Hannes Reinecke
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Hannes Reinecke @ 2015-07-31 13:36 UTC (permalink / raw)
  To: James Bottomley
  Cc: Christoph Hellwig, linux-scsi, Jens Axboe, linux-kernel,
	Hannes Reinecke

The queue limits already have a 'chunk_sectors' setting, so
we should be presenting it via sysfs.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 block/blk-sysfs.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 6264b38..e419f1f 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -130,6 +130,44 @@ static ssize_t queue_physical_block_size_show(struct request_queue *q, char *pag
 	return queue_var_show(queue_physical_block_size(q), page);
 }
 
+/* Show the queue's chunk_sectors limit (q->limits.chunk_sectors). */
+static ssize_t queue_chunk_sectors_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(q->limits.chunk_sectors, page);
+}
+
+/*
+ * Parse a new chunk_sectors value from sysfs and apply it to the queue.
+ *
+ * blk_queue_chunk_sectors() takes an unsigned int and asserts that it is
+ * a power of two, so reject anything else with -EINVAL here rather than
+ * letting a write from userspace trip that assertion or be truncated.
+ *
+ * Returns the queue_var_store() result on success, a negative errno on
+ * failure.
+ */
+static ssize_t
+queue_chunk_sectors_store(struct request_queue *q, const char *page, size_t count)
+{
+	unsigned long chunk_sectors;
+	ssize_t ret;
+
+	ret = queue_var_store(&chunk_sectors, page, count);
+	if (ret < 0)
+		return ret;
+
+	/* Zero, values above UINT_MAX and non-powers-of-two are invalid */
+	if (!chunk_sectors || chunk_sectors > UINT_MAX ||
+	    (chunk_sectors & (chunk_sectors - 1)))
+		return -EINVAL;
+
+	spin_lock_irq(q->queue_lock);
+	blk_queue_chunk_sectors(q, chunk_sectors);
+	spin_unlock_irq(q->queue_lock);
+
+	return ret;
+}
+
 static ssize_t queue_io_min_show(struct request_queue *q, char *page)
 {
 	return queue_var_show(queue_io_min(q), page);
@@ -345,6 +365,12 @@ static struct queue_sysfs_entry queue_physical_block_size_entry = {
 	.show = queue_physical_block_size_show,
 };
 
+static struct queue_sysfs_entry queue_chunk_sectors_entry = {
+	.attr = {.name = "chunk_sectors", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_chunk_sectors_show,
+	.store = queue_chunk_sectors_store,
+};
+
 static struct queue_sysfs_entry queue_io_min_entry = {
 	.attr = {.name = "minimum_io_size", .mode = S_IRUGO },
 	.show = queue_io_min_show,
@@ -417,6 +443,7 @@ static struct attribute *default_attrs[] = {
 	&queue_hw_sector_size_entry.attr,
 	&queue_logical_block_size_entry.attr,
 	&queue_physical_block_size_entry.attr,
+	&queue_chunk_sectors_entry.attr,
 	&queue_io_min_entry.attr,
 	&queue_io_opt_entry.attr,
 	&queue_discard_granularity_entry.attr,
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/8] block: update chunk_sectors in blk_stack_limits()
  2015-07-31 13:36 [RFC PATCH 0/8] ZBC host-managed device support Hannes Reinecke
  2015-07-31 13:36 ` [PATCH 1/8] blk-sysfs: Add 'chunk_sectors' to sysfs attributes Hannes Reinecke
@ 2015-07-31 13:36 ` Hannes Reinecke
  2015-07-31 13:36 ` [PATCH 3/8] sd: Set chunk_sectors to zone size Hannes Reinecke
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Hannes Reinecke @ 2015-07-31 13:36 UTC (permalink / raw)
  To: James Bottomley
  Cc: Christoph Hellwig, linux-scsi, Jens Axboe, linux-kernel,
	Hannes Reinecke, Hannes Reinecke

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
 block/blk-settings.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 12600bf..63aa067 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -647,6 +647,9 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 			t->discard_granularity;
 	}
 
+	if (b->chunk_sectors)
+		t->chunk_sectors = max(t->chunk_sectors, b->chunk_sectors);
+
 	return ret;
 }
 EXPORT_SYMBOL(blk_stack_limits);
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/8] sd: Set chunk_sectors to zone size
  2015-07-31 13:36 [RFC PATCH 0/8] ZBC host-managed device support Hannes Reinecke
  2015-07-31 13:36 ` [PATCH 1/8] blk-sysfs: Add 'chunk_sectors' to sysfs attributes Hannes Reinecke
  2015-07-31 13:36 ` [PATCH 2/8] block: update chunk_sectors in blk_stack_limits() Hannes Reinecke
@ 2015-07-31 13:36 ` Hannes Reinecke
  2015-07-31 13:36 ` [PATCH 4/8] sd: Implement new RESET_WP provisioning mode Hannes Reinecke
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Hannes Reinecke @ 2015-07-31 13:36 UTC (permalink / raw)
  To: James Bottomley
  Cc: Christoph Hellwig, linux-scsi, Jens Axboe, linux-kernel,
	Hannes Reinecke, Hannes Reinecke

For ZBC devices I/O must not cross zone boundaries, so set up
the 'chunk_sectors' block queue setting to the zone size.
This is only valid for REPORT ZONES SAME type 2 or 3;
for other types the zone sizes might be different
for individual zones. So issue a warning if the type is
found to be different.

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/sd.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/sd.h |  1 +
 2 files changed, 77 insertions(+)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 3b2fcb4..b5d412a 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1861,6 +1861,57 @@ sd_spinup_disk(struct scsi_disk *sdkp)
 	}
 }
 
+/**
+ * sd_zbc_report_zones - issue REPORT ZONES and read back the raw report
+ * @sdkp: disk to query
+ * @start_lba: LBA written big-endian to bytes 2-9 of the CDB
+ * @buffer: destination buffer; zeroed before the command is issued
+ * @bufflen: size of @buffer in bytes; also written to bytes 10-13 of the CDB
+ *
+ * Returns 0 on success, -ENODEV if the device is not online, or -EIO
+ * if scsi_execute_req() reports a non-zero result.
+ */
+static int
+sd_zbc_report_zones(struct scsi_disk *sdkp, sector_t start_lba,
+		    unsigned char *buffer, int bufflen)
+{
+	struct scsi_device *sdp = sdkp->device;
+	const int timeout = sdp->request_queue->rq_timeout
+		* SD_FLUSH_TIMEOUT_MULTIPLIER;
+	struct scsi_sense_hdr sshdr;
+	unsigned char cmd[16];
+	int result;
+
+	if (!scsi_device_online(sdp)) {
+		sd_printk(KERN_INFO, sdkp, "device not online\n");
+		return -ENODEV;
+	}
+
+	/* sector_t is not size_t; print it as unsigned long long */
+	sd_printk(KERN_INFO, sdkp, "REPORT ZONES lba %llu len %d\n",
+		  (unsigned long long)start_lba, bufflen);
+
+	memset(cmd, 0, sizeof(cmd));
+	cmd[0] = ZBC_IN;
+	cmd[1] = ZI_REPORT_ZONES;
+	put_unaligned_be64(start_lba, &cmd[2]);
+	put_unaligned_be32(bufflen, &cmd[10]);
+	memset(buffer, 0, bufflen);
+
+	result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE,
+				  buffer, bufflen, &sshdr,
+				  timeout, SD_MAX_RETRIES, NULL);
+
+	if (result) {
+		sd_printk(KERN_NOTICE, sdkp,
+			  "REPORT ZONES lba %llu failed with %d/%d\n",
+			  (unsigned long long)start_lba,
+			  host_byte(result), driver_byte(result));
+
+		return -EIO;
+	}
+	return 0;
+}
 
 /*
  * Determine whether disk supports Data Integrity Field.
@@ -2631,6 +2670,7 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp)
 		queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, sdkp->disk->queue);
 	}
 
+	sdkp->zoned = (buffer[8] >> 4) & 3;
  out:
 	kfree(buffer);
 }
@@ -2692,6 +2732,41 @@ static void sd_read_write_same(struct scsi_disk *sdkp, unsigned char *buffer)
 		sdkp->ws10 = 1;
 }
 
+static void sd_read_zones(struct scsi_disk *sdkp, unsigned char *buffer)
+{
+	int retval;
+	unsigned char *desc;
+	u32 rep_len;
+	u8 same;
+	u64 zone_len;
+
+	if (sdkp->zoned != 1)
+		/* Device managed, no special handling required */
+		return;
+
+	retval = sd_zbc_report_zones(sdkp, 0, buffer, SD_BUF_SIZE);
+	if (retval < 0)
+		return;
+
+	rep_len = get_unaligned_be32(&buffer[0]);
+	same = buffer[4] & 0xf;
+	if (same != 2 && same != 3) {
+		sd_printk(KERN_WARNING, sdkp,
+			  "REPORT ZONES SAME type %d not supported\n", same);
+		return;
+	}
+	if (rep_len < 64) {
+		sd_printk(KERN_WARNING, sdkp,
+			  "REPORT ZONES report invalid length %u\n",
+			  rep_len);
+		return;
+	}
+	/* Read the zone length from the first zone descriptor */
+	desc = &buffer[64];
+	zone_len = get_unaligned_be64(&desc[8]);
+	blk_queue_chunk_sectors(sdkp->disk->queue, zone_len);
+}
+
 static int sd_try_extended_inquiry(struct scsi_device *sdp)
 {
 	/* Attempt VPD inquiry if the device blacklist explicitly calls
@@ -2757,6 +2832,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
 		sd_read_cache_type(sdkp, buffer);
 		sd_read_app_tag_own(sdkp, buffer);
 		sd_read_write_same(sdkp, buffer);
+		sd_read_zones(sdkp, buffer);
 	}
 
 	sdkp->first_scan = 0;
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 63ba5ca..6500d51 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -93,6 +93,7 @@ struct scsi_disk {
 	unsigned	lbpvpd : 1;
 	unsigned	ws10 : 1;
 	unsigned	ws16 : 1;
+	unsigned	zoned: 2;	/* ZONED field */
 };
 #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,dev)
 
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 4/8] sd: Implement new RESET_WP provisioning mode
  2015-07-31 13:36 [RFC PATCH 0/8] ZBC host-managed device support Hannes Reinecke
                   ` (2 preceding siblings ...)
  2015-07-31 13:36 ` [PATCH 3/8] sd: Set chunk_sectors to zone size Hannes Reinecke
@ 2015-07-31 13:36 ` Hannes Reinecke
  2015-07-31 13:36 ` [PATCH 5/8] block: Implement support for zoned block devices Hannes Reinecke
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Hannes Reinecke @ 2015-07-31 13:36 UTC (permalink / raw)
  To: James Bottomley
  Cc: Christoph Hellwig, linux-scsi, Jens Axboe, linux-kernel,
	Hannes Reinecke

We can map the RESET WRITE POINTER command onto a 'discard'
request.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/sd.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++---------
 drivers/scsi/sd.h |  1 +
 2 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index b5d412a..f909684 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -367,6 +367,7 @@ static const char *lbp_mode[] = {
 	[SD_LBP_WS16]		= "writesame_16",
 	[SD_LBP_WS10]		= "writesame_10",
 	[SD_LBP_ZERO]		= "writesame_zero",
+	[SD_ZBC_RESET_WP]	= "reset_wp",
 	[SD_LBP_DISABLE]	= "disabled",
 };
 
@@ -389,6 +390,13 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
+	if (sdkp->zoned == 1) {
+		if (!strncmp(buf, lbp_mode[SD_ZBC_RESET_WP], 20)) {
+			sd_config_discard(sdkp, SD_ZBC_RESET_WP);
+			return count;
+		}
+		return -EINVAL;
+	}
 	if (sdp->type != TYPE_DISK)
 		return -EINVAL;
 
@@ -668,6 +676,12 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
 		q->limits.discard_zeroes_data = sdkp->lbprz;
 		break;
 
+	case SD_ZBC_RESET_WP:
+		max_blocks = min_not_zero(sdkp->max_unmap_blocks,
+					  (u32)SD_MAX_WS16_BLOCKS);
+		q->limits.discard_zeroes_data = 1;
+		break;
+
 	case SD_LBP_ZERO:
 		max_blocks = min_not_zero(sdkp->max_ws_blocks,
 					  (u32)SD_MAX_WS10_BLOCKS);
@@ -696,16 +710,18 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
 	unsigned int nr_sectors = blk_rq_sectors(rq);
 	unsigned int nr_bytes = blk_rq_bytes(rq);
 	unsigned int len;
-	int ret;
+	int ret = 0;
 	char *buf;
-	struct page *page;
+	struct page *page = NULL;
 
 	sector >>= ilog2(sdp->sector_size) - 9;
 	nr_sectors >>= ilog2(sdp->sector_size) - 9;
 
-	page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
-	if (!page)
-		return BLKPREP_DEFER;
+	if (sdkp->provisioning_mode != SD_ZBC_RESET_WP) {
+		page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+		if (!page)
+			return BLKPREP_DEFER;
+	}
 
 	switch (sdkp->provisioning_mode) {
 	case SD_LBP_UNMAP:
@@ -745,6 +761,16 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
 		len = sdkp->device->sector_size;
 		break;
 
+	case SD_ZBC_RESET_WP:
+		cmd->cmd_len = 16;
+		cmd->cmnd[0] = ZBC_OUT;
+		cmd->cmnd[1] = ZO_RESET_WRITE_POINTER;
+		put_unaligned_be64(sector, &cmd->cmnd[2]);
+		/* Reset Write Pointer doesn't have a payload */
+		len = 0;
+		cmd->sc_data_direction = DMA_NONE;
+		break;
+
 	default:
 		ret = BLKPREP_KILL;
 		goto out;
@@ -764,12 +790,14 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
 	 * discarded on disk. This allows us to report completion on the full
 	 * amount of blocks described by the request.
 	 */
-	blk_add_request_payload(rq, page, len);
-	ret = scsi_init_io(cmd);
+	if (len) {
+		blk_add_request_payload(rq, page, len);
+		ret = scsi_init_io(cmd);
+	}
 	rq->__data_len = nr_bytes;
 
 out:
-	if (ret != BLKPREP_OK)
+	if (page && ret != BLKPREP_OK)
 		__free_page(page);
 	return ret;
 }
@@ -1136,7 +1164,8 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
 {
 	struct request *rq = SCpnt->request;
 
-	if (rq->cmd_flags & REQ_DISCARD)
+	if (rq->cmd_flags & REQ_DISCARD &&
+	    rq->completion_data)
 		__free_page(rq->completion_data);
 
 	if (SCpnt->cmnd != rq->cmd) {
@@ -1657,6 +1686,7 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 	int sense_deferred = 0;
 	unsigned char op = SCpnt->cmnd[0];
 	unsigned char unmap = SCpnt->cmnd[1] & 8;
+	unsigned char sa = SCpnt->cmnd[1] & 0xf;
 
 	if (req->cmd_flags & REQ_DISCARD || req->cmd_flags & REQ_WRITE_SAME) {
 		if (!result) {
@@ -1708,6 +1738,10 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 			case UNMAP:
 				sd_config_discard(sdkp, SD_LBP_DISABLE);
 				break;
+			case ZBC_OUT:
+				if (sa == ZO_RESET_WRITE_POINTER)
+					sd_config_discard(sdkp, SD_LBP_DISABLE);
+				break;
 			case WRITE_SAME_16:
 			case WRITE_SAME:
 				if (unmap)
@@ -2764,6 +2798,10 @@ static void sd_read_zones(struct scsi_disk *sdkp, unsigned char *buffer)
 	/* Read the zone length from the first zone descriptor */
 	desc = &buffer[64];
 	zone_len = get_unaligned_be64(&desc[8]);
+	sdkp->max_unmap_blocks = zone_len;
+	sdkp->unmap_alignment = zone_len;
+	sdkp->unmap_granularity = zone_len;
+	sd_config_discard(sdkp, SD_ZBC_RESET_WP);
 	blk_queue_chunk_sectors(sdkp->disk->queue, zone_len);
 }
 
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 6500d51..4edcf54 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -56,6 +56,7 @@ enum {
 	SD_LBP_WS16,		/* Use WRITE SAME(16) with UNMAP bit */
 	SD_LBP_WS10,		/* Use WRITE SAME(10) with UNMAP bit */
 	SD_LBP_ZERO,		/* Use WRITE SAME(10) with zero payload */
+	SD_ZBC_RESET_WP,	/* Use RESET WRITE POINTER */
 	SD_LBP_DISABLE,		/* Discard disabled due to failed cmd */
 };
 
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 5/8] block: Implement support for zoned block devices
  2015-07-31 13:36 [RFC PATCH 0/8] ZBC host-managed device support Hannes Reinecke
                   ` (3 preceding siblings ...)
  2015-07-31 13:36 ` [PATCH 4/8] sd: Implement new RESET_WP provisioning mode Hannes Reinecke
@ 2015-07-31 13:36 ` Hannes Reinecke
  2015-07-31 13:36 ` [PATCH 6/8] block: Add 'zoned' sysfs queue attribute Hannes Reinecke
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Hannes Reinecke @ 2015-07-31 13:36 UTC (permalink / raw)
  To: James Bottomley
  Cc: Christoph Hellwig, linux-scsi, Jens Axboe, linux-kernel,
	Hannes Reinecke

Implement a RB-Tree holding the zone information and
add support functions for maintaining the RB-Tree.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 block/Kconfig          |  9 +++++++
 block/Makefile         |  1 +
 block/blk-core.c       |  5 ++++
 block/blk-zoned.c      | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/blkdev.h | 47 +++++++++++++++++++++++++++++++++
 5 files changed, 132 insertions(+)
 create mode 100644 block/blk-zoned.c

diff --git a/block/Kconfig b/block/Kconfig
index 161491d..bc9c28ae 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -100,6 +100,15 @@ config BLK_DEV_THROTTLING
 
 	See Documentation/cgroups/blkio-controller.txt for more information.
 
+config BLK_DEV_ZONED
+	bool "Zoned block device support"
+	default n
+	---help---
+	Block layer zoned block device support. This option enables
+	support for zoned block (ZAC/ZBC) devices.
+
+	Say yes here if you have a ZAC or ZBC storage device.
+
 config BLK_CMDLINE_PARSER
 	bool "Block device command line partition parser"
 	default n
diff --git a/block/Makefile b/block/Makefile
index 00ecc97..171b572 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
 obj-$(CONFIG_BLK_DEV_BSGLIB)	+= bsg-lib.o
 obj-$(CONFIG_BLK_CGROUP)	+= blk-cgroup.o
 obj-$(CONFIG_BLK_DEV_THROTTLING)	+= blk-throttle.o
+obj-$(CONFIG_BLK_DEV_ZONED)	+= blk-zoned.o
 obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
 obj-$(CONFIG_IOSCHED_DEADLINE)	+= deadline-iosched.o
 obj-$(CONFIG_IOSCHED_CFQ)	+= cfq-iosched.o
diff --git a/block/blk-core.c b/block/blk-core.c
index 627ed0c..82d8ce9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -573,6 +573,8 @@ void blk_cleanup_queue(struct request_queue *q)
 	if (q->mq_ops)
 		blk_mq_free_queue(q);
 
+	blk_drop_zones(q);
+
 	spin_lock_irq(lock);
 	if (q->queue_lock != &q->__queue_lock)
 		q->queue_lock = &q->__queue_lock;
@@ -664,6 +666,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 #ifdef CONFIG_BLK_CGROUP
 	INIT_LIST_HEAD(&q->blkg_list);
 #endif
+#ifdef CONFIG_BLK_DEV_ZONED
+	q->zones = RB_ROOT;
+#endif
 	INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
 
 	kobject_init(&q->kobj, &blk_queue_ktype);
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
new file mode 100644
index 0000000..975e863
--- /dev/null
+++ b/block/blk-zoned.c
@@ -0,0 +1,83 @@
+/*
+ * Zoned block device handling
+ *
+ * Copyright (c) 2015, Hannes Reinecke
+ * Copyright (c) 2015, SUSE Linux GmbH
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/blkdev.h>
+#include <linux/rbtree.h>
+
+/*
+ * Find the zone containing @lba in the zone tree of @q.
+ * Returns the matching zone, or NULL if no zone in the tree covers @lba.
+ */
+struct blk_zone *blk_lookup_zone(struct request_queue *q, sector_t lba)
+{
+	struct rb_node *cur = q->zones.rb_node;
+
+	while (cur) {
+		struct blk_zone *z = rb_entry(cur, struct blk_zone, node);
+
+		if (lba < z->start) {
+			cur = cur->rb_left;
+			continue;
+		}
+		if (lba >= z->start + z->len) {
+			cur = cur->rb_right;
+			continue;
+		}
+		/* z->start <= lba < z->start + z->len */
+		return z;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(blk_lookup_zone);
+
+/*
+ * Link @data into the zone tree of @q.
+ * Returns NULL once @data has been inserted.  If the range of @data
+ * overlaps a zone already in the tree, nothing is inserted and that
+ * existing zone is returned instead.
+ */
+struct blk_zone *blk_insert_zone(struct request_queue *q, struct blk_zone *data)
+{
+	struct rb_root *tree = &q->zones;
+	struct rb_node **link = &tree->rb_node;
+	struct rb_node *parent = NULL;
+
+	/* Descend to the empty slot where the new node belongs */
+	while (*link) {
+		struct blk_zone *cur = rb_entry(*link, struct blk_zone, node);
+
+		parent = *link;
+		if (data->start + data->len <= cur->start)
+			link = &parent->rb_left;
+		else if (data->start >= cur->start + cur->len)
+			link = &parent->rb_right;
+		else
+			return cur;	/* Return existing zone */
+	}
+
+	/* Hook the new node in at that slot and rebalance the tree */
+	rb_link_node(&data->node, parent, link);
+	rb_insert_color(&data->node, tree);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(blk_insert_zone);
+
+/* Free every zone in the tree of @q and reset the tree to empty. */
+void blk_drop_zones(struct request_queue *q)
+{
+	struct blk_zone *cur, *tmp;
+
+	/* Post-order walk, using the _safe variant as each entry is freed */
+	rbtree_postorder_for_each_entry_safe(cur, tmp, &q->zones, node)
+		kfree(cur);
+
+	q->zones = RB_ROOT;
+}
+EXPORT_SYMBOL_GPL(blk_drop_zones);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d4068c1..746ea82 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -255,6 +255,76 @@ struct blk_queue_tag {
 #define BLK_SCSI_MAX_CMDS	(256)
 #define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
 
+#ifdef CONFIG_BLK_DEV_ZONED
+/* Kind of zone; stored in struct blk_zone.type */
+enum blk_zone_type {
+	BLK_ZONE_TYPE_UNKNOWN,
+	BLK_ZONE_TYPE_CONVENTIONAL,
+	BLK_ZONE_TYPE_SEQWRITE_REQ,
+	BLK_ZONE_TYPE_SEQWRITE_PREF,
+	BLK_ZONE_TYPE_RESERVED,
+};
+
+/* Zone condition; stored in struct blk_zone.state */
+enum blk_zone_state {
+	BLK_ZONE_UNKNOWN,
+	BLK_ZONE_NO_WP,
+	BLK_ZONE_OPEN,
+	BLK_ZONE_READONLY,
+	BLK_ZONE_OFFLINE,
+	BLK_ZONE_BUSY,
+};
+
+/*
+ * One zone, linked into request_queue.zones through @node.
+ * The zone covers the range [start, start + len).
+ * NOTE(review): wp is presumably the zone's write pointer -- confirm.
+ */
+struct blk_zone {
+	struct rb_node node;
+	spinlock_t lock;
+	uint64_t start;
+	uint64_t len;
+	uint64_t wp;
+	enum blk_zone_type type;
+	enum blk_zone_state state;
+	void *private_data;
+};
+
+/* True for sequential-write-required and sequential-write-preferred zones */
+static inline bool blk_zone_is_smr(const struct blk_zone *z)
+{
+	return z->type == BLK_ZONE_TYPE_SEQWRITE_REQ ||
+	       z->type == BLK_ZONE_TYPE_SEQWRITE_PREF;
+}
+
+/* True for conventional zones */
+static inline bool blk_zone_is_cmr(const struct blk_zone *z)
+{
+	return z->type == BLK_ZONE_TYPE_CONVENTIONAL;
+}
+
+/* True when wp equals start + len, i.e. sits at the end of the zone */
+static inline bool blk_zone_is_full(const struct blk_zone *z)
+{
+	return z->wp == z->start + z->len;
+}
+
+/* True when wp equals start, i.e. still sits at the start of the zone */
+static inline bool blk_zone_is_empty(const struct blk_zone *z)
+{
+	return z->wp == z->start;
+}
+
+extern struct blk_zone *blk_lookup_zone(struct request_queue *, sector_t);
+extern struct blk_zone *blk_insert_zone(struct request_queue *,
+					struct blk_zone *);
+extern void blk_drop_zones(struct request_queue *);
+#else
+/* Without CONFIG_BLK_DEV_ZONED the queue has no zone tree to drop */
+static inline void blk_drop_zones(struct request_queue *q) { }
+#endif
+
 struct queue_limits {
 	unsigned long		bounce_pfn;
 	unsigned long		seg_boundary_mask;
@@ -420,6 +464,9 @@ struct request_queue {
 
 	struct queue_limits	limits;
 
+#ifdef CONFIG_BLK_DEV_ZONED
+	struct rb_root		zones;
+#endif
 	/*
 	 * sg stuff
 	 */
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 6/8] block: Add 'zoned' sysfs queue attribute
  2015-07-31 13:36 [RFC PATCH 0/8] ZBC host-managed device support Hannes Reinecke
                   ` (4 preceding siblings ...)
  2015-07-31 13:36 ` [PATCH 5/8] block: Implement support for zoned block devices Hannes Reinecke
@ 2015-07-31 13:36 ` Hannes Reinecke
  2015-07-31 13:36 ` [PATCH 7/8] block: Introduce BLKPREP_DONE Hannes Reinecke
  2015-07-31 13:36 ` [PATCH 8/8] sd: Implement support for ZBC devices Hannes Reinecke
  7 siblings, 0 replies; 9+ messages in thread
From: Hannes Reinecke @ 2015-07-31 13:36 UTC (permalink / raw)
  To: James Bottomley
  Cc: Christoph Hellwig, linux-scsi, Jens Axboe, linux-kernel,
	Hannes Reinecke

Add a sysfs queue attribute 'zoned' to display the zone layout
for zoned devices.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 block/blk-sysfs.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e419f1f..5e2ba53 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -211,6 +211,70 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
 	return queue_var_show(max_hw_sectors_kb, (page));
 }
 
+#ifdef CONFIG_BLK_DEV_ZONED
+/*
+ * Room kept free per group: "%llu %llu %d " (<= 54) + "%u\n" (<= 11) +
+ * "...\n" (4) + NUL, rounded up.
+ */
+#define QUEUE_ZONED_RESERVE	80
+
+/*
+ * Show the zone layout, walking q->zones in ascending order.
+ *
+ * Runs of adjacent zones with identical type and length are folded into
+ * one line: "<start> <len> <type> <count>\n".  Output is limited to one
+ * page; when space runs out a final "...\n" line is emitted instead of
+ * the remaining groups.
+ *
+ * Returns the number of bytes written, or -EINVAL if nothing was written
+ * (e.g. the zone tree is empty).
+ */
+static ssize_t queue_zoned_show(struct request_queue *q, char *page)
+{
+	struct rb_node *node;
+	struct blk_zone *zone;
+	ssize_t offset = 0;
+	/* 64 bit, like the zone fields, so large zones are not truncated */
+	u64 size = 0, end = 0;
+	unsigned int num = 0;
+	enum blk_zone_type type = BLK_ZONE_TYPE_UNKNOWN;
+
+	for (node = rb_first(&q->zones); node; node = rb_next(node)) {
+		zone = rb_entry(node, struct blk_zone, node);
+		if (zone->type != type ||
+		    zone->len != size ||
+		    end != zone->start) {
+			/* Close the previous group with its zone count */
+			if (size != 0)
+				offset += scnprintf(page + offset,
+						    PAGE_SIZE - offset,
+						    "%u\n", num);
+			/* We can only store one page ... */
+			if (offset + QUEUE_ZONED_RESERVE > PAGE_SIZE) {
+				offset += scnprintf(page + offset,
+						    PAGE_SIZE - offset,
+						    "...\n");
+				return offset;
+			}
+			size = zone->len;
+			type = zone->type;
+			offset += scnprintf(page + offset, PAGE_SIZE - offset,
+					    "%llu %llu %d ",
+					    (unsigned long long)zone->start,
+					    (unsigned long long)size, type);
+			num = 0;
+			end = zone->start + size;
+		} else
+			end += zone->len;
+		num++;
+	}
+	if (num > 0)
+		offset += scnprintf(page + offset, PAGE_SIZE - offset,
+				    "%u\n", num);
+	return offset > 0 ? offset : -EINVAL;
+}
+#endif
+
 #define QUEUE_SYSFS_BIT_FNS(name, flag, neg)				\
 static ssize_t								\
 queue_show_##name(struct request_queue *q, char *page)			\
@@ -401,6 +438,13 @@ static struct queue_sysfs_entry queue_write_same_max_entry = {
 	.show = queue_write_same_max_show,
 };
 
+#ifdef CONFIG_BLK_DEV_ZONED
+static struct queue_sysfs_entry queue_zoned_entry = {
+	.attr = {.name = "zoned", .mode = S_IRUGO },
+	.show = queue_zoned_show,
+};
+#endif
+
 static struct queue_sysfs_entry queue_nonrot_entry = {
 	.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_show_nonrot,
@@ -451,6 +495,9 @@ static struct attribute *default_attrs[] = {
 	&queue_discard_zeroes_data_entry.attr,
 	&queue_write_same_max_entry.attr,
 	&queue_nonrot_entry.attr,
+#ifdef CONFIG_BLK_DEV_ZONED
+	&queue_zoned_entry.attr,
+#endif
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
 	&queue_iostats_entry.attr,
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 7/8] block: Introduce BLKPREP_DONE
  2015-07-31 13:36 [RFC PATCH 0/8] ZBC host-managed device support Hannes Reinecke
                   ` (5 preceding siblings ...)
  2015-07-31 13:36 ` [PATCH 6/8] block: Add 'zoned' sysfs queue attribute Hannes Reinecke
@ 2015-07-31 13:36 ` Hannes Reinecke
  2015-07-31 13:36 ` [PATCH 8/8] sd: Implement support for ZBC devices Hannes Reinecke
  7 siblings, 0 replies; 9+ messages in thread
From: Hannes Reinecke @ 2015-07-31 13:36 UTC (permalink / raw)
  To: James Bottomley
  Cc: Christoph Hellwig, linux-scsi, Jens Axboe, linux-kernel,
	Hannes Reinecke

Add a new blkprep return code BLKPREP_DONE to signal completion
without I/O error.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 block/blk-core.c        | 8 ++++++++
 drivers/scsi/scsi_lib.c | 3 ++-
 include/linux/blkdev.h  | 1 +
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 82d8ce9..ecdfe1a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2359,6 +2359,14 @@ struct request *blk_peek_request(struct request_queue *q)
 			 */
 			blk_start_request(rq);
 			__blk_end_request_all(rq, -EIO);
+		} else if (ret == BLKPREP_DONE) {
+			rq->cmd_flags |= REQ_QUIET;
+			/*
+			 * Mark this request as started so we don't trigger
+			 * any debug logic in the end I/O path.
+			 */
+			blk_start_request(rq);
+			__blk_end_request_all(rq, 0);
 		} else {
 			printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
 			break;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index b1a2631..830a70b 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1342,8 +1342,9 @@ scsi_prep_return(struct request_queue *q, struct request *req, int ret)
 	struct scsi_device *sdev = q->queuedata;
 
 	switch (ret) {
+	case BLKPREP_DONE:
 	case BLKPREP_KILL:
-		req->errors = DID_NO_CONNECT << 16;
+		req->errors = (ret == BLKPREP_KILL) ? DID_NO_CONNECT << 16 : 0;
 		/* release the command and kill it */
 		if (req->special) {
 			struct scsi_cmnd *cmd = req->special;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 746ea82..86d787d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -728,6 +728,7 @@ static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
 #define BLKPREP_OK		0	/* serve it */
 #define BLKPREP_KILL		1	/* fatal error, kill */
 #define BLKPREP_DEFER		2	/* leave on queue */
+#define BLKPREP_DONE		3	/* complete w/o error */
 
 extern unsigned long blk_max_low_pfn, blk_max_pfn;
 
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 8/8] sd: Implement support for ZBC devices
  2015-07-31 13:36 [RFC PATCH 0/8] ZBC host-managed device support Hannes Reinecke
                   ` (6 preceding siblings ...)
  2015-07-31 13:36 ` [PATCH 7/8] block: Introduce BLKPREP_DONE Hannes Reinecke
@ 2015-07-31 13:36 ` Hannes Reinecke
  7 siblings, 0 replies; 9+ messages in thread
From: Hannes Reinecke @ 2015-07-31 13:36 UTC (permalink / raw)
  To: James Bottomley
  Cc: Christoph Hellwig, linux-scsi, Jens Axboe, linux-kernel,
	Hannes Reinecke

Implement ZBC support functions to read in the zone information
and set up the zone tree.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/Kconfig  |   8 ++
 drivers/scsi/Makefile |   1 +
 drivers/scsi/sd.c     | 125 ++++++++++++++--
 drivers/scsi/sd.h     |  34 +++++
 drivers/scsi/sd_zbc.c | 390 ++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 547 insertions(+), 11 deletions(-)
 create mode 100644 drivers/scsi/sd_zbc.c

diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 456e1567..4135448 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -200,6 +200,14 @@ config SCSI_ENCLOSURE
 	  it has an enclosure device.  Selecting this option will just allow
 	  certain enclosure conditions to be reported and is not required.
 
+config SCSI_ZBC
+	bool "SCSI ZBC (zoned block commands) Support"
+	depends on SCSI && BLK_DEV_ZONED
+	help
+	  Enable support for ZBC (zoned block commands) devices.
+
+	  If unsure say N.
+
 config SCSI_CONSTANTS
 	bool "Verbose SCSI error reporting (kernel size +=75K)"
 	depends on SCSI
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 91209e3..8893305 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -176,6 +176,7 @@ hv_storvsc-y			:= storvsc_drv.o
 
 sd_mod-objs	:= sd.o
 sd_mod-$(CONFIG_BLK_DEV_INTEGRITY) += sd_dif.o
+sd_mod-$(CONFIG_SCSI_ZBC) += sd_zbc.o
 
 sr_mod-objs	:= sr.o sr_ioctl.o sr_vendor.o
 ncr53c8xx-flags-$(CONFIG_SCSI_ZALON) \
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index f909684..3f20f86 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -91,6 +91,7 @@ MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK15_MAJOR);
 MODULE_ALIAS_SCSI_DEVICE(TYPE_DISK);
 MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD);
 MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC);
+MODULE_ALIAS_SCSI_DEVICE(TYPE_ZBC);
 
 #if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
 #define SD_MINORS	16
@@ -161,7 +162,7 @@ cache_type_store(struct device *dev, struct device_attribute *attr,
 	static const char temp[] = "temporary ";
 	int len;
 
-	if (sdp->type != TYPE_DISK)
+	if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC)
 		/* no cache control on RBC devices; theoretically they
 		 * can do it, but there's probably so many exceptions
 		 * it's not worth the risk */
@@ -259,7 +260,7 @@ allow_restart_store(struct device *dev, struct device_attribute *attr,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
-	if (sdp->type != TYPE_DISK)
+	if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC)
 		return -EINVAL;
 
 	sdp->allow_restart = simple_strtoul(buf, NULL, 10);
@@ -390,7 +391,7 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
-	if (sdkp->zoned == 1) {
+	if (sdkp->zoned == 1 || sdp->type == TYPE_ZBC) {
 		if (!strncmp(buf, lbp_mode[SD_ZBC_RESET_WP], 20)) {
 			sd_config_discard(sdkp, SD_ZBC_RESET_WP);
 			return count;
@@ -464,7 +465,7 @@ max_write_same_blocks_store(struct device *dev, struct device_attribute *attr,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
-	if (sdp->type != TYPE_DISK)
+	if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC)
 		return -EINVAL;
 
 	err = kstrtoul(buf, 10, &max);
@@ -713,6 +714,10 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
 	int ret = 0;
 	char *buf;
 	struct page *page = NULL;
+#ifdef CONFIG_SCSI_ZBC
+	struct blk_zone *zone;
+	unsigned long flags;
+#endif
 
 	sector >>= ilog2(sdp->sector_size) - 9;
 	nr_sectors >>= ilog2(sdp->sector_size) - 9;
@@ -762,6 +767,52 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
 		break;
 
 	case SD_ZBC_RESET_WP:
+#ifdef CONFIG_SCSI_ZBC
+		zone = blk_lookup_zone(rq->q, sector);
+		if (!zone) {
+			ret = BLKPREP_KILL;
+			goto out;
+		}
+		spin_lock_irqsave(&zone->lock, flags);
+		if (zone->state == BLK_ZONE_BUSY) {
+			sd_printk(KERN_INFO, sdkp,
+				  "Discarding busy zone %llu/%llu\n",
+				  zone->start, zone->len);
+			spin_unlock_irqrestore(&zone->lock, flags);
+			ret = BLKPREP_DEFER;
+			goto out;
+		}
+		if (!blk_zone_is_smr(zone)) {
+			sd_printk(KERN_INFO, sdkp,
+				  "Discarding %s zone %llu/%llu\n",
+				  blk_zone_is_cmr(zone) ? "CMR" : "unknown",
+				  zone->start, zone->len);
+			spin_unlock_irqrestore(&zone->lock, flags);
+			ret = BLKPREP_DONE;
+			goto out;
+		}
+		if (blk_zone_is_empty(zone)) {
+			spin_unlock_irqrestore(&zone->lock, flags);
+			ret = BLKPREP_DONE;
+			goto out;
+		}
+		if (zone->start != sector ||
+		    zone->len < nr_sectors) {
+			sd_printk(KERN_INFO, sdkp,
+				  "Misaligned RESET WP, start %llu/%zu "
+				  "len %llu/%u\n",
+				  zone->start, sector, zone->len, nr_sectors);
+			spin_unlock_irqrestore(&zone->lock, flags);
+			ret = BLKPREP_KILL;
+			goto out;
+		}
+		/*
+		 * Opportunistic setting, needs to be fixed up
+		 * if RESET WRITE POINTER fails.
+		 */
+		zone->wp = zone->start;
+		spin_unlock_irqrestore(&zone->lock, flags);
+#endif
 		cmd->cmd_len = 16;
 		cmd->cmnd[0] = ZBC_OUT;
 		cmd->cmnd[1] = ZO_RESET_WRITE_POINTER;
@@ -1016,6 +1067,13 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
 			this_count = this_count >> 3;
 		}
 	}
+
+	if (sdkp->zoned || sdp->type == TYPE_ZBC) {
+		ret = sd_zbc_lookup_zone(sdkp, rq, block, this_count);
+		if (ret != BLKPREP_OK)
+			goto out;
+	}
+
 	if (rq_data_dir(rq) == WRITE) {
 		SCpnt->cmnd[0] = WRITE_6;
 
@@ -1693,6 +1751,13 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 			good_bytes = blk_rq_bytes(req);
 			scsi_set_resid(SCpnt, 0);
 		} else {
+#ifdef CONFIG_SCSI_ZBC
+			if (op == ZBC_OUT)
+				/* RESET WRITE POINTER failed */
+				sd_zbc_update_zones(sdkp,
+						    blk_rq_pos(req),
+						    512, true);
+#endif
 			good_bytes = 0;
 			scsi_set_resid(SCpnt, blk_rq_bytes(req));
 		}
@@ -1756,6 +1821,26 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 				}
 			}
 		}
+		if (sshdr.asc == 0x21) {
+			/*
+			 * ZBC: read beyond the write pointer position.
+			 * Clear out error and return the buffer as-is.
+			 */
+			if (sshdr.ascq == 0x06) {
+				good_bytes = blk_rq_bytes(req);
+				scsi_set_resid(SCpnt, 0);
+			}
+#ifdef CONFIG_SCSI_ZBC
+			/*
+			 * ZBC: Unaligned write command.
+			 * Write did not start at the write pointer position.
+			 */
+			if (sshdr.ascq == 0x04)
+				sd_zbc_update_zones(sdkp,
+						    blk_rq_pos(req),
+						    512, true);
+#endif
+		}
 		break;
 	default:
 		break;
@@ -1895,9 +1980,8 @@ sd_spinup_disk(struct scsi_disk *sdkp)
 	}
 }
 
-static int
-sd_zbc_report_zones(struct scsi_disk *sdkp, sector_t start_lba,
-		    unsigned char *buffer, int bufflen )
+int sd_zbc_report_zones(struct scsi_disk *sdkp, sector_t start_lba,
+			unsigned char *buffer, int bufflen )
 {
 	struct scsi_device *sdp = sdkp->device;
 	const int timeout = sdp->request_queue->rq_timeout
@@ -2575,7 +2659,7 @@ static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer)
 	struct scsi_mode_data data;
 	struct scsi_sense_hdr sshdr;
 
-	if (sdp->type != TYPE_DISK)
+	if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC)
 		return;
 
 	if (sdkp->protection_type == 0)
@@ -2768,14 +2852,18 @@ static void sd_read_write_same(struct scsi_disk *sdkp, unsigned char *buffer)
 
 static void sd_read_zones(struct scsi_disk *sdkp, unsigned char *buffer)
 {
+	struct scsi_device *sdp = sdkp->device;
 	int retval;
 	unsigned char *desc;
 	u32 rep_len;
 	u8 same;
 	u64 zone_len;
 
-	if (sdkp->zoned != 1)
-		/* Device managed, no special handling required */
+	if (sdkp->zoned != 1 && sdp->type != TYPE_ZBC)
+		/*
+		 * Device managed or normal SCSI disk,
+		 * no special handling required
+		 */
 		return;
 
 	retval = sd_zbc_report_zones(sdkp, 0, buffer, SD_BUF_SIZE);
@@ -2875,6 +2963,9 @@ static int sd_revalidate_disk(struct gendisk *disk)
 
 	sdkp->first_scan = 0;
 
+	if (sdkp->zoned || sdp->type == TYPE_ZBC)
+		sd_zbc_setup(sdkp, buffer, SD_BUF_SIZE);
+
 	/*
 	 * We now have all cache related info, determine how we deal
 	 * with flush requests.
@@ -3047,9 +3138,16 @@ static int sd_probe(struct device *dev)
 
 	scsi_autopm_get_device(sdp);
 	error = -ENODEV;
-	if (sdp->type != TYPE_DISK && sdp->type != TYPE_MOD && sdp->type != TYPE_RBC)
+	if (sdp->type != TYPE_DISK &&
+	    sdp->type != TYPE_ZBC &&
+	    sdp->type != TYPE_MOD &&
+	    sdp->type != TYPE_RBC)
 		goto out;
 
+#ifndef CONFIG_SCSI_ZBC
+	if (sdp->type == TYPE_ZBC)
+		goto out;
+#endif
 	SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp,
 					"sd_probe\n"));
 
@@ -3153,6 +3251,8 @@ static int sd_remove(struct device *dev)
 	del_gendisk(sdkp->disk);
 	sd_shutdown(dev);
 
+	sd_zbc_remove(sdkp);
+
 	blk_register_region(devt, SD_MINORS, NULL,
 			    sd_default_probe, NULL, NULL);
 
@@ -3183,6 +3283,9 @@ static void scsi_disk_release(struct device *dev)
 	spin_unlock(&sd_index_lock);
 
 	blk_integrity_unregister(disk);
+#ifdef CONFIG_SCSI_ZBC
+	drain_workqueue(sdkp->zone_work_q);
+#endif
 	disk->private_data = NULL;
 	put_disk(disk);
 	put_device(&sdkp->device->sdev_gendev);
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 4edcf54..e911306 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -65,6 +65,10 @@ struct scsi_disk {
 	struct scsi_device *device;
 	struct device	dev;
 	struct gendisk	*disk;
+#ifdef CONFIG_SCSI_ZBC
+	struct workqueue_struct *zone_work_q;
+	atomic_t	zone_reset;
+#endif
 	atomic_t	openers;
 	sector_t	capacity;	/* size in 512-byte sectors */
 	u32		max_xfer_blocks;
@@ -260,4 +264,34 @@ static inline void sd_dif_complete(struct scsi_cmnd *cmd, unsigned int a)
 
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
+#ifdef CONFIG_SCSI_ZBC
+
+extern int sd_zbc_report_zones(struct scsi_disk *sdkp, sector_t start_lba,
+			       unsigned char *buffer, int bufflen );
+extern int sd_zbc_setup(struct scsi_disk *, unsigned char *, int);
+extern void sd_zbc_remove(struct scsi_disk *);
+extern void sd_zbc_reset_zones(struct scsi_disk *);
+extern int sd_zbc_lookup_zone(struct scsi_disk *, struct request *,
+			      sector_t, unsigned int);
+extern void sd_zbc_update_zones(struct scsi_disk *, sector_t, int, bool);
+extern void sd_zbc_refresh_zone_work(struct work_struct *);
+
+#else /* CONFIG_SCSI_ZBC */
+
+static inline int sd_zbc_setup(struct scsi_disk *sdkp,
+			       unsigned char *buf, int buf_len)
+{
+	return 0;
+}
+
+static inline int sd_zbc_lookup_zone(struct scsi_disk *sdkp,
+				     struct request *rq, sector_t sector,
+				     unsigned int num_sectors)
+{
+	return BLKPREP_OK;
+}
+
+static inline void sd_zbc_remove(struct scsi_disk *sdkp) {}
+#endif /* CONFIG_SCSI_ZBC */
+
 #endif /* _SCSI_DISK_H */
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
new file mode 100644
index 0000000..67f18cb
--- /dev/null
+++ b/drivers/scsi/sd_zbc.c
@@ -0,0 +1,390 @@
+/*
+ * sd_zbc.c - SCSI Zoned Block commands
+ *
+ * Copyright (C) 2014-2015 SUSE Linux GmbH
+ * Written by: Hannes Reinecke <hare@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/rbtree.h>
+
+#include <asm/unaligned.h>
+
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_dbg.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_driver.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_eh.h>
+
+#include "sd.h"
+#include "scsi_priv.h"
+
+enum zbc_zone_cond {
+	ZBC_ZONE_COND_NO_WP,
+	ZBC_ZONE_COND_EMPTY,
+	ZBC_ZONE_COND_IMPLICIT_OPEN,
+	ZBC_ZONE_COND_EXPLICIT_OPEN,
+	ZBC_ZONE_COND_CLOSED,
+	ZBC_ZONE_COND_READONLY = 0xd,
+	ZBC_ZONE_COND_FULL,
+	ZBC_ZONE_COND_OFFLINE,
+};
+
+#define SD_ZBC_BUF_SIZE 131072
+#define SD_ZBC_QUEUE_DELAY 5
+
+#undef SD_ZBC_DEBUG
+
+struct zbc_update_work {
+	struct work_struct zone_work;
+	struct scsi_disk *sdkp;
+	spinlock_t	zone_lock;
+	sector_t	zone_lba;
+	int		zone_buflen;
+	bool		zone_update;
+	char		zone_buf[0];
+};
+
+struct blk_zone *zbc_desc_to_zone(struct scsi_disk *sdkp, unsigned char *rec)
+{
+	struct blk_zone *zone;
+	enum zbc_zone_cond zone_cond;
+	u64 wp = (u64)-1;
+
+	zone = kzalloc(sizeof(struct blk_zone), GFP_KERNEL);
+	if (!zone)
+		return NULL;
+
+	spin_lock_init(&zone->lock);
+	zone->type = rec[0] & 0xf;
+	zone_cond = (rec[1] >> 4) & 0xf;
+	zone->len = get_unaligned_be64(&rec[8]);
+	zone->start = get_unaligned_be64(&rec[16]);
+
+	if (blk_zone_is_smr(zone)) {
+		wp = get_unaligned_be64(&rec[24]);
+		if (zone_cond == ZBC_ZONE_COND_READONLY) {
+			zone->state = BLK_ZONE_READONLY;
+		} else if (zone_cond == ZBC_ZONE_COND_OFFLINE) {
+			zone->state = BLK_ZONE_OFFLINE;
+		} else {
+			zone->state = BLK_ZONE_OPEN;
+		}
+	} else
+		zone->state = BLK_ZONE_NO_WP;
+
+	zone->wp = wp;
+	/*
+	 * Fix up the write pointer to match the reported zone condition
+	 */
+	if (zone_cond == ZBC_ZONE_COND_EMPTY &&
+	    zone->wp != zone->start) {
+#ifdef SD_ZBC_DEBUG
+		sd_printk(KERN_INFO, sdkp,
+			  "zone %llu state EMPTY wp %llu: adjust wp\n",
+			  zone->start, zone->wp);
+#endif
+		zone->wp = zone->start;
+	}
+	if (zone_cond == ZBC_ZONE_COND_FULL &&
+	    zone->wp != zone->start + zone->len) {
+#ifdef SD_ZBC_DEBUG
+		sd_printk(KERN_INFO, sdkp,
+			  "zone %llu state FULL wp %llu: adjust wp\n",
+			  zone->start, zone->wp);
+#endif
+		zone->wp = zone->start + zone->len;
+	}
+
+	return zone;
+}
+
+sector_t zbc_parse_zones(struct scsi_disk *sdkp, unsigned char *buf,
+			 unsigned int buf_len, sector_t start_lba)
+{
+	struct request_queue *q = sdkp->disk->queue;
+	unsigned char *rec = buf;
+	int rec_no = 0;
+	unsigned int list_length;
+	sector_t next_lba = -1;
+	u8 same;
+
+	/* Parse REPORT ZONES header */
+	list_length = get_unaligned_be32(&buf[0]);
+	same = buf[4] & 0xf;
+	rec = buf + 64;
+	list_length += 64;
+
+	if (list_length < buf_len)
+		buf_len = list_length;
+
+	while (rec < buf + buf_len) {
+		struct blk_zone *this, *old;
+		unsigned long flags;
+
+		this = zbc_desc_to_zone(sdkp, rec);
+		if (!this)
+			break;
+
+		next_lba = this->start + this->len;
+		old = blk_insert_zone(q, this);
+		if (old) {
+			spin_lock_irqsave(&old->lock, flags);
+			if (blk_zone_is_smr(old)) {
+				old->wp = this->wp;
+				old->state = this->state;
+			}
+			spin_unlock_irqrestore(&old->lock, flags);
+			kfree(this);
+		}
+		rec += 64;
+		rec_no++;
+	}
+
+#ifdef SD_ZBC_DEBUG
+	sd_printk(KERN_INFO, sdkp,
+		  "Inserted %d zones, next lba %zu len %d\n",
+		  rec_no, next_lba, list_length);
+#endif
+	return next_lba;
+}
+
+void sd_zbc_refresh_zone_work(struct work_struct *work)
+{
+	struct zbc_update_work *zbc_work =
+		container_of(work, struct zbc_update_work, zone_work);
+	struct request_queue *q = zbc_work->sdkp->disk->queue;
+	unsigned long flags;
+	unsigned int zone_buflen;
+	int ret;
+	sector_t last_lba;
+
+	zone_buflen = zbc_work->zone_buflen;
+	ret = sd_zbc_report_zones(zbc_work->sdkp, zbc_work->zone_lba,
+				  zbc_work->zone_buf, zone_buflen);
+	if (ret)
+		goto done_free;
+
+	last_lba = zbc_parse_zones(zbc_work->sdkp, zbc_work->zone_buf,
+				   zone_buflen, zbc_work->zone_lba);
+	if (last_lba != -1 && last_lba < zbc_work->sdkp->capacity &&
+	    !zbc_work->zone_update) {
+		if (atomic_read(&zbc_work->sdkp->zone_reset)) {
+			sd_printk(KERN_INFO, zbc_work->sdkp,
+				  "zones in reset, cancelling refresh\n");
+			goto done_free;
+		}
+
+		zbc_work->zone_lba = last_lba;
+		queue_work(zbc_work->sdkp->zone_work_q, &zbc_work->zone_work);
+		/* Kick request queue to be on the safe side */
+		goto done_start_queue;
+	}
+done_free:
+	kfree(zbc_work);
+done_start_queue:
+	spin_lock_irqsave(q->queue_lock, flags);
+	blk_start_queue(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+void sd_zbc_update_zones(struct scsi_disk *sdkp, sector_t lba, int bufsize,
+			 bool update)
+{
+	struct request_queue *q = sdkp->disk->queue;
+	struct zbc_update_work *zbc_work;
+	struct blk_zone *zone;
+	struct rb_node *node;
+	int zone_num = 0, zone_busy = 0, num_rec;
+	sector_t next_lba = lba;
+
+	if (atomic_read(&sdkp->zone_reset)) {
+		sd_printk(KERN_INFO, sdkp,
+			  "zones in reset, not starting update\n");
+		return;
+	}
+
+retry:
+	zbc_work = kzalloc(sizeof(struct zbc_update_work) + bufsize,
+			   GFP_KERNEL);
+	if (!zbc_work) {
+		if (bufsize > 512) {
+			sd_printk(KERN_INFO, sdkp,
+				  "retry with buffer size %d\n", bufsize);
+			bufsize = bufsize >> 1;
+			goto retry;
+		}
+		sd_printk(KERN_INFO, sdkp,
+			  "failed to allocate %d bytes\n", bufsize);
+		return;
+	}
+	zbc_work->zone_lba = lba;
+	zbc_work->zone_buflen = bufsize;
+	zbc_work->zone_update = update;
+	zbc_work->sdkp = sdkp;
+	INIT_WORK(&zbc_work->zone_work, sd_zbc_refresh_zone_work);
+	num_rec = (bufsize / 64) - 1;
+
+	for (node = rb_first(&q->zones); node; node = rb_next(node)) {
+		unsigned long flags;
+
+		zone = rb_entry(node, struct blk_zone, node);
+		if (update) {
+			if (num_rec == 0)
+				break;
+			if (zone->start != next_lba)
+				continue;
+			next_lba += zone->len;
+			num_rec--;
+		}
+		spin_lock_irqsave(&zone->lock, flags);
+		if (blk_zone_is_smr(zone)) {
+			if (zone->state == BLK_ZONE_BUSY) {
+				zone_busy++;
+			} else {
+				zone->state = BLK_ZONE_BUSY;
+				zone->wp = zone->start;
+			}
+			zone_num++;
+		}
+		spin_unlock_irqrestore(&zone->lock, flags);
+	}
+	if (zone_num && (zone_num == zone_busy)) {
+		sd_printk(KERN_INFO, sdkp,
+			  "zone %s for %zu in progress\n",
+			  update ? "update" : "refresh", lba);
+		kfree(zbc_work);
+		return;
+	}
+
+	if (atomic_read(&sdkp->zone_reset)) {
+		sd_printk(KERN_INFO, sdkp,
+			  "zones in reset, not starting update\n");
+		kfree(zbc_work);
+		return;
+	}
+	if (!queue_work(sdkp->zone_work_q, &zbc_work->zone_work)) {
+		sd_printk(KERN_INFO, sdkp,
+			  "zone update already queued?\n");
+		kfree(zbc_work);
+	}
+}
+
+int sd_zbc_lookup_zone(struct scsi_disk *sdkp, struct request *rq,
+		       sector_t sector, unsigned int num_sectors)
+{
+	struct request_queue *q = sdkp->disk->queue;
+	struct blk_zone *zone = NULL;
+	int ret = BLKPREP_OK;
+	unsigned long flags;
+
+	zone = blk_lookup_zone(q, sector);
+	/* Might happen during zone initialization */
+	if (!zone) {
+		if (printk_ratelimit())
+			sd_printk(KERN_INFO, sdkp,
+				  "zone for sector %zu not found, %s\n",
+				  sector, sdkp->device->type == TYPE_ZBC ?
+				  "deferring" : "skipping");
+		if (sdkp->device->type != TYPE_ZBC)
+			return BLKPREP_OK;
+		blk_delay_queue(q, 5);
+		return BLKPREP_DEFER;
+	}
+	spin_lock_irqsave(&zone->lock, flags);
+	if (zone->state == BLK_ZONE_UNKNOWN ||
+	    zone->state == BLK_ZONE_BUSY) {
+		if (printk_ratelimit())
+			sd_printk(KERN_INFO, sdkp,
+				  "zone %llu state %x, deferring\n",
+				  zone->start, zone->state);
+		blk_delay_queue(q, 5);
+		ret = BLKPREP_DEFER;
+	} else {
+		if (rq_data_dir(rq) == WRITE) {
+			if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
+				goto out;
+			if (blk_zone_is_full(zone)) {
+#ifdef SD_ZBC_DEBUG
+				sd_printk(KERN_ERR, sdkp,
+					  "Write to full zone %zu/%llu\n",
+					  sector, zone->wp);
+#endif
+				ret = BLKPREP_KILL;
+				goto out;
+			}
+			if (zone->wp != sector) {
+#ifdef SD_ZBC_DEBUG
+				sd_printk(KERN_ERR, sdkp,
+					  "Misaligned write %zu/%llu\n",
+					  sector, zone->wp);
+#endif
+				ret = BLKPREP_KILL;
+				goto out;
+			}
+			zone->wp += num_sectors;
+		} else if (blk_zone_is_smr(zone) && (zone->wp <= sector)) {
+#ifdef SD_ZBC_DEBUG
+			sd_printk(KERN_INFO, sdkp,
+				    "Read beyond wp %zu/%llu\n",
+				    sector, zone->wp);
+#endif
+			ret = BLKPREP_DONE;
+		}
+	}
+out:
+	spin_unlock_irqrestore(&zone->lock, flags);
+
+	return ret;
+}
+
+int sd_zbc_setup(struct scsi_disk *sdkp, unsigned char *buf, int buf_len)
+{
+	if (!sdkp->zone_work_q) {
+		char wq_name[32];
+
+		sprintf(wq_name, "zbc_wq_%s", sdkp->disk->disk_name);
+		sdkp->zone_work_q = create_singlethread_workqueue(wq_name);
+		if (!sdkp->zone_work_q) {
+			sdev_printk(KERN_WARNING, sdkp->device,
+				    "create zoned disk workqueue failed\n");
+			return -ENOMEM;
+		}
+		atomic_set(&sdkp->zone_reset, 0);
+	} else {
+		atomic_inc(&sdkp->zone_reset);
+		drain_workqueue(sdkp->zone_work_q);
+		atomic_set(&sdkp->zone_reset, 0);
+	}
+
+	sd_zbc_update_zones(sdkp, 0, SD_ZBC_BUF_SIZE, false);
+
+	blk_queue_io_min(sdkp->disk->queue, 4);
+	return 0;
+}
+
+void sd_zbc_remove(struct scsi_disk *sdkp)
+{
+	if (sdkp->zone_work_q) {
+		atomic_inc(&sdkp->zone_reset);
+		drain_workqueue(sdkp->zone_work_q);
+		destroy_workqueue(sdkp->zone_work_q);
+	}
+}
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2015-07-31 13:39 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-31 13:36 [RFC PATCH 0/8] ZBC host-managed device support Hannes Reinecke
2015-07-31 13:36 ` [PATCH 1/8] blk-sysfs: Add 'chunk_sectors' to sysfs attributes Hannes Reinecke
2015-07-31 13:36 ` [PATCH 2/8] block: update chunk_sectors in blk_stack_limits() Hannes Reinecke
2015-07-31 13:36 ` [PATCH 3/8] sd: Set chunk_sectors to zone size Hannes Reinecke
2015-07-31 13:36 ` [PATCH 4/8] sd: Implement new RESET_WP provisioning mode Hannes Reinecke
2015-07-31 13:36 ` [PATCH 5/8] block: Implement support for zoned block devices Hannes Reinecke
2015-07-31 13:36 ` [PATCH 6/8] block: Add 'zoned' sysfs queue attribute Hannes Reinecke
2015-07-31 13:36 ` [PATCH 7/8] block: Introduce BLKPREP_DONE Hannes Reinecke
2015-07-31 13:36 ` [PATCH 8/8] sd: Implement support for ZBC devices Hannes Reinecke

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox