* [PATCH 1/8] blk-sysfs: Add 'chunk_sectors' to sysfs attributes
2015-07-31 13:36 [RFC PATCH 0/8] ZBC host-managed device support Hannes Reinecke
@ 2015-07-31 13:36 ` Hannes Reinecke
2015-07-31 13:36 ` [PATCH 2/8] block: update chunk_sectors in blk_stack_limits() Hannes Reinecke
` (6 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Hannes Reinecke @ 2015-07-31 13:36 UTC (permalink / raw)
To: James Bottomley
Cc: Christoph Hellwig, linux-scsi, Jens Axboe, linux-kernel,
Hannes Reinecke
The queue limits already have a 'chunk_sectors' setting, so
we should be presenting it via sysfs.
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
block/blk-sysfs.c | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 6264b38..e419f1f 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -130,6 +130,26 @@ static ssize_t queue_physical_block_size_show(struct request_queue *q, char *pag
return queue_var_show(queue_physical_block_size(q), page);
}
+/*
+ * sysfs show handler for the 'chunk_sectors' queue attribute: formats
+ * the queue's current chunk_sectors limit into @page.
+ */
+static ssize_t queue_chunk_sectors_show(struct request_queue *q, char *page)
+{
+	unsigned int chunk_sectors = q->limits.chunk_sectors;
+
+	return queue_var_show(chunk_sectors, page);
+}
+
+/*
+ * sysfs store handler for the 'chunk_sectors' queue attribute.
+ *
+ * Parses the value written to @page and installs it as the queue's
+ * chunk_sectors limit while holding the queue lock.
+ *
+ * blk_queue_chunk_sectors() takes an unsigned int and insists on a
+ * power-of-two value (it BUG_ON()s otherwise), so anything else coming
+ * from user space must be rejected here instead of being passed on.
+ *
+ * Returns the value from queue_var_store() on success, its negative
+ * error code on a parse failure, or -EINVAL for an unusable value.
+ */
+static ssize_t
+queue_chunk_sectors_store(struct request_queue *q, const char *page, size_t count)
+{
+	unsigned long chunk_sectors;
+	ssize_t ret;
+
+	ret = queue_var_store(&chunk_sectors, page, count);
+	if (ret < 0)
+		return ret;
+
+	/* Reject zero, non-powers-of-two and values that would truncate. */
+	if (!chunk_sectors || chunk_sectors > UINT_MAX ||
+	    (chunk_sectors & (chunk_sectors - 1)))
+		return -EINVAL;
+
+	spin_lock_irq(q->queue_lock);
+	blk_queue_chunk_sectors(q, chunk_sectors);
+	spin_unlock_irq(q->queue_lock);
+
+	return ret;
+}
+
static ssize_t queue_io_min_show(struct request_queue *q, char *page)
{
return queue_var_show(queue_io_min(q), page);
@@ -345,6 +365,12 @@ static struct queue_sysfs_entry queue_physical_block_size_entry = {
.show = queue_physical_block_size_show,
};
+/* 'chunk_sectors' queue attribute: world-readable, writable by the owner. */
+static struct queue_sysfs_entry queue_chunk_sectors_entry = {
+ .attr = {.name = "chunk_sectors", .mode = S_IRUGO | S_IWUSR },
+ .show = queue_chunk_sectors_show,
+ .store = queue_chunk_sectors_store,
+};
+
static struct queue_sysfs_entry queue_io_min_entry = {
.attr = {.name = "minimum_io_size", .mode = S_IRUGO },
.show = queue_io_min_show,
@@ -417,6 +443,7 @@ static struct attribute *default_attrs[] = {
&queue_hw_sector_size_entry.attr,
&queue_logical_block_size_entry.attr,
&queue_physical_block_size_entry.attr,
+ &queue_chunk_sectors_entry.attr,
&queue_io_min_entry.attr,
&queue_io_opt_entry.attr,
&queue_discard_granularity_entry.attr,
--
1.8.5.2
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 2/8] block: update chunk_sectors in blk_stack_limits()
2015-07-31 13:36 [RFC PATCH 0/8] ZBC host-managed device support Hannes Reinecke
2015-07-31 13:36 ` [PATCH 1/8] blk-sysfs: Add 'chunk_sectors' to sysfs attributes Hannes Reinecke
@ 2015-07-31 13:36 ` Hannes Reinecke
2015-07-31 13:36 ` [PATCH 3/8] sd: Set chunk_sectors to zone size Hannes Reinecke
` (5 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Hannes Reinecke @ 2015-07-31 13:36 UTC (permalink / raw)
To: James Bottomley
Cc: Christoph Hellwig, linux-scsi, Jens Axboe, linux-kernel,
Hannes Reinecke, Hannes Reinecke
Signed-off-by: Hannes Reinecke <hare@suse.com>
---
block/blk-settings.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 12600bf..63aa067 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -647,6 +647,9 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->discard_granularity;
}
+ if (b->chunk_sectors)
+ t->chunk_sectors = max(t->chunk_sectors, b->chunk_sectors);
+
return ret;
}
EXPORT_SYMBOL(blk_stack_limits);
--
1.8.5.2
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 3/8] sd: Set chunk_sectors to zone size
2015-07-31 13:36 [RFC PATCH 0/8] ZBC host-managed device support Hannes Reinecke
2015-07-31 13:36 ` [PATCH 1/8] blk-sysfs: Add 'chunk_sectors' to sysfs attributes Hannes Reinecke
2015-07-31 13:36 ` [PATCH 2/8] block: update chunk_sectors in blk_stack_limits() Hannes Reinecke
@ 2015-07-31 13:36 ` Hannes Reinecke
2015-07-31 13:36 ` [PATCH 4/8] sd: Implement new RESET_WP provisioning mode Hannes Reinecke
` (4 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Hannes Reinecke @ 2015-07-31 13:36 UTC (permalink / raw)
To: James Bottomley
Cc: Christoph Hellwig, linux-scsi, Jens Axboe, linux-kernel,
Hannes Reinecke, Hannes Reinecke
For ZBC devices I/O must not cross zone boundaries, so set
the 'chunk_sectors' block queue limit to the zone size.
This is only valid for REPORT ZONES SAME type 2 or 3;
for other types the zone sizes might be different
for individual zones. So issue a warning if the type is
found to be different.
Signed-off-by: Hannes Reinecke <hare@suse.com>
---
drivers/scsi/sd.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
drivers/scsi/sd.h | 1 +
2 files changed, 77 insertions(+)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 3b2fcb4..b5d412a 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1861,6 +1861,45 @@ sd_spinup_disk(struct scsi_disk *sdkp)
}
}
+/*
+ * Issue a REPORT ZONES command and place the reply in @buffer.
+ *
+ * @sdkp:      disk to query
+ * @start_lba: LBA placed in bytes 2-9 of the CDB
+ * @buffer:    reply buffer; zeroed before the command is sent
+ * @bufflen:   size of @buffer, also sent as the allocation length
+ *             (CDB bytes 10-13)
+ *
+ * Returns 0 on success, -ENODEV if the device is not online and -EIO
+ * if the command did not complete successfully.
+ */
+static int
+sd_zbc_report_zones(struct scsi_disk *sdkp, sector_t start_lba,
+		    unsigned char *buffer, int bufflen )
+{
+	struct scsi_device *sdp = sdkp->device;
+	const int timeout = sdp->request_queue->rq_timeout
+			* SD_FLUSH_TIMEOUT_MULTIPLIER;
+	struct scsi_sense_hdr sshdr;
+	unsigned char cmd[16];
+	int result;
+
+	if (!scsi_device_online(sdp)) {
+		sd_printk(KERN_INFO, sdkp, "device not online\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * sector_t has no printk specifier of its own and its width
+	 * depends on the configuration, so cast it for %llu.
+	 */
+	sd_printk(KERN_INFO, sdkp, "REPORT ZONES lba %llu len %d\n",
+		  (unsigned long long)start_lba, bufflen);
+
+	memset(cmd, 0, sizeof(cmd));
+	cmd[0] = ZBC_IN;
+	cmd[1] = ZI_REPORT_ZONES;
+	put_unaligned_be64(start_lba, &cmd[2]);
+	put_unaligned_be32(bufflen, &cmd[10]);
+	memset(buffer, 0, bufflen);
+
+	result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE,
+				  buffer, bufflen, &sshdr,
+				  timeout, SD_MAX_RETRIES, NULL);
+
+	if (result) {
+		sd_printk(KERN_NOTICE, sdkp,
+			  "REPORT ZONES lba %llu failed with %d/%d\n",
+			  (unsigned long long)start_lba,
+			  host_byte(result), driver_byte(result));
+
+		return -EIO;
+	}
+	return 0;
+}
/*
* Determine whether disk supports Data Integrity Field.
@@ -2631,6 +2670,7 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp)
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, sdkp->disk->queue);
}
+ sdkp->zoned = (buffer[8] >> 4) & 3;
out:
kfree(buffer);
}
@@ -2692,6 +2732,41 @@ static void sd_read_write_same(struct scsi_disk *sdkp, unsigned char *buffer)
sdkp->ws10 = 1;
}
+/*
+ * Derive the queue's chunk_sectors limit from the zone layout.
+ *
+ * Only acts when the ZONED field read earlier is 1.  Issues REPORT ZONES
+ * starting at LBA 0, requires a SAME field of 2 or 3 (other values are
+ * warned about and ignored), and takes the zone length from the first
+ * zone descriptor, which follows the 64-byte report header.
+ *
+ * The zone length is reported in logical blocks, whereas chunk_sectors
+ * is an unsigned int count of 512-byte sectors that
+ * blk_queue_chunk_sectors() requires to be a power of two.  The length
+ * is therefore converted and validated before it is applied; a zone
+ * size that cannot be represented is reported and left unset.
+ *
+ * @buffer is scratch space of at least SD_BUF_SIZE bytes.
+ */
+static void sd_read_zones(struct scsi_disk *sdkp, unsigned char *buffer)
+{
+	unsigned int shift = ilog2(sdkp->device->sector_size) - 9;
+	int retval;
+	unsigned char *desc;
+	u32 rep_len;
+	u8 same;
+	u64 zone_len;
+	u64 chunk_sectors;
+
+	if (sdkp->zoned != 1)
+		/* Device managed, no special handling required */
+		return;
+
+	retval = sd_zbc_report_zones(sdkp, 0, buffer, SD_BUF_SIZE);
+	if (retval < 0)
+		return;
+
+	rep_len = get_unaligned_be32(&buffer[0]);
+	same = buffer[4] & 0xf;
+	if (same != 2 && same != 3) {
+		sd_printk(KERN_WARNING, sdkp,
+			  "REPORT ZONES SAME type %d not supported\n", same);
+		return;
+	}
+	if (rep_len < 64) {
+		sd_printk(KERN_WARNING, sdkp,
+			  "REPORT ZONES report invalid length %u\n",
+			  rep_len);
+		return;
+	}
+	/* Read the zone length from the first zone descriptor */
+	desc = &buffer[64];
+	zone_len = get_unaligned_be64(&desc[8]);
+
+	/*
+	 * Bound zone_len before shifting so the conversion to 512-byte
+	 * sectors can neither overflow nor exceed an unsigned int.
+	 */
+	if (!zone_len || zone_len > (UINT_MAX >> shift)) {
+		sd_printk(KERN_WARNING, sdkp,
+			  "REPORT ZONES zone length %llu not supported\n",
+			  (unsigned long long)zone_len);
+		return;
+	}
+	chunk_sectors = zone_len << shift;
+	if (chunk_sectors & (chunk_sectors - 1)) {
+		sd_printk(KERN_WARNING, sdkp,
+			  "REPORT ZONES zone length %llu is not a power of two\n",
+			  (unsigned long long)zone_len);
+		return;
+	}
+	blk_queue_chunk_sectors(sdkp->disk->queue, chunk_sectors);
+}
+
static int sd_try_extended_inquiry(struct scsi_device *sdp)
{
/* Attempt VPD inquiry if the device blacklist explicitly calls
@@ -2757,6 +2832,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
sd_read_cache_type(sdkp, buffer);
sd_read_app_tag_own(sdkp, buffer);
sd_read_write_same(sdkp, buffer);
+ sd_read_zones(sdkp, buffer);
}
sdkp->first_scan = 0;
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 63ba5ca..6500d51 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -93,6 +93,7 @@ struct scsi_disk {
unsigned lbpvpd : 1;
unsigned ws10 : 1;
unsigned ws16 : 1;
+ unsigned zoned: 2; /* ZONED field */
};
#define to_scsi_disk(obj) container_of(obj,struct scsi_disk,dev)
--
1.8.5.2
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 4/8] sd: Implement new RESET_WP provisioning mode
2015-07-31 13:36 [RFC PATCH 0/8] ZBC host-managed device support Hannes Reinecke
` (2 preceding siblings ...)
2015-07-31 13:36 ` [PATCH 3/8] sd: Set chunk_sectors to zone size Hannes Reinecke
@ 2015-07-31 13:36 ` Hannes Reinecke
2015-07-31 13:36 ` [PATCH 5/8] block: Implement support for zoned block devices Hannes Reinecke
` (3 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Hannes Reinecke @ 2015-07-31 13:36 UTC (permalink / raw)
To: James Bottomley
Cc: Christoph Hellwig, linux-scsi, Jens Axboe, linux-kernel,
Hannes Reinecke
We can map the RESET WRITE POINTER command onto a 'discard'
request.
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
drivers/scsi/sd.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++---------
drivers/scsi/sd.h | 1 +
2 files changed, 48 insertions(+), 9 deletions(-)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index b5d412a..f909684 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -367,6 +367,7 @@ static const char *lbp_mode[] = {
[SD_LBP_WS16] = "writesame_16",
[SD_LBP_WS10] = "writesame_10",
[SD_LBP_ZERO] = "writesame_zero",
+ [SD_ZBC_RESET_WP] = "reset_wp",
[SD_LBP_DISABLE] = "disabled",
};
@@ -389,6 +390,13 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr,
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
+ if (sdkp->zoned == 1) {
+ if (!strncmp(buf, lbp_mode[SD_ZBC_RESET_WP], 20)) {
+ sd_config_discard(sdkp, SD_ZBC_RESET_WP);
+ return count;
+ }
+ return -EINVAL;
+ }
if (sdp->type != TYPE_DISK)
return -EINVAL;
@@ -668,6 +676,12 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
q->limits.discard_zeroes_data = sdkp->lbprz;
break;
+ case SD_ZBC_RESET_WP:
+ max_blocks = min_not_zero(sdkp->max_unmap_blocks,
+ (u32)SD_MAX_WS16_BLOCKS);
+ q->limits.discard_zeroes_data = 1;
+ break;
+
case SD_LBP_ZERO:
max_blocks = min_not_zero(sdkp->max_ws_blocks,
(u32)SD_MAX_WS10_BLOCKS);
@@ -696,16 +710,18 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
unsigned int nr_sectors = blk_rq_sectors(rq);
unsigned int nr_bytes = blk_rq_bytes(rq);
unsigned int len;
- int ret;
+ int ret = 0;
char *buf;
- struct page *page;
+ struct page *page = NULL;
sector >>= ilog2(sdp->sector_size) - 9;
nr_sectors >>= ilog2(sdp->sector_size) - 9;
- page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
- if (!page)
- return BLKPREP_DEFER;
+ if (sdkp->provisioning_mode != SD_ZBC_RESET_WP) {
+ page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+ if (!page)
+ return BLKPREP_DEFER;
+ }
switch (sdkp->provisioning_mode) {
case SD_LBP_UNMAP:
@@ -745,6 +761,16 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
len = sdkp->device->sector_size;
break;
+ case SD_ZBC_RESET_WP:
+ cmd->cmd_len = 16;
+ cmd->cmnd[0] = ZBC_OUT;
+ cmd->cmnd[1] = ZO_RESET_WRITE_POINTER;
+ put_unaligned_be64(sector, &cmd->cmnd[2]);
+ /* Reset Write Pointer doesn't have a payload */
+ len = 0;
+ cmd->sc_data_direction = DMA_NONE;
+ break;
+
default:
ret = BLKPREP_KILL;
goto out;
@@ -764,12 +790,14 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
* discarded on disk. This allows us to report completion on the full
* amount of blocks described by the request.
*/
- blk_add_request_payload(rq, page, len);
- ret = scsi_init_io(cmd);
+ if (len) {
+ blk_add_request_payload(rq, page, len);
+ ret = scsi_init_io(cmd);
+ }
rq->__data_len = nr_bytes;
out:
- if (ret != BLKPREP_OK)
+ if (page && ret != BLKPREP_OK)
__free_page(page);
return ret;
}
@@ -1136,7 +1164,8 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
{
struct request *rq = SCpnt->request;
- if (rq->cmd_flags & REQ_DISCARD)
+ if (rq->cmd_flags & REQ_DISCARD &&
+ rq->completion_data)
__free_page(rq->completion_data);
if (SCpnt->cmnd != rq->cmd) {
@@ -1657,6 +1686,7 @@ static int sd_done(struct scsi_cmnd *SCpnt)
int sense_deferred = 0;
unsigned char op = SCpnt->cmnd[0];
unsigned char unmap = SCpnt->cmnd[1] & 8;
+ unsigned char sa = SCpnt->cmnd[1] & 0xf;
if (req->cmd_flags & REQ_DISCARD || req->cmd_flags & REQ_WRITE_SAME) {
if (!result) {
@@ -1708,6 +1738,10 @@ static int sd_done(struct scsi_cmnd *SCpnt)
case UNMAP:
sd_config_discard(sdkp, SD_LBP_DISABLE);
break;
+ case ZBC_OUT:
+ if (sa == ZO_RESET_WRITE_POINTER)
+ sd_config_discard(sdkp, SD_LBP_DISABLE);
+ break;
case WRITE_SAME_16:
case WRITE_SAME:
if (unmap)
@@ -2764,6 +2798,10 @@ static void sd_read_zones(struct scsi_disk *sdkp, unsigned char *buffer)
/* Read the zone length from the first zone descriptor */
desc = &buffer[64];
zone_len = get_unaligned_be64(&desc[8]);
+ sdkp->max_unmap_blocks = zone_len;
+ sdkp->unmap_alignment = zone_len;
+ sdkp->unmap_granularity = zone_len;
+ sd_config_discard(sdkp, SD_ZBC_RESET_WP);
blk_queue_chunk_sectors(sdkp->disk->queue, zone_len);
}
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 6500d51..4edcf54 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -56,6 +56,7 @@ enum {
SD_LBP_WS16, /* Use WRITE SAME(16) with UNMAP bit */
SD_LBP_WS10, /* Use WRITE SAME(10) with UNMAP bit */
SD_LBP_ZERO, /* Use WRITE SAME(10) with zero payload */
+ SD_ZBC_RESET_WP, /* Use RESET WRITE POINTER */
SD_LBP_DISABLE, /* Discard disabled due to failed cmd */
};
--
1.8.5.2
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 5/8] block: Implement support for zoned block devices
2015-07-31 13:36 [RFC PATCH 0/8] ZBC host-managed device support Hannes Reinecke
` (3 preceding siblings ...)
2015-07-31 13:36 ` [PATCH 4/8] sd: Implement new RESET_WP provisioning mode Hannes Reinecke
@ 2015-07-31 13:36 ` Hannes Reinecke
2015-07-31 13:36 ` [PATCH 6/8] block: Add 'zoned' sysfs queue attribute Hannes Reinecke
` (2 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Hannes Reinecke @ 2015-07-31 13:36 UTC (permalink / raw)
To: James Bottomley
Cc: Christoph Hellwig, linux-scsi, Jens Axboe, linux-kernel,
Hannes Reinecke
Implement an RB-tree holding the zone information and
add support functions for maintaining the RB-Tree.
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
block/Kconfig | 9 +++++++
block/Makefile | 1 +
block/blk-core.c | 5 ++++
block/blk-zoned.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/blkdev.h | 47 +++++++++++++++++++++++++++++++++
5 files changed, 132 insertions(+)
create mode 100644 block/blk-zoned.c
diff --git a/block/Kconfig b/block/Kconfig
index 161491d..bc9c28ae 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -100,6 +100,15 @@ config BLK_DEV_THROTTLING
See Documentation/cgroups/blkio-controller.txt for more information.
+config BLK_DEV_ZONED
+ bool "Zoned block device support"
+ default n
+ ---help---
+ Block layer zoned block device support. This option enables
+ support for zoned block (ZAC/ZBC) devices.
+
+ Say yes here if you have a ZAC or ZBC storage device.
+
config BLK_CMDLINE_PARSER
bool "Block device command line partition parser"
default n
diff --git a/block/Makefile b/block/Makefile
index 00ecc97..171b572 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
+obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
diff --git a/block/blk-core.c b/block/blk-core.c
index 627ed0c..82d8ce9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -573,6 +573,8 @@ void blk_cleanup_queue(struct request_queue *q)
if (q->mq_ops)
blk_mq_free_queue(q);
+ blk_drop_zones(q);
+
spin_lock_irq(lock);
if (q->queue_lock != &q->__queue_lock)
q->queue_lock = &q->__queue_lock;
@@ -664,6 +666,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
#ifdef CONFIG_BLK_CGROUP
INIT_LIST_HEAD(&q->blkg_list);
#endif
+#ifdef CONFIG_BLK_DEV_ZONED
+ q->zones = RB_ROOT;
+#endif
INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
kobject_init(&q->kobj, &blk_queue_ktype);
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
new file mode 100644
index 0000000..975e863
--- /dev/null
+++ b/block/blk-zoned.c
@@ -0,0 +1,70 @@
+/*
+ * Zoned block device handling
+ *
+ * Copyright (c) 2015, Hannes Reinecke
+ * Copyright (c) 2015, SUSE Linux GmbH
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/blkdev.h>
+#include <linux/rbtree.h>
+
+/*
+ * Find the zone containing @lba in the queue's zone tree.
+ *
+ * A zone matches when start <= @lba < start + len.  Returns the
+ * matching zone, or NULL if no zone in the tree covers @lba.
+ */
+struct blk_zone *blk_lookup_zone(struct request_queue *q, sector_t lba)
+{
+	struct rb_node *cur;
+
+	for (cur = q->zones.rb_node; cur; ) {
+		struct blk_zone *z = rb_entry(cur, struct blk_zone, node);
+
+		if (lba < z->start) {
+			cur = cur->rb_left;
+			continue;
+		}
+		if (lba < z->start + z->len)
+			return z;
+		cur = cur->rb_right;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(blk_lookup_zone);
+
+/*
+ * Insert @data into the queue's zone tree.
+ *
+ * The tree is ordered by zone range.  If @data overlaps a zone that is
+ * already present, nothing is inserted and that existing zone is
+ * returned; otherwise @data is linked in, the tree is rebalanced and
+ * NULL is returned.
+ */
+struct blk_zone *blk_insert_zone(struct request_queue *q, struct blk_zone *data)
+{
+	struct rb_root *tree = &q->zones;
+	struct rb_node **link = &tree->rb_node;
+	struct rb_node *parent = NULL;
+
+	/* Descend to the leaf position where the new zone belongs */
+	while (*link) {
+		struct blk_zone *cur = rb_entry(*link, struct blk_zone, node);
+
+		parent = *link;
+		if (data->start + data->len <= cur->start) {
+			link = &parent->rb_left;
+		} else if (data->start >= cur->start + cur->len) {
+			link = &parent->rb_right;
+		} else {
+			/* Overlap: hand back the zone already in the tree */
+			return cur;
+		}
+	}
+
+	/* Link the new node at the leaf and restore the tree balance */
+	rb_link_node(&data->node, parent, link);
+	rb_insert_color(&data->node, tree);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(blk_insert_zone);
+
+/*
+ * Free every zone in the queue's zone tree and reset the tree to empty.
+ *
+ * Nodes are visited in post-order, so each zone is freed only after
+ * its children and no rebalancing is needed while tearing down.
+ */
+void blk_drop_zones(struct request_queue *q)
+{
+	struct blk_zone *cur, *tmp;
+
+	rbtree_postorder_for_each_entry_safe(cur, tmp, &q->zones, node)
+		kfree(cur);
+
+	q->zones = RB_ROOT;
+}
+EXPORT_SYMBOL_GPL(blk_drop_zones);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d4068c1..746ea82 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -255,6 +255,50 @@ struct blk_queue_tag {
#define BLK_SCSI_MAX_CMDS (256)
#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
+#ifdef CONFIG_BLK_DEV_ZONED
+/* Zone types recorded for each zone of a zoned block device. */
+enum blk_zone_type {
+	BLK_ZONE_TYPE_UNKNOWN,
+	BLK_ZONE_TYPE_CONVENTIONAL,
+	BLK_ZONE_TYPE_SEQWRITE_REQ,
+	BLK_ZONE_TYPE_SEQWRITE_PREF,
+	BLK_ZONE_TYPE_RESERVED,
+};
+
+/* Zone condition recorded for each zone of a zoned block device. */
+enum blk_zone_state {
+	BLK_ZONE_UNKNOWN,
+	BLK_ZONE_NO_WP,
+	BLK_ZONE_OPEN,
+	BLK_ZONE_READONLY,
+	BLK_ZONE_OFFLINE,
+	BLK_ZONE_BUSY,
+};
+
+/*
+ * One zone of a zoned block device, kept in the per-queue RB-tree
+ * (request_queue->zones).
+ *
+ * A zone covers the range [start, start + len); wp is the write
+ * pointer, which equals start for an empty zone and start + len for a
+ * full one.  NOTE(review): start/len/wp are presumably in the same
+ * unit as the lba passed to blk_lookup_zone() -- confirm with callers.
+ */
+struct blk_zone {
+	struct rb_node node;	/* linkage in request_queue->zones */
+	spinlock_t lock;
+	uint64_t start;
+	uint64_t len;
+	uint64_t wp;
+	enum blk_zone_type type;
+	enum blk_zone_state state;
+	void *private_data;
+};
+
+/* Note: the blk_zone_is_*() macros may evaluate their argument twice. */
+#define blk_zone_is_smr(z) ((z)->type == BLK_ZONE_TYPE_SEQWRITE_REQ || \
+			    (z)->type == BLK_ZONE_TYPE_SEQWRITE_PREF)
+
+#define blk_zone_is_cmr(z) ((z)->type == BLK_ZONE_TYPE_CONVENTIONAL)
+#define blk_zone_is_full(z) ((z)->wp == (z)->start + (z)->len)
+#define blk_zone_is_empty(z) ((z)->wp == (z)->start)
+
+extern struct blk_zone *blk_lookup_zone(struct request_queue *, sector_t);
+extern struct blk_zone *blk_insert_zone(struct request_queue *,
+					 struct blk_zone *);
+extern void blk_drop_zones(struct request_queue *);
+#else
+/* Without zoned support there is no zone tree to tear down. */
+static inline void blk_drop_zones(struct request_queue *q) { }
+#endif
+
struct queue_limits {
unsigned long bounce_pfn;
unsigned long seg_boundary_mask;
@@ -420,6 +464,9 @@ struct request_queue {
struct queue_limits limits;
+#ifdef CONFIG_BLK_DEV_ZONED
+ struct rb_root zones;
+#endif
/*
* sg stuff
*/
--
1.8.5.2
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 6/8] block: Add 'zoned' sysfs queue attribute
2015-07-31 13:36 [RFC PATCH 0/8] ZBC host-managed device support Hannes Reinecke
` (4 preceding siblings ...)
2015-07-31 13:36 ` [PATCH 5/8] block: Implement support for zoned block devices Hannes Reinecke
@ 2015-07-31 13:36 ` Hannes Reinecke
2015-07-31 13:36 ` [PATCH 7/8] block: Introduce BLKPREP_DONE Hannes Reinecke
2015-07-31 13:36 ` [PATCH 8/8] sd: Implement support for ZBC devices Hannes Reinecke
7 siblings, 0 replies; 9+ messages in thread
From: Hannes Reinecke @ 2015-07-31 13:36 UTC (permalink / raw)
To: James Bottomley
Cc: Christoph Hellwig, linux-scsi, Jens Axboe, linux-kernel,
Hannes Reinecke
Add a sysfs queue attribute 'zoned' to display the zone layout
for zoned devices.
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
block/blk-sysfs.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 47 insertions(+)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e419f1f..5e2ba53 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -211,6 +211,43 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
return queue_var_show(max_hw_sectors_kb, (page));
}
+#ifdef CONFIG_BLK_DEV_ZONED
+/*
+ * sysfs show handler for the 'zoned' queue attribute.
+ *
+ * Walks the queue's zone tree in order and emits one line per run of
+ * contiguous zones that share the same type and length:
+ *
+ *	<start of first zone> <zone length> <zone type> <number of zones>
+ *
+ * Output is limited to one page; when space runs out a final "..."
+ * line is emitted and the listing stops.  Returns the number of bytes
+ * written, or -EINVAL if nothing was written.
+ *
+ * NOTE(review): the tree is walked without any visible locking --
+ * confirm that zones cannot be inserted or dropped concurrently.
+ */
+static ssize_t queue_zoned_show(struct request_queue *q, char *page)
+{
+	struct rb_node *node;
+	struct blk_zone *zone;
+	ssize_t offset = 0;
+	/* 64-bit, matching blk_zone.start/len, so nothing is truncated */
+	u64 end = 0, size = 0;
+	unsigned int num = 0;
+	enum blk_zone_type type = BLK_ZONE_TYPE_UNKNOWN;
+
+	for (node = rb_first(&q->zones); node; node = rb_next(node)) {
+		zone = rb_entry(node, struct blk_zone, node);
+		if (zone->type != type ||
+		    zone->len != size ||
+		    end != zone->start) {
+			/* Close the previous run with its zone count */
+			if (size != 0)
+				offset += scnprintf(page + offset,
+						    PAGE_SIZE - offset,
+						    "%u\n", num);
+			/*
+			 * We can only store one page; keep room for a
+			 * worst-case line (two 20-digit values, type,
+			 * count) plus the "..." marker.
+			 */
+			if (offset + 80 > PAGE_SIZE) {
+				offset += scnprintf(page + offset,
+						    PAGE_SIZE - offset,
+						    "...\n");
+				return offset;
+			}
+			size = zone->len;
+			type = zone->type;
+			offset += scnprintf(page + offset, PAGE_SIZE - offset,
+					    "%llu %llu %d ",
+					    (unsigned long long)zone->start,
+					    (unsigned long long)size, type);
+			num = 0;
+			end = zone->start + size;
+		} else
+			end += zone->len;
+		num++;
+	}
+	if (num > 0)
+		offset += scnprintf(page + offset, PAGE_SIZE - offset,
+				    "%u\n", num);
+	return offset > 0 ? offset : -EINVAL;
+}
+#endif
+
#define QUEUE_SYSFS_BIT_FNS(name, flag, neg) \
static ssize_t \
queue_show_##name(struct request_queue *q, char *page) \
@@ -401,6 +438,13 @@ static struct queue_sysfs_entry queue_write_same_max_entry = {
.show = queue_write_same_max_show,
};
+#ifdef CONFIG_BLK_DEV_ZONED
+/* 'zoned' queue attribute: read-only listing produced by queue_zoned_show() */
+static struct queue_sysfs_entry queue_zoned_entry = {
+ .attr = {.name = "zoned", .mode = S_IRUGO },
+ .show = queue_zoned_show,
+};
+#endif
+
static struct queue_sysfs_entry queue_nonrot_entry = {
.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
.show = queue_show_nonrot,
@@ -451,6 +495,9 @@ static struct attribute *default_attrs[] = {
&queue_discard_zeroes_data_entry.attr,
&queue_write_same_max_entry.attr,
&queue_nonrot_entry.attr,
+#ifdef CONFIG_BLK_DEV_ZONED
+ &queue_zoned_entry.attr,
+#endif
&queue_nomerges_entry.attr,
&queue_rq_affinity_entry.attr,
&queue_iostats_entry.attr,
--
1.8.5.2
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 7/8] block: Introduce BLKPREP_DONE
2015-07-31 13:36 [RFC PATCH 0/8] ZBC host-managed device support Hannes Reinecke
` (5 preceding siblings ...)
2015-07-31 13:36 ` [PATCH 6/8] block: Add 'zoned' sysfs queue attribute Hannes Reinecke
@ 2015-07-31 13:36 ` Hannes Reinecke
2015-07-31 13:36 ` [PATCH 8/8] sd: Implement support for ZBC devices Hannes Reinecke
7 siblings, 0 replies; 9+ messages in thread
From: Hannes Reinecke @ 2015-07-31 13:36 UTC (permalink / raw)
To: James Bottomley
Cc: Christoph Hellwig, linux-scsi, Jens Axboe, linux-kernel,
Hannes Reinecke
Add a new blkprep return code BLKPREP_DONE to signal completion
without I/O error.
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
block/blk-core.c | 8 ++++++++
drivers/scsi/scsi_lib.c | 3 ++-
include/linux/blkdev.h | 1 +
3 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 82d8ce9..ecdfe1a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2359,6 +2359,14 @@ struct request *blk_peek_request(struct request_queue *q)
*/
blk_start_request(rq);
__blk_end_request_all(rq, -EIO);
+ } else if (ret == BLKPREP_DONE) {
+ rq->cmd_flags |= REQ_QUIET;
+ /*
+ * Mark this request as started so we don't trigger
+ * any debug logic in the end I/O path.
+ */
+ blk_start_request(rq);
+ __blk_end_request_all(rq, 0);
} else {
printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
break;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index b1a2631..830a70b 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1342,8 +1342,9 @@ scsi_prep_return(struct request_queue *q, struct request *req, int ret)
struct scsi_device *sdev = q->queuedata;
switch (ret) {
+ case BLKPREP_DONE:
case BLKPREP_KILL:
- req->errors = DID_NO_CONNECT << 16;
+ req->errors = (ret == BLKPREP_KILL) ? DID_NO_CONNECT << 16 : 0;
/* release the command and kill it */
if (req->special) {
struct scsi_cmnd *cmd = req->special;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 746ea82..86d787d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -728,6 +728,7 @@ static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
#define BLKPREP_OK 0 /* serve it */
#define BLKPREP_KILL 1 /* fatal error, kill */
#define BLKPREP_DEFER 2 /* leave on queue */
+#define BLKPREP_DONE 3 /* complete w/o error */
extern unsigned long blk_max_low_pfn, blk_max_pfn;
--
1.8.5.2
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 8/8] sd: Implement support for ZBC devices
2015-07-31 13:36 [RFC PATCH 0/8] ZBC host-managed device support Hannes Reinecke
` (6 preceding siblings ...)
2015-07-31 13:36 ` [PATCH 7/8] block: Introduce BLKPREP_DONE Hannes Reinecke
@ 2015-07-31 13:36 ` Hannes Reinecke
7 siblings, 0 replies; 9+ messages in thread
From: Hannes Reinecke @ 2015-07-31 13:36 UTC (permalink / raw)
To: James Bottomley
Cc: Christoph Hellwig, linux-scsi, Jens Axboe, linux-kernel,
Hannes Reinecke
Implement ZBC support functions to read in the zone information
and set up the zone tree.
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
drivers/scsi/Kconfig | 8 ++
drivers/scsi/Makefile | 1 +
drivers/scsi/sd.c | 125 ++++++++++++++--
drivers/scsi/sd.h | 34 +++++
drivers/scsi/sd_zbc.c | 390 ++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 547 insertions(+), 11 deletions(-)
create mode 100644 drivers/scsi/sd_zbc.c
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 456e1567..4135448 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -200,6 +200,14 @@ config SCSI_ENCLOSURE
it has an enclosure device. Selecting this option will just allow
certain enclosure conditions to be reported and is not required.
+config SCSI_ZBC
+ bool "SCSI ZBC (zoned block commands) Support"
+ depends on SCSI && BLK_DEV_ZONED
+ help
+ Enable support for ZBC (zoned block commands) devices.
+
+ If unsure say N.
+
config SCSI_CONSTANTS
bool "Verbose SCSI error reporting (kernel size +=75K)"
depends on SCSI
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 91209e3..8893305 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -176,6 +176,7 @@ hv_storvsc-y := storvsc_drv.o
sd_mod-objs := sd.o
sd_mod-$(CONFIG_BLK_DEV_INTEGRITY) += sd_dif.o
+sd_mod-$(CONFIG_SCSI_ZBC) += sd_zbc.o
sr_mod-objs := sr.o sr_ioctl.o sr_vendor.o
ncr53c8xx-flags-$(CONFIG_SCSI_ZALON) \
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index f909684..3f20f86 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -91,6 +91,7 @@ MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK15_MAJOR);
MODULE_ALIAS_SCSI_DEVICE(TYPE_DISK);
MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD);
MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC);
+MODULE_ALIAS_SCSI_DEVICE(TYPE_ZBC);
#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
#define SD_MINORS 16
@@ -161,7 +162,7 @@ cache_type_store(struct device *dev, struct device_attribute *attr,
static const char temp[] = "temporary ";
int len;
- if (sdp->type != TYPE_DISK)
+ if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC)
/* no cache control on RBC devices; theoretically they
* can do it, but there's probably so many exceptions
* it's not worth the risk */
@@ -259,7 +260,7 @@ allow_restart_store(struct device *dev, struct device_attribute *attr,
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- if (sdp->type != TYPE_DISK)
+ if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC)
return -EINVAL;
sdp->allow_restart = simple_strtoul(buf, NULL, 10);
@@ -390,7 +391,7 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr,
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- if (sdkp->zoned == 1) {
+ if (sdkp->zoned == 1 || sdp->type == TYPE_ZBC) {
if (!strncmp(buf, lbp_mode[SD_ZBC_RESET_WP], 20)) {
sd_config_discard(sdkp, SD_ZBC_RESET_WP);
return count;
@@ -464,7 +465,7 @@ max_write_same_blocks_store(struct device *dev, struct device_attribute *attr,
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- if (sdp->type != TYPE_DISK)
+ if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC)
return -EINVAL;
err = kstrtoul(buf, 10, &max);
@@ -713,6 +714,10 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
int ret = 0;
char *buf;
struct page *page = NULL;
+#ifdef CONFIG_SCSI_ZBC
+ struct blk_zone *zone;
+ unsigned long flags;
+#endif
sector >>= ilog2(sdp->sector_size) - 9;
nr_sectors >>= ilog2(sdp->sector_size) - 9;
@@ -762,6 +767,52 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
break;
case SD_ZBC_RESET_WP:
+#ifdef CONFIG_SCSI_ZBC
+ zone = blk_lookup_zone(rq->q, sector);
+ if (!zone) {
+ ret = BLKPREP_KILL;
+ goto out;
+ }
+ spin_lock_irqsave(&zone->lock, flags);
+ if (zone->state == BLK_ZONE_BUSY) {
+ sd_printk(KERN_INFO, sdkp,
+ "Discarding busy zone %llu/%llu\n",
+ zone->start, zone->len);
+ spin_unlock_irqrestore(&zone->lock, flags);
+ ret = BLKPREP_DEFER;
+ goto out;
+ }
+ if (!blk_zone_is_smr(zone)) {
+ sd_printk(KERN_INFO, sdkp,
+ "Discarding %s zone %llu/%llu\n",
+ blk_zone_is_cmr(zone) ? "CMR" : "unknown",
+ zone->start, zone->len);
+ spin_unlock_irqrestore(&zone->lock, flags);
+ ret = BLKPREP_DONE;
+ goto out;
+ }
+ if (blk_zone_is_empty(zone)) {
+ spin_unlock_irqrestore(&zone->lock, flags);
+ ret = BLKPREP_DONE;
+ goto out;
+ }
+ if (zone->start != sector ||
+ zone->len < nr_sectors) {
+ sd_printk(KERN_INFO, sdkp,
+ "Misaligned RESET WP, start %llu/%zu "
+ "len %llu/%u\n",
+ zone->start, sector, zone->len, nr_sectors);
+ spin_unlock_irqrestore(&zone->lock, flags);
+ ret = BLKPREP_KILL;
+ goto out;
+ }
+ /*
+ * Opportunistic setting, needs to be fixed up
+ * if RESET WRITE POINTER fails.
+ */
+ zone->wp = zone->start;
+ spin_unlock_irqrestore(&zone->lock, flags);
+#endif
cmd->cmd_len = 16;
cmd->cmnd[0] = ZBC_OUT;
cmd->cmnd[1] = ZO_RESET_WRITE_POINTER;
@@ -1016,6 +1067,13 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
this_count = this_count >> 3;
}
}
+
+ if (sdkp->zoned || sdp->type == TYPE_ZBC) {
+ ret = sd_zbc_lookup_zone(sdkp, rq, block, this_count);
+ if (ret != BLKPREP_OK)
+ goto out;
+ }
+
if (rq_data_dir(rq) == WRITE) {
SCpnt->cmnd[0] = WRITE_6;
@@ -1693,6 +1751,13 @@ static int sd_done(struct scsi_cmnd *SCpnt)
good_bytes = blk_rq_bytes(req);
scsi_set_resid(SCpnt, 0);
} else {
+#ifdef CONFIG_SCSI_ZBC
+ if (op == ZBC_OUT)
+ /* RESET WRITE POINTER failed */
+ sd_zbc_update_zones(sdkp,
+ blk_rq_pos(req),
+ 512, true);
+#endif
good_bytes = 0;
scsi_set_resid(SCpnt, blk_rq_bytes(req));
}
@@ -1756,6 +1821,26 @@ static int sd_done(struct scsi_cmnd *SCpnt)
}
}
}
+ if (sshdr.asc == 0x21) {
+ /*
+ * ZBC: read beyond the write pointer position.
+ * Clear out error and return the buffer as-is.
+ */
+ if (sshdr.ascq == 0x06) {
+ good_bytes = blk_rq_bytes(req);
+ scsi_set_resid(SCpnt, 0);
+ }
+#ifdef CONFIG_SCSI_ZBC
+ /*
+ * ZBC: Unaligned write command.
+ * Write did not start at the write pointer position.
+ */
+ if (sshdr.ascq == 0x04)
+ sd_zbc_update_zones(sdkp,
+ blk_rq_pos(req),
+ 512, true);
+#endif
+ }
break;
default:
break;
@@ -1895,9 +1980,8 @@ sd_spinup_disk(struct scsi_disk *sdkp)
}
}
-static int
-sd_zbc_report_zones(struct scsi_disk *sdkp, sector_t start_lba,
- unsigned char *buffer, int bufflen )
+int sd_zbc_report_zones(struct scsi_disk *sdkp, sector_t start_lba,
+ unsigned char *buffer, int bufflen )
{
struct scsi_device *sdp = sdkp->device;
const int timeout = sdp->request_queue->rq_timeout
@@ -2575,7 +2659,7 @@ static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer)
struct scsi_mode_data data;
struct scsi_sense_hdr sshdr;
- if (sdp->type != TYPE_DISK)
+ if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC)
return;
if (sdkp->protection_type == 0)
@@ -2768,14 +2852,18 @@ static void sd_read_write_same(struct scsi_disk *sdkp, unsigned char *buffer)
static void sd_read_zones(struct scsi_disk *sdkp, unsigned char *buffer)
{
+ struct scsi_device *sdp = sdkp->device;
int retval;
unsigned char *desc;
u32 rep_len;
u8 same;
u64 zone_len;
- if (sdkp->zoned != 1)
- /* Device managed, no special handling required */
+ if (sdkp->zoned != 1 && sdp->type != TYPE_ZBC)
+ /*
+ * Device managed or normal SCSI disk,
+ * no special handling required
+ */
return;
retval = sd_zbc_report_zones(sdkp, 0, buffer, SD_BUF_SIZE);
@@ -2875,6 +2963,9 @@ static int sd_revalidate_disk(struct gendisk *disk)
sdkp->first_scan = 0;
+ if (sdkp->zoned || sdp->type == TYPE_ZBC)
+ sd_zbc_setup(sdkp, buffer, SD_BUF_SIZE);
+
/*
* We now have all cache related info, determine how we deal
* with flush requests.
@@ -3047,9 +3138,16 @@ static int sd_probe(struct device *dev)
scsi_autopm_get_device(sdp);
error = -ENODEV;
- if (sdp->type != TYPE_DISK && sdp->type != TYPE_MOD && sdp->type != TYPE_RBC)
+ if (sdp->type != TYPE_DISK &&
+ sdp->type != TYPE_ZBC &&
+ sdp->type != TYPE_MOD &&
+ sdp->type != TYPE_RBC)
goto out;
+#ifndef CONFIG_SCSI_ZBC
+ if (sdp->type == TYPE_ZBC)
+ goto out;
+#endif
SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp,
"sd_probe\n"));
@@ -3153,6 +3251,8 @@ static int sd_remove(struct device *dev)
del_gendisk(sdkp->disk);
sd_shutdown(dev);
+ sd_zbc_remove(sdkp);
+
blk_register_region(devt, SD_MINORS, NULL,
sd_default_probe, NULL, NULL);
@@ -3183,6 +3283,9 @@ static void scsi_disk_release(struct device *dev)
spin_unlock(&sd_index_lock);
blk_integrity_unregister(disk);
+#ifdef CONFIG_SCSI_ZBC
+ drain_workqueue(sdkp->zone_work_q);
+#endif
disk->private_data = NULL;
put_disk(disk);
put_device(&sdkp->device->sdev_gendev);
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 4edcf54..e911306 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -65,6 +65,10 @@ struct scsi_disk {
struct scsi_device *device;
struct device dev;
struct gendisk *disk;
+#ifdef CONFIG_SCSI_ZBC
+ struct workqueue_struct *zone_work_q;
+ atomic_t zone_reset;
+#endif
atomic_t openers;
sector_t capacity; /* size in 512-byte sectors */
u32 max_xfer_blocks;
@@ -260,4 +264,34 @@ static inline void sd_dif_complete(struct scsi_cmnd *cmd, unsigned int a)
#endif /* CONFIG_BLK_DEV_INTEGRITY */
+#ifdef CONFIG_SCSI_ZBC
+
+extern int sd_zbc_report_zones(struct scsi_disk *sdkp, sector_t start_lba,
+ unsigned char *buffer, int bufflen );
+extern int sd_zbc_setup(struct scsi_disk *, unsigned char *, int);
+extern void sd_zbc_remove(struct scsi_disk *);
+extern void sd_zbc_reset_zones(struct scsi_disk *);
+extern int sd_zbc_lookup_zone(struct scsi_disk *, struct request *,
+ sector_t, unsigned int);
+extern void sd_zbc_update_zones(struct scsi_disk *, sector_t, int, bool);
+extern void sd_zbc_refresh_zone_work(struct work_struct *);
+
+#else /* CONFIG_SCSI_ZBC */
+
+static inline int sd_zbc_setup(struct scsi_disk *sdkp,
+ unsigned char *buf, int buf_len)
+{
+ return 0;
+}
+
+static inline int sd_zbc_lookup_zone(struct scsi_disk *sdkp,
+ struct request *rq, sector_t sector,
+ unsigned int num_sectors)
+{
+ return BLKPREP_OK;
+}
+
+static inline void sd_zbc_remove(struct scsi_disk *sdkp) {}
+#endif /* CONFIG_SCSI_ZBC */
+
#endif /* _SCSI_DISK_H */
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
new file mode 100644
index 0000000..67f18cb
--- /dev/null
+++ b/drivers/scsi/sd_zbc.c
@@ -0,0 +1,390 @@
+/*
+ * sd_zbc.c - SCSI Zoned Block commands
+ *
+ * Copyright (C) 2014-2015 SUSE Linux GmbH
+ * Written by: Hannes Reinecke <hare@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/rbtree.h>
+
+#include <asm/unaligned.h>
+
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_dbg.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_driver.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_eh.h>
+
+#include "sd.h"
+#include "scsi_priv.h"
+
+/*
+ * Zone condition codes as found in a REPORT ZONES zone descriptor.
+ * zbc_desc_to_zone() extracts the value from the upper nibble of
+ * descriptor byte 1.
+ */
+enum zbc_zone_cond {
+	ZBC_ZONE_COND_NO_WP		= 0x0,
+	ZBC_ZONE_COND_EMPTY		= 0x1,
+	ZBC_ZONE_COND_IMPLICIT_OPEN	= 0x2,
+	ZBC_ZONE_COND_EXPLICIT_OPEN	= 0x3,
+	ZBC_ZONE_COND_CLOSED		= 0x4,
+	ZBC_ZONE_COND_READONLY		= 0xd,
+	ZBC_ZONE_COND_FULL		= 0xe,
+	ZBC_ZONE_COND_OFFLINE		= 0xf,
+};
+
+/*
+ * Size in bytes of the REPORT ZONES buffer requested by sd_zbc_setup()
+ * for the initial zone scan; sd_zbc_update_zones() halves it on
+ * allocation failure.
+ */
+#define SD_ZBC_BUF_SIZE 131072
+/*
+ * Delay for re-running a deferred request queue.
+ * NOTE(review): presumably the value meant for blk_delay_queue(),
+ * i.e. milliseconds -- confirm.
+ */
+#define SD_ZBC_QUEUE_DELAY 5
+
+/* Change to "#define SD_ZBC_DEBUG" to compile in the verbose zone messages */
+#undef SD_ZBC_DEBUG
+
+/*
+ * One deferred zone refresh request.
+ *
+ * Allocated by sd_zbc_update_zones() together with the trailing
+ * REPORT ZONES buffer and released by sd_zbc_refresh_zone_work() once
+ * the work item is no longer re-queued.
+ */
+struct zbc_update_work {
+	struct work_struct zone_work;	/* runs sd_zbc_refresh_zone_work() */
+	struct scsi_disk *sdkp;		/* disk whose zones are refreshed */
+	spinlock_t zone_lock;		/* NOTE(review): not used in the code visible here */
+	sector_t zone_lba;		/* start LBA of the next REPORT ZONES */
+	int zone_buflen;		/* size of zone_buf in bytes */
+	bool zone_update;		/* true: single pass, do not walk on to end of disk */
+	/*
+	 * REPORT ZONES response data. Flexible array member with the same
+	 * element type as the 'unsigned char *' buffers it is handed to.
+	 */
+	unsigned char zone_buf[];
+};
+
+/*
+ * Allocate a blk_zone and fill it from one REPORT ZONES zone descriptor.
+ *
+ * @sdkp: disk the descriptor belongs to (only used for debug output)
+ * @rec:  start of the zone descriptor
+ *
+ * Descriptor fields read here: byte 0 low nibble = zone type, byte 1
+ * high nibble = zone condition, bytes 8-15 = length, bytes 16-23 =
+ * start LBA, bytes 24-31 = write pointer (all big endian).
+ *
+ * Returns the new zone, or NULL if the allocation failed. The caller
+ * owns the returned memory.
+ */
+struct blk_zone *zbc_desc_to_zone(struct scsi_disk *sdkp, unsigned char *rec)
+{
+	const enum zbc_zone_cond cond = (rec[1] >> 4) & 0xf;
+	struct blk_zone *zone;
+
+	zone = kzalloc(sizeof(*zone), GFP_KERNEL);
+	if (!zone)
+		return NULL;
+
+	spin_lock_init(&zone->lock);
+	zone->type = rec[0] & 0xf;
+	zone->len = get_unaligned_be64(&rec[8]);
+	zone->start = get_unaligned_be64(&rec[16]);
+
+	if (!blk_zone_is_smr(zone)) {
+		/* No write pointer to track for this zone type */
+		zone->state = BLK_ZONE_NO_WP;
+		zone->wp = (u64)-1;
+	} else {
+		zone->wp = get_unaligned_be64(&rec[24]);
+		switch (cond) {
+		case ZBC_ZONE_COND_READONLY:
+			zone->state = BLK_ZONE_READONLY;
+			break;
+		case ZBC_ZONE_COND_OFFLINE:
+			zone->state = BLK_ZONE_OFFLINE;
+			break;
+		default:
+			zone->state = BLK_ZONE_OPEN;
+			break;
+		}
+	}
+
+	/*
+	 * Fixup block zone state: force the write pointer to agree with
+	 * an EMPTY or FULL zone condition.
+	 */
+	if (cond == ZBC_ZONE_COND_EMPTY && zone->wp != zone->start) {
+#ifdef SD_ZBC_DEBUG
+		sd_printk(KERN_INFO, sdkp,
+			  "zone %llu state EMPTY wp %llu: adjust wp\n",
+			  zone->start, zone->wp);
+#endif
+		zone->wp = zone->start;
+	} else if (cond == ZBC_ZONE_COND_FULL &&
+		   zone->wp != zone->start + zone->len) {
+#ifdef SD_ZBC_DEBUG
+		sd_printk(KERN_INFO, sdkp,
+			  "zone %llu state FULL wp %llu: adjust wp\n",
+			  zone->start, zone->wp);
+#endif
+		zone->wp = zone->start + zone->len;
+	}
+
+	return zone;
+}
+
+/*
+ * Parse a REPORT ZONES response and merge it into the queue's zone tree.
+ *
+ * @sdkp:      disk the response belongs to
+ * @buf:       response data: 64-byte header followed by 64-byte zone
+ *             descriptors
+ * @buf_len:   number of valid bytes in @buf
+ * @start_lba: LBA the report was requested for (currently unused)
+ *
+ * Each descriptor is converted with zbc_desc_to_zone() and inserted
+ * with blk_insert_zone(). If that returns an already known zone, only
+ * its write pointer and state are refreshed (for SMR zones) and the
+ * freshly allocated duplicate is freed.
+ *
+ * Returns the LBA following the last zone parsed, or (sector_t)-1 if
+ * no descriptor could be parsed.
+ */
+sector_t zbc_parse_zones(struct scsi_disk *sdkp, unsigned char *buf,
+			 unsigned int buf_len, sector_t start_lba)
+{
+	struct request_queue *q = sdkp->disk->queue;
+	unsigned char *rec;
+	int rec_no = 0;
+	unsigned int list_length;
+	sector_t next_lba = -1;
+
+	/* Not even a complete header: nothing to parse */
+	if (buf_len < 64)
+		return next_lba;
+
+	/* Parse REPORT ZONES header */
+	list_length = get_unaligned_be32(&buf[0]);
+
+	/*
+	 * Clamp to the data the device claims to have returned. Compare
+	 * before adding the header size so that a bogus list length close
+	 * to UINT_MAX cannot wrap around.
+	 */
+	if (list_length < buf_len - 64)
+		buf_len = list_length + 64;
+
+	/* Only look at descriptors that lie completely inside the buffer */
+	for (rec = buf + 64; rec + 64 <= buf + buf_len; rec += 64) {
+		struct blk_zone *this, *old;
+		unsigned long flags;
+
+		this = zbc_desc_to_zone(sdkp, rec);
+		if (!this)
+			break;
+
+		next_lba = this->start + this->len;
+		old = blk_insert_zone(q, this);
+		if (old) {
+			/* Zone already known: refresh it, drop the copy */
+			spin_lock_irqsave(&old->lock, flags);
+			if (blk_zone_is_smr(old)) {
+				old->wp = this->wp;
+				old->state = this->state;
+			}
+			spin_unlock_irqrestore(&old->lock, flags);
+			kfree(this);
+		}
+		rec_no++;
+	}
+
+#ifdef SD_ZBC_DEBUG
+	sd_printk(KERN_INFO, sdkp,
+		  "Inserted %d zones, next lba %llu len %llu\n",
+		  rec_no, (unsigned long long)next_lba,
+		  (unsigned long long)list_length + 64);
+#endif
+	return next_lba;
+}
+
+/*
+ * Workqueue handler: issue one REPORT ZONES for the range described by
+ * the zbc_update_work and merge the result into the zone tree.
+ *
+ * For a full refresh (zone_update not set) the work item re-queues
+ * itself at the LBA following the last parsed zone until the end of
+ * the disk is reached, the command fails, or a zone reset is flagged.
+ * The work item is freed here once it is not re-queued. The request
+ * queue is restarted in every case.
+ */
+void sd_zbc_refresh_zone_work(struct work_struct *work)
+{
+	struct zbc_update_work *zbc_work =
+		container_of(work, struct zbc_update_work, zone_work);
+	struct scsi_disk *sdkp = zbc_work->sdkp;
+	struct request_queue *q = sdkp->disk->queue;
+	unsigned int buflen = zbc_work->zone_buflen;
+	bool requeued = false;
+	unsigned long flags;
+	sector_t next;
+
+	if (sd_zbc_report_zones(sdkp, zbc_work->zone_lba,
+				zbc_work->zone_buf, buflen))
+		goto finish;
+
+	next = zbc_parse_zones(sdkp, zbc_work->zone_buf, buflen,
+			       zbc_work->zone_lba);
+
+	/* Nothing parsed, end of disk reached, or single-pass update */
+	if (next == (sector_t)-1 || next >= sdkp->capacity ||
+	    zbc_work->zone_update)
+		goto finish;
+
+	if (atomic_read(&sdkp->zone_reset)) {
+		sd_printk(KERN_INFO, sdkp,
+			  "zones in reset, cancelling refresh\n");
+		goto finish;
+	}
+
+	/* Continue the scan; zbc_work must not be touched after this */
+	zbc_work->zone_lba = next;
+	queue_work(sdkp->zone_work_q, &zbc_work->zone_work);
+	requeued = true;
+
+finish:
+	if (!requeued)
+		kfree(zbc_work);
+
+	/* Kick request queue to be on the safe side */
+	spin_lock_irqsave(q->queue_lock, flags);
+	blk_start_queue(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+/*
+ * Schedule a refresh of the cached zone information.
+ *
+ * @sdkp:    disk to refresh
+ * @lba:     LBA the first REPORT ZONES starts at
+ * @bufsize: requested REPORT ZONES buffer size in bytes; halved on
+ *           allocation failure down to 512 bytes
+ * @update:  true  - refresh only the zones one buffer can describe,
+ *                   starting at @lba
+ *           false - refresh all zones of the disk
+ *
+ * The affected SMR zones are marked BLK_ZONE_BUSY with the write
+ * pointer reset to the zone start until the work item has re-read
+ * them. Nothing is scheduled if a zone reset is in progress or if all
+ * affected zones are already busy.
+ */
+void sd_zbc_update_zones(struct scsi_disk *sdkp, sector_t lba, int bufsize,
+			 bool update)
+{
+	struct request_queue *q = sdkp->disk->queue;
+	struct zbc_update_work *zbc_work;
+	struct blk_zone *zone;
+	struct rb_node *node;
+	int zone_num = 0, zone_busy = 0, num_rec;
+	sector_t next_lba = lba;
+	/*
+	 * Updates are requested from sd_done(), i.e. from command
+	 * completion context where sleeping is not allowed; only the full
+	 * refresh issued by sd_zbc_setup() may block for memory.
+	 */
+	gfp_t gfp = update ? GFP_ATOMIC : GFP_KERNEL;
+
+	if (atomic_read(&sdkp->zone_reset)) {
+		sd_printk(KERN_INFO, sdkp,
+			  "zones in reset, not starting update\n");
+		return;
+	}
+
+retry:
+	zbc_work = kzalloc(sizeof(struct zbc_update_work) + bufsize, gfp);
+	if (!zbc_work) {
+		if (bufsize > 512) {
+			/* Report the size we are about to try, not the failed one */
+			bufsize = bufsize >> 1;
+			sd_printk(KERN_INFO, sdkp,
+				  "retry with buffer size %d\n", bufsize);
+			goto retry;
+		}
+		sd_printk(KERN_INFO, sdkp,
+			  "failed to allocate %d bytes\n", bufsize);
+		return;
+	}
+	zbc_work->zone_lba = lba;
+	zbc_work->zone_buflen = bufsize;
+	zbc_work->zone_update = update;
+	zbc_work->sdkp = sdkp;
+	INIT_WORK(&zbc_work->zone_work, sd_zbc_refresh_zone_work);
+	/* 64-byte descriptors, minus the 64-byte response header */
+	num_rec = (bufsize / 64) - 1;
+
+	for (node = rb_first(&q->zones); node; node = rb_next(node)) {
+		unsigned long flags;
+
+		zone = rb_entry(node, struct blk_zone, node);
+		if (update) {
+			/* Only touch the contiguous zones the buffer can hold */
+			if (num_rec == 0)
+				break;
+			if (zone->start != next_lba)
+				continue;
+			next_lba += zone->len;
+			num_rec--;
+		}
+		spin_lock_irqsave(&zone->lock, flags);
+		if (blk_zone_is_smr(zone)) {
+			if (zone->state == BLK_ZONE_BUSY) {
+				zone_busy++;
+			} else {
+				zone->state = BLK_ZONE_BUSY;
+				zone->wp = zone->start;
+			}
+			zone_num++;
+		}
+		spin_unlock_irqrestore(&zone->lock, flags);
+	}
+	if (zone_num && (zone_num == zone_busy)) {
+		sd_printk(KERN_INFO, sdkp,
+			  "zone %s for %llu in progress\n",
+			  update ? "update" : "refresh",
+			  (unsigned long long)lba);
+		kfree(zbc_work);
+		return;
+	}
+
+	/*
+	 * NOTE(review): on the two bail-out paths below the zones marked
+	 * BLK_ZONE_BUSY above stay busy until some later refresh re-reads
+	 * them -- confirm this is intended.
+	 */
+	if (atomic_read(&sdkp->zone_reset)) {
+		sd_printk(KERN_INFO, sdkp,
+			  "zones in reset, not starting update\n");
+		kfree(zbc_work);
+		return;
+	}
+	if (!queue_work(sdkp->zone_work_q, &zbc_work->zone_work)) {
+		sd_printk(KERN_INFO, sdkp,
+			  "zone update already queued?\n");
+		kfree(zbc_work);
+	}
+}
+
+/*
+ * Validate a read/write request against the cached zone information.
+ *
+ * @sdkp:        disk the request is issued to
+ * @rq:          the request (only its data direction is examined)
+ * @sector:      first sector of the request
+ * @num_sectors: number of sectors of the request
+ *
+ * Returns
+ *   BLKPREP_OK    - request may be issued; for a write to a
+ *                   sequential-write-required zone the cached write
+ *                   pointer has been advanced by @num_sectors
+ *   BLKPREP_DEFER - zone not known yet (TYPE_ZBC devices only) or zone
+ *                   state unknown/busy; the queue is re-run after
+ *                   SD_ZBC_QUEUE_DELAY
+ *   BLKPREP_KILL  - write to a full zone or write not starting at the
+ *                   write pointer
+ *   BLKPREP_DONE  - read starting at or beyond the write pointer of an
+ *                   SMR zone
+ */
+int sd_zbc_lookup_zone(struct scsi_disk *sdkp, struct request *rq,
+		       sector_t sector, unsigned int num_sectors)
+{
+	struct request_queue *q = sdkp->disk->queue;
+	struct blk_zone *zone = NULL;
+	int ret = BLKPREP_OK;
+	unsigned long flags;
+
+	zone = blk_lookup_zone(q, sector);
+	/* Might happen during zone initialization */
+	if (!zone) {
+		if (printk_ratelimit())
+			sd_printk(KERN_INFO, sdkp,
+				  "zone for sector %llu not found, %s\n",
+				  (unsigned long long)sector,
+				  sdkp->device->type == TYPE_ZBC ?
+				  "deferring" : "skipping");
+		if (sdkp->device->type != TYPE_ZBC)
+			return BLKPREP_OK;
+		blk_delay_queue(q, SD_ZBC_QUEUE_DELAY);
+		return BLKPREP_DEFER;
+	}
+	spin_lock_irqsave(&zone->lock, flags);
+	if (zone->state == BLK_ZONE_UNKNOWN ||
+	    zone->state == BLK_ZONE_BUSY) {
+		/* Zone information is being (re-)read, try again later */
+		if (printk_ratelimit())
+			sd_printk(KERN_INFO, sdkp,
+				  "zone %llu state %x, deferring\n",
+				  zone->start, zone->state);
+		blk_delay_queue(q, SD_ZBC_QUEUE_DELAY);
+		ret = BLKPREP_DEFER;
+	} else {
+		if (rq_data_dir(rq) == WRITE) {
+			/* Only sequential-write-required zones are checked */
+			if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
+				goto out;
+			if (blk_zone_is_full(zone)) {
+#ifdef SD_ZBC_DEBUG
+				sd_printk(KERN_ERR, sdkp,
+					  "Write to full zone %llu/%llu\n",
+					  (unsigned long long)sector,
+					  zone->wp);
+#endif
+				ret = BLKPREP_KILL;
+				goto out;
+			}
+			if (zone->wp != sector) {
+#ifdef SD_ZBC_DEBUG
+				sd_printk(KERN_ERR, sdkp,
+					  "Misaligned write %llu/%llu\n",
+					  (unsigned long long)sector,
+					  zone->wp);
+#endif
+				ret = BLKPREP_KILL;
+				goto out;
+			}
+			/* Advance the cached write pointer before the write is issued */
+			zone->wp += num_sectors;
+		} else if (blk_zone_is_smr(zone) && (zone->wp <= sector)) {
+#ifdef SD_ZBC_DEBUG
+			sd_printk(KERN_INFO, sdkp,
+				  "Read beyond wp %llu/%llu\n",
+				  (unsigned long long)sector, zone->wp);
+#endif
+			ret = BLKPREP_DONE;
+		}
+	}
+out:
+	spin_unlock_irqrestore(&zone->lock, flags);
+
+	return ret;
+}
+
+/*
+ * Set up (or restart) zone tracking for a zoned disk.
+ *
+ * @sdkp:    disk to set up
+ * @buf:     unused
+ * @buf_len: unused
+ *
+ * On the first call the per-disk zone workqueue is created. On later
+ * calls outstanding zone work is flushed with zone_reset raised so
+ * that a running refresh stops re-queueing itself. In both cases a
+ * full zone refresh starting at LBA 0 is scheduled afterwards.
+ *
+ * Returns 0 on success or -ENOMEM if the workqueue could not be
+ * created.
+ */
+int sd_zbc_setup(struct scsi_disk *sdkp, unsigned char *buf, int buf_len)
+{
+	if (!sdkp->zone_work_q) {
+		char wq_name[32];
+
+		/* Bounded: the prefix plus disk_name may not fit into wq_name */
+		snprintf(wq_name, sizeof(wq_name), "zbc_wq_%s",
+			 sdkp->disk->disk_name);
+		sdkp->zone_work_q = create_singlethread_workqueue(wq_name);
+		if (!sdkp->zone_work_q) {
+			sdev_printk(KERN_WARNING, sdkp->device,
+				    "create zoned disk workqueue failed\n");
+			return -ENOMEM;
+		}
+		atomic_set(&sdkp->zone_reset, 0);
+	} else {
+		/* Stop a running refresh from re-queueing, then wait for it */
+		atomic_inc(&sdkp->zone_reset);
+		drain_workqueue(sdkp->zone_work_q);
+		atomic_set(&sdkp->zone_reset, 0);
+	}
+
+	sd_zbc_update_zones(sdkp, 0, SD_ZBC_BUF_SIZE, false);
+
+	blk_queue_io_min(sdkp->disk->queue, 4);
+	return 0;
+}
+
+/*
+ * Tear down zone tracking for a disk: flag a zone reset so that
+ * pending refresh work stops re-queueing itself, wait for outstanding
+ * work and destroy the workqueue. Does nothing if no workqueue was
+ * ever created.
+ *
+ * NOTE(review): sdkp->zone_work_q is left pointing at the destroyed
+ * workqueue, while scsi_disk_release() in sd.c still calls
+ * drain_workqueue() on it -- confirm the teardown ordering.
+ */
+void sd_zbc_remove(struct scsi_disk *sdkp)
+{
+	struct workqueue_struct *wq = sdkp->zone_work_q;
+
+	if (!wq)
+		return;
+
+	atomic_inc(&sdkp->zone_reset);
+	drain_workqueue(wq);
+	destroy_workqueue(wq);
+}
--
1.8.5.2
^ permalink raw reply related [flat|nested] 9+ messages in thread