* [PATCH 1/2] block: have drivers use blk_queue_max_discard_sectors()
2015-07-14 15:02 [PATCH 0/2] Configurable max discard size Jens Axboe
@ 2015-07-14 15:02 ` Jens Axboe
2015-07-14 15:02 ` [PATCH 2/2] block: make /sys/block/<dev>/queue/discard_max_bytes writeable Jens Axboe
2015-07-14 16:01 ` [PATCH 0/2] Configurable max discard size Christoph Hellwig
2 siblings, 0 replies; 9+ messages in thread
From: Jens Axboe @ 2015-07-14 15:02 UTC (permalink / raw)
To: linux-kernel; +Cc: snitzer, hch, Jens Axboe
Some drivers use it now, others just set the limits field manually.
But in preparation for splitting this into a hard and soft limit,
ensure that they all call the proper function for setting the hw
limit for discards.
Signed-off-by: Jens Axboe <axboe@fb.com>
---
drivers/block/brd.c | 2 +-
drivers/block/drbd/drbd_nl.c | 4 ++--
drivers/block/loop.c | 4 ++--
drivers/block/nbd.c | 2 +-
drivers/block/nvme-core.c | 2 +-
drivers/block/rbd.c | 2 +-
drivers/block/skd_main.c | 2 +-
drivers/block/zram/zram_drv.c | 2 +-
drivers/md/bcache/super.c | 2 +-
drivers/mmc/card/queue.c | 2 +-
drivers/mtd/mtd_blkdevs.c | 2 +-
drivers/scsi/sd.c | 4 ++--
12 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 64ab4951e9d6..e573e470bd8a 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -500,7 +500,7 @@ static struct brd_device *brd_alloc(int i)
blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE);
brd->brd_queue->limits.discard_granularity = PAGE_SIZE;
- brd->brd_queue->limits.max_discard_sectors = UINT_MAX;
+ blk_queue_max_discard_sectors(brd->brd_queue, UINT_MAX);
brd->brd_queue->limits.discard_zeroes_data = 1;
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 74df8cfad414..e80cbefbc2b5 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1156,14 +1156,14 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
/* For now, don't allow more than one activity log extent worth of data
* to be discarded in one go. We may need to rework drbd_al_begin_io()
* to allow for even larger discard ranges */
- q->limits.max_discard_sectors = DRBD_MAX_DISCARD_SECTORS;
+ blk_queue_max_discard_sectors(q, DRBD_MAX_DISCARD_SECTORS);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
/* REALLY? Is stacking secdiscard "legal"? */
if (blk_queue_secdiscard(b))
queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q);
} else {
- q->limits.max_discard_sectors = 0;
+ blk_queue_max_discard_sectors(q, 0);
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
queue_flag_clear_unlocked(QUEUE_FLAG_SECDISCARD, q);
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f7a4c9d7f721..f9889b6bc02c 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -675,7 +675,7 @@ static void loop_config_discard(struct loop_device *lo)
lo->lo_encrypt_key_size) {
q->limits.discard_granularity = 0;
q->limits.discard_alignment = 0;
- q->limits.max_discard_sectors = 0;
+ blk_queue_max_discard_sectors(q, 0);
q->limits.discard_zeroes_data = 0;
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
return;
@@ -683,7 +683,7 @@ static void loop_config_discard(struct loop_device *lo)
q->limits.discard_granularity = inode->i_sb->s_blocksize;
q->limits.discard_alignment = 0;
- q->limits.max_discard_sectors = UINT_MAX >> 9;
+ blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
q->limits.discard_zeroes_data = 1;
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
}
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 0e385d8e9b86..f169faf9838a 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -822,7 +822,7 @@ static int __init nbd_init(void)
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue);
disk->queue->limits.discard_granularity = 512;
- disk->queue->limits.max_discard_sectors = UINT_MAX;
+ blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
disk->queue->limits.discard_zeroes_data = 0;
blk_queue_max_hw_sectors(disk->queue, 65536);
disk->queue->limits.max_sectors = 256;
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index d1d6141920d3..b1eb9d321071 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1935,7 +1935,7 @@ static void nvme_config_discard(struct nvme_ns *ns)
ns->queue->limits.discard_zeroes_data = 0;
ns->queue->limits.discard_alignment = logical_block_size;
ns->queue->limits.discard_granularity = logical_block_size;
- ns->queue->limits.max_discard_sectors = 0xffffffff;
+ blk_queue_max_discard_sectors(ns->queue, 0xffffffff);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
}
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index d94529d5c8e9..dcc86937f55c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3803,7 +3803,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
q->limits.discard_granularity = segment_size;
q->limits.discard_alignment = segment_size;
- q->limits.max_discard_sectors = segment_size / SECTOR_SIZE;
+ blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE);
q->limits.discard_zeroes_data = 1;
blk_queue_merge_bvec(q, rbd_merge_bvec);
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 1e46eb2305c0..586f9168ffa4 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -4422,7 +4422,7 @@ static int skd_cons_disk(struct skd_device *skdev)
/* DISCARD Flag initialization. */
q->limits.discard_granularity = 8192;
q->limits.discard_alignment = 0;
- q->limits.max_discard_sectors = UINT_MAX >> 9;
+ blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
q->limits.discard_zeroes_data = 1;
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index fb655e8d1e3b..f439ad2800da 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1244,7 +1244,7 @@ static int zram_add(void)
blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
- zram->disk->queue->limits.max_discard_sectors = UINT_MAX;
+ blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
/*
* zram_bio_discard() will clear all logical blocks if logical block
* size is identical with physical block size(PAGE_SIZE). But if it is
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 94980bfca434..fc8e545ced18 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -830,7 +830,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
q->limits.max_sectors = UINT_MAX;
q->limits.max_segment_size = UINT_MAX;
q->limits.max_segments = BIO_MAX_PAGES;
- q->limits.max_discard_sectors = UINT_MAX;
+ blk_queue_max_discard_sectors(q, UINT_MAX);
q->limits.discard_granularity = 512;
q->limits.io_min = block_size;
q->limits.logical_block_size = block_size;
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index b5a2b145d89f..5daf302835b1 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -165,7 +165,7 @@ static void mmc_queue_setup_discard(struct request_queue *q,
return;
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
- q->limits.max_discard_sectors = max_discard;
+ blk_queue_max_discard_sectors(q, max_discard);
if (card->erased_byte == 0 && !mmc_can_discard(card))
q->limits.discard_zeroes_data = 1;
q->limits.discard_granularity = card->pref_erase << 9;
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 41acc507b22e..1b96cf771d2b 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -423,7 +423,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
if (tr->discard) {
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, new->rq);
- new->rq->limits.max_discard_sectors = UINT_MAX;
+ blk_queue_max_discard_sectors(new->rq, UINT_MAX);
}
gd->queue = new->rq;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 3b2fcb4fada0..160e44e7b24a 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -647,7 +647,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
switch (mode) {
case SD_LBP_DISABLE:
- q->limits.max_discard_sectors = 0;
+ blk_queue_max_discard_sectors(q, 0);
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
return;
@@ -675,7 +675,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
break;
}
- q->limits.max_discard_sectors = max_blocks * (logical_block_size >> 9);
+ blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9));
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
}
--
2.4.1.168.g1ea28e1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 2/2] block: make /sys/block/<dev>/queue/discard_max_bytes writeable
2015-07-14 15:02 [PATCH 0/2] Configurable max discard size Jens Axboe
2015-07-14 15:02 ` [PATCH 1/2] block: have drivers use blk_queue_max_discard_sectors() Jens Axboe
@ 2015-07-14 15:02 ` Jens Axboe
2015-07-14 15:23 ` Mike Snitzer
2015-07-14 16:01 ` [PATCH 0/2] Configurable max discard size Christoph Hellwig
2 siblings, 1 reply; 9+ messages in thread
From: Jens Axboe @ 2015-07-14 15:02 UTC (permalink / raw)
To: linux-kernel; +Cc: snitzer, hch, Jens Axboe
Lots of devices support huge discard sizes these days. Depending
on how the device handles them internally, huge discards can
introduce massive latencies (hundreds of msec) on the device side.
We have a sysfs file, discard_max_bytes, that advertises the max
hardware supported discard size. Make this writeable, and split
the settings into a soft and hard limit. This can be set from
'discard_granularity' and up to the hardware limit.
Signed-off-by: Jens Axboe <axboe@fb.com>
---
Documentation/block/queue-sysfs.txt | 4 +++-
block/blk-settings.c | 4 ++++
block/blk-sysfs.c | 26 +++++++++++++++++++++++++-
include/linux/blkdev.h | 1 +
4 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index 3a29f8914df9..3748cf827131 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -20,7 +20,7 @@ This shows the size of internal allocation of the device in bytes, if
reported by the device. A value of '0' means device does not support
the discard functionality.
-discard_max_bytes (RO)
+discard_max_bytes (RW)
----------------------
Devices that support discard functionality may have internal limits on
the number of bytes that can be trimmed or unmapped in a single operation.
@@ -28,6 +28,8 @@ The discard_max_bytes parameter is set by the device driver to the maximum
number of bytes that can be discarded in a single operation. Discard
requests issued to the device must not exceed this limit. A discard_max_bytes
value of 0 means that the device does not support discard functionality.
+Writing a lower value to this file can limit the maximum discard size issued
+to the device, which can help latencies.
discard_zeroes_data (RO)
------------------------
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 12600bfffca9..b38d8d723276 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -116,6 +116,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->chunk_sectors = 0;
lim->max_write_same_sectors = 0;
lim->max_discard_sectors = 0;
+ lim->max_hw_discard_sectors = 0;
lim->discard_granularity = 0;
lim->discard_alignment = 0;
lim->discard_misaligned = 0;
@@ -303,6 +304,7 @@ EXPORT_SYMBOL(blk_queue_chunk_sectors);
void blk_queue_max_discard_sectors(struct request_queue *q,
unsigned int max_discard_sectors)
{
+ q->limits.max_hw_discard_sectors = max_discard_sectors;
q->limits.max_discard_sectors = max_discard_sectors;
}
EXPORT_SYMBOL(blk_queue_max_discard_sectors);
@@ -641,6 +643,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->max_discard_sectors = min_not_zero(t->max_discard_sectors,
b->max_discard_sectors);
+ t->max_hw_discard_sectors = min_not_zero(t->max_hw_discard_sectors,
+ b->max_hw_discard_sectors);
t->discard_granularity = max(t->discard_granularity,
b->discard_granularity);
t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) %
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 6264b382d4d1..3d1dba600228 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -151,6 +151,29 @@ static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
(unsigned long long)q->limits.max_discard_sectors << 9);
}
+static ssize_t queue_discard_max_store(struct request_queue *q,
+ const char *page, size_t count)
+{
+ unsigned long max_discard;
+ ssize_t ret = queue_var_store(&max_discard, page, count);
+
+ if (ret < 0)
+ return ret;
+
+ if (max_discard & (q->limits.discard_granularity - 1))
+ return -EINVAL;
+
+ max_discard >>= 9;
+ if (max_discard > UINT_MAX)
+ return -EINVAL;
+
+ if (max_discard > q->limits.max_hw_discard_sectors)
+ max_discard = q->limits.max_hw_discard_sectors;
+
+ q->limits.max_discard_sectors = max_discard;
+ return ret;
+}
+
static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
{
return queue_var_show(queue_discard_zeroes_data(q), page);
@@ -361,8 +384,9 @@ static struct queue_sysfs_entry queue_discard_granularity_entry = {
};
static struct queue_sysfs_entry queue_discard_max_entry = {
- .attr = {.name = "discard_max_bytes", .mode = S_IRUGO },
+ .attr = {.name = "discard_max_bytes", .mode = S_IRUGO | S_IWUSR },
.show = queue_discard_max_show,
+ .store = queue_discard_max_store,
};
static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d4068c17d0df..243f29e779ec 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -268,6 +268,7 @@ struct queue_limits {
unsigned int io_min;
unsigned int io_opt;
unsigned int max_discard_sectors;
+ unsigned int max_hw_discard_sectors;
unsigned int max_write_same_sectors;
unsigned int discard_granularity;
unsigned int discard_alignment;
--
2.4.1.168.g1ea28e1
^ permalink raw reply related [flat|nested] 9+ messages in thread