linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Bart Van Assche <bvanassche@acm.org>
To: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org, linux-scsi@vger.kernel.org,
	Jaegeuk Kim <jaegeuk@kernel.org>,
	Avri Altman <avri.altman@wdc.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Christoph Hellwig <hch@lst.de>, Ming Lei <ming.lei@redhat.com>,
	Bart Van Assche <bvanassche@acm.org>,
	Keith Busch <kbusch@kernel.org>
Subject: [PATCH v4 2/7] block: Support configuring limits below the page size
Date: Mon, 30 Jan 2023 13:26:51 -0800	[thread overview]
Message-ID: <20230130212656.876311-3-bvanassche@acm.org> (raw)
In-Reply-To: <20230130212656.876311-1-bvanassche@acm.org>

Allow block drivers to configure the following:
* Maximum number of hardware sectors values smaller than
  PAGE_SIZE >> SECTOR_SHIFT. With PAGE_SIZE = 4096 this means that values
  below 8 are supported.
* A maximum segment size below the page size. This is most useful
  for page sizes above 4096 bytes.

The blk_sub_page_segments static branch will be used in later patches to
prevent that performance of block drivers that support segments >=
PAGE_SIZE and max_hw_sectors >= PAGE_SIZE >> SECTOR_SHIFT would be affected.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Keith Busch <kbusch@kernel.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 block/blk-core.c       |  1 +
 block/blk-mq-debugfs.c |  5 +++
 block/blk-settings.c   | 82 +++++++++++++++++++++++++++++++++++++-----
 block/blk.h            | 10 ++++++
 include/linux/blkdev.h |  2 ++
 5 files changed, 92 insertions(+), 8 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 0dacc2df9588..b193040c7c73 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -270,6 +270,7 @@ static void blk_free_queue(struct request_queue *q)
 
 	blk_free_queue_stats(q->stats);
 	kfree(q->poll_stat);
+	blk_disable_sub_page_limits(&q->limits);
 
 	if (queue_is_mq(q))
 		blk_mq_release(q);
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 60d1de0ce624..4f06e02961f3 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -875,7 +875,12 @@ void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx)
 	hctx->sched_debugfs_dir = NULL;
 }
 
+DEFINE_DEBUGFS_ATTRIBUTE(blk_sub_page_limit_queues_fops,
+			blk_sub_page_limit_queues_get, NULL, "%llu\n");
+
 void blk_mq_debugfs_init(void)
 {
 	blk_debugfs_root = debugfs_create_dir("block", NULL);
+	debugfs_create_file("sub_page_limit_queues", 0400, blk_debugfs_root,
+			    NULL, &blk_sub_page_limit_queues_fops);
 }
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 9c9713c9269c..46d43cef8377 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -18,6 +18,11 @@
 #include "blk.h"
 #include "blk-wbt.h"
 
+/* Protects blk_nr_sub_page_limit_queues and blk_sub_page_limits changes. */
+static DEFINE_MUTEX(blk_sub_page_limit_lock);
+static uint32_t blk_nr_sub_page_limit_queues;
+DEFINE_STATIC_KEY_FALSE(blk_sub_page_limits);
+
 void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
 {
 	q->rq_timeout = timeout;
@@ -58,6 +63,7 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->zoned = BLK_ZONED_NONE;
 	lim->zone_write_granularity = 0;
 	lim->dma_alignment = 511;
+	lim->sub_page_limits = false;
 }
 
 /**
@@ -100,6 +106,55 @@ void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce bounce)
 }
 EXPORT_SYMBOL(blk_queue_bounce_limit);
 
+/* For debugfs. */
+int blk_sub_page_limit_queues_get(void *data, u64 *val)
+{
+	*val = READ_ONCE(blk_nr_sub_page_limit_queues);
+
+	return 0;
+}
+
+/**
+ * blk_enable_sub_page_limits - enable support for max_segment_size values smaller than PAGE_SIZE and for max_hw_sectors values below PAGE_SIZE >> SECTOR_SHIFT
+ * @lim: request queue limits for which to enable support of these features.
+ *
+ * Support for these features is not enabled all the time because of the
+ * runtime overhead of these features.
+ */
+static void blk_enable_sub_page_limits(struct queue_limits *lim)
+{
+	if (lim->sub_page_limits)
+		return;
+
+	lim->sub_page_limits = true;
+
+	mutex_lock(&blk_sub_page_limit_lock);
+	if (++blk_nr_sub_page_limit_queues == 1)
+		static_branch_enable(&blk_sub_page_limits);
+	mutex_unlock(&blk_sub_page_limit_lock);
+}
+
+/**
+ * blk_disable_sub_page_limits - disable support for max_segment_size values smaller than PAGE_SIZE and for max_hw_sectors values below PAGE_SIZE >> SECTOR_SHIFT
+ * @lim: request queue limits for which to enable support of these features.
+ *
+ * Support for these features is not enabled all the time because of the
+ * runtime overhead of these features.
+ */
+void blk_disable_sub_page_limits(struct queue_limits *lim)
+{
+	if (!lim->sub_page_limits)
+		return;
+
+	lim->sub_page_limits = false;
+
+	mutex_lock(&blk_sub_page_limit_lock);
+	WARN_ON_ONCE(blk_nr_sub_page_limit_queues <= 0);
+	if (--blk_nr_sub_page_limit_queues == 0)
+		static_branch_disable(&blk_sub_page_limits);
+	mutex_unlock(&blk_sub_page_limit_lock);
+}
+
 /**
  * blk_queue_max_hw_sectors - set max sectors for a request for this queue
  * @q:  the request queue for the device
@@ -122,12 +177,17 @@ EXPORT_SYMBOL(blk_queue_bounce_limit);
 void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
 {
 	struct queue_limits *limits = &q->limits;
+	unsigned int min_max_hw_sectors = PAGE_SIZE >> SECTOR_SHIFT;
 	unsigned int max_sectors;
 
-	if ((max_hw_sectors << 9) < PAGE_SIZE) {
-		max_hw_sectors = 1 << (PAGE_SHIFT - 9);
-		printk(KERN_INFO "%s: set to minimum %d\n",
-		       __func__, max_hw_sectors);
+	if (max_hw_sectors < min_max_hw_sectors) {
+		blk_enable_sub_page_limits(limits);
+		min_max_hw_sectors = 1;
+	}
+
+	if (max_hw_sectors < min_max_hw_sectors) {
+		max_hw_sectors = min_max_hw_sectors;
+		pr_info("%s: set to minimum %u\n", __func__, max_hw_sectors);
 	}
 
 	max_hw_sectors = round_down(max_hw_sectors,
@@ -282,10 +342,16 @@ EXPORT_SYMBOL_GPL(blk_queue_max_discard_segments);
  **/
 void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size)
 {
-	if (max_size < PAGE_SIZE) {
-		max_size = PAGE_SIZE;
-		printk(KERN_INFO "%s: set to minimum %d\n",
-		       __func__, max_size);
+	unsigned int min_max_segment_size = PAGE_SIZE;
+
+	if (max_size < min_max_segment_size) {
+		blk_enable_sub_page_limits(&q->limits);
+		min_max_segment_size = SECTOR_SIZE;
+	}
+
+	if (max_size < min_max_segment_size) {
+		max_size = min_max_segment_size;
+		pr_info("%s: set to minimum %u\n", __func__, max_size);
 	}
 
 	/* see blk_queue_virt_boundary() for the explanation */
diff --git a/block/blk.h b/block/blk.h
index 4c3b3325219a..9a56d7002efc 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -13,6 +13,7 @@ struct elevator_type;
 #define BLK_MAX_TIMEOUT		(5 * HZ)
 
 extern struct dentry *blk_debugfs_root;
+DECLARE_STATIC_KEY_FALSE(blk_sub_page_limits);
 
 struct blk_flush_queue {
 	unsigned int		flush_pending_idx:1;
@@ -32,6 +33,15 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
 					      gfp_t flags);
 void blk_free_flush_queue(struct blk_flush_queue *q);
 
+static inline bool blk_queue_sub_page_limits(const struct queue_limits *lim)
+{
+	return static_branch_unlikely(&blk_sub_page_limits) &&
+		lim->sub_page_limits;
+}
+
+int blk_sub_page_limit_queues_get(void *data, u64 *val);
+void blk_disable_sub_page_limits(struct queue_limits *q);
+
 void blk_freeze_queue(struct request_queue *q);
 void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
 void blk_queue_start_drain(struct request_queue *q);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b9637d63e6f0..af04bf241714 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -319,6 +319,8 @@ struct queue_limits {
 	 * due to possible offsets.
 	 */
 	unsigned int		dma_alignment;
+
+	bool			sub_page_limits;
 };
 
 typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,

  parent reply	other threads:[~2023-01-30 21:27 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-01-30 21:26 [PATCH v4 0/7] Add support for limits below the page size Bart Van Assche
2023-01-30 21:26 ` [PATCH v4 1/7] block: Introduce blk_mq_debugfs_init() Bart Van Assche
2023-02-01 20:58   ` Luis Chamberlain
2023-02-01 21:23     ` Luis Chamberlain
2023-02-01 22:01       ` Bart Van Assche
2023-02-01 23:59         ` Luis Chamberlain
2023-02-02  1:06           ` Bart Van Assche
2023-02-06 16:03             ` Luis Chamberlain
2023-02-06 21:11               ` Bart Van Assche
2023-01-30 21:26 ` Bart Van Assche [this message]
2023-02-01 23:50   ` [PATCH v4 2/7] block: Support configuring limits below the page size Luis Chamberlain
2023-02-07  0:02     ` Bart Van Assche
2023-02-07  0:19       ` Luis Chamberlain
2023-02-07  0:31         ` Bart Van Assche
2023-02-07  2:08           ` Luis Chamberlain
2023-02-07 18:26             ` Bart Van Assche
2023-01-30 21:26 ` [PATCH v4 3/7] block: Support submitting passthrough requests with small segments Bart Van Assche
2023-01-30 21:26 ` [PATCH v4 4/7] block: Add support for filesystem requests and " Bart Van Assche
2023-01-30 21:26 ` [PATCH v4 5/7] block: Add support for small segments in blk_rq_map_user_iov() Bart Van Assche
2023-01-30 21:26 ` [PATCH v4 6/7] scsi_debug: Support configuring the maximum segment size Bart Van Assche
2023-01-30 21:26 ` [PATCH v4 7/7] null_blk: " Bart Van Assche

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230130212656.876311-3-bvanassche@acm.org \
    --to=bvanassche@acm.org \
    --cc=adrian.hunter@intel.com \
    --cc=avri.altman@wdc.com \
    --cc=axboe@kernel.dk \
    --cc=hch@lst.de \
    --cc=jaegeuk@kernel.org \
    --cc=kbusch@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=ming.lei@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).