All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nilay Shroff <nilay@linux.ibm.com>
To: linux-block@vger.kernel.org
Cc: hch@lst.de, ming.lei@redhat.com, dlemoal@kernel.org,
	axboe@kernel.dk, gjoyce@ibm.com
Subject: [PATCHv2 1/6] blk-sysfs: remove q->sysfs_lock for attributes which don't need it
Date: Tue, 18 Feb 2025 13:58:54 +0530	[thread overview]
Message-ID: <20250218082908.265283-2-nilay@linux.ibm.com> (raw)
In-Reply-To: <20250218082908.265283-1-nilay@linux.ibm.com>

There are a few sysfs attributes in the block layer which don't really
need q->sysfs_lock to be acquired while accessing them. The reason is
that writing a value to such an attribute is either atomic or can be
easily protected using WRITE_ONCE()/READ_ONCE(). Moreover, sysfs
attributes are inherently protected by sysfs/kernfs internal locking.

So this change helps segregate all existing sysfs attributes for which
we can avoid acquiring q->sysfs_lock. We group all such attributes,
which don't require any sort of locking, using the macros
QUEUE_RO_ENTRY_NOLOCK() and QUEUE_RW_ENTRY_NOLOCK(). The newly
introduced show/store methods (show_nolock/store_nolock) are assigned
to attributes declared with these new macros. The show_nolock and
store_nolock methods run without holding q->sysfs_lock.

Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
 block/blk-settings.c |   2 +-
 block/blk-sysfs.c    | 106 ++++++++++++++++++++++++++++++++-----------
 2 files changed, 81 insertions(+), 27 deletions(-)

diff --git a/block/blk-settings.c b/block/blk-settings.c
index c44dadc35e1e..c541bf22f543 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -21,7 +21,7 @@
 
 void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
 {
-	q->rq_timeout = timeout;
+	WRITE_ONCE(q->rq_timeout, timeout);
 }
 EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 6f548a4376aa..0c9be7c7ecc1 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -23,9 +23,14 @@
 struct queue_sysfs_entry {
 	struct attribute attr;
 	ssize_t (*show)(struct gendisk *disk, char *page);
+	ssize_t (*show_nolock)(struct gendisk *disk, char *page);
+
 	ssize_t (*store)(struct gendisk *disk, const char *page, size_t count);
+	ssize_t (*store_nolock)(struct gendisk *disk,
+			const char *page, size_t count);
 	int (*store_limit)(struct gendisk *disk, const char *page,
 			size_t count, struct queue_limits *lim);
+
 	void (*load_module)(struct gendisk *disk, const char *page, size_t count);
 };
 
@@ -320,7 +325,12 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
 	ret = queue_var_store(&val, page, count);
 	if (ret < 0)
 		return ret;
-
+	/*
+	 * Here we update two queue flags, each using atomic bitops. Although
+	 * updating the two flags together isn't atomic, it should be harmless
+	 * as those flags are accessed individually using the atomic test_bit
+	 * operation. So we don't grab any lock while updating these flags.
+	 */
 	if (val == 2) {
 		blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
 		blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
@@ -353,7 +363,8 @@ static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
 
 static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page)
 {
-	return sysfs_emit(page, "%u\n", jiffies_to_msecs(disk->queue->rq_timeout));
+	return sysfs_emit(page, "%u\n",
+			jiffies_to_msecs(READ_ONCE(disk->queue->rq_timeout)));
 }
 
 static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
@@ -405,6 +416,19 @@ static struct queue_sysfs_entry _prefix##_entry = {	\
 	.show	= _prefix##_show,			\
 };
 
+#define QUEUE_RO_ENTRY_NOLOCK(_prefix, _name)			\
+static struct queue_sysfs_entry _prefix##_entry = {		\
+	.attr		= {.name = _name, .mode = 0644 },	\
+	.show_nolock	= _prefix##_show,			\
+}
+
+#define QUEUE_RW_ENTRY_NOLOCK(_prefix, _name)			\
+static struct queue_sysfs_entry _prefix##_entry = {		\
+	.attr		= {.name = _name, .mode = 0644 },	\
+	.show_nolock	= _prefix##_show,			\
+	.store_nolock	= _prefix##_store,			\
+}
+
 #define QUEUE_RW_ENTRY(_prefix, _name)			\
 static struct queue_sysfs_entry _prefix##_entry = {	\
 	.attr	= { .name = _name, .mode = 0644 },	\
@@ -446,7 +470,7 @@ QUEUE_RO_ENTRY(queue_max_discard_segments, "max_discard_segments");
 QUEUE_RO_ENTRY(queue_discard_granularity, "discard_granularity");
 QUEUE_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes");
 QUEUE_LIM_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes");
-QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data");
+QUEUE_RO_ENTRY_NOLOCK(queue_discard_zeroes_data, "discard_zeroes_data");
 
 QUEUE_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes");
 QUEUE_RO_ENTRY(queue_atomic_write_boundary_sectors,
@@ -454,25 +478,25 @@ QUEUE_RO_ENTRY(queue_atomic_write_boundary_sectors,
 QUEUE_RO_ENTRY(queue_atomic_write_unit_max, "atomic_write_unit_max_bytes");
 QUEUE_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes");
 
-QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes");
+QUEUE_RO_ENTRY_NOLOCK(queue_write_same_max, "write_same_max_bytes");
 QUEUE_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes");
 QUEUE_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes");
 QUEUE_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");
 
 QUEUE_RO_ENTRY(queue_zoned, "zoned");
-QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
+QUEUE_RO_ENTRY_NOLOCK(queue_nr_zones, "nr_zones");
 QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
 QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");
 
-QUEUE_RW_ENTRY(queue_nomerges, "nomerges");
+QUEUE_RW_ENTRY_NOLOCK(queue_nomerges, "nomerges");
 QUEUE_LIM_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough");
-QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity");
-QUEUE_RW_ENTRY(queue_poll, "io_poll");
-QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay");
+QUEUE_RW_ENTRY_NOLOCK(queue_rq_affinity, "rq_affinity");
+QUEUE_RW_ENTRY_NOLOCK(queue_poll, "io_poll");
+QUEUE_RW_ENTRY_NOLOCK(queue_poll_delay, "io_poll_delay");
 QUEUE_LIM_RW_ENTRY(queue_wc, "write_cache");
 QUEUE_RO_ENTRY(queue_fua, "fua");
 QUEUE_RO_ENTRY(queue_dax, "dax");
-QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
+QUEUE_RW_ENTRY_NOLOCK(queue_io_timeout, "io_timeout");
 QUEUE_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
 QUEUE_RO_ENTRY(queue_dma_alignment, "dma_alignment");
 
@@ -561,9 +585,11 @@ QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
 
 /* Common attributes for bio-based and request-based queues. */
 static struct attribute *queue_attrs[] = {
+	/*
+	 * attributes protected with q->sysfs_lock
+	 */
 	&queue_ra_entry.attr,
 	&queue_max_hw_sectors_entry.attr,
-	&queue_max_sectors_entry.attr,
 	&queue_max_segments_entry.attr,
 	&queue_max_discard_segments_entry.attr,
 	&queue_max_integrity_segments_entry.attr,
@@ -575,46 +601,63 @@ static struct attribute *queue_attrs[] = {
 	&queue_io_min_entry.attr,
 	&queue_io_opt_entry.attr,
 	&queue_discard_granularity_entry.attr,
-	&queue_max_discard_sectors_entry.attr,
 	&queue_max_hw_discard_sectors_entry.attr,
-	&queue_discard_zeroes_data_entry.attr,
 	&queue_atomic_write_max_sectors_entry.attr,
 	&queue_atomic_write_boundary_sectors_entry.attr,
 	&queue_atomic_write_unit_min_entry.attr,
 	&queue_atomic_write_unit_max_entry.attr,
-	&queue_write_same_max_entry.attr,
 	&queue_max_write_zeroes_sectors_entry.attr,
 	&queue_max_zone_append_sectors_entry.attr,
 	&queue_zone_write_granularity_entry.attr,
-	&queue_rotational_entry.attr,
 	&queue_zoned_entry.attr,
-	&queue_nr_zones_entry.attr,
 	&queue_max_open_zones_entry.attr,
 	&queue_max_active_zones_entry.attr,
-	&queue_nomerges_entry.attr,
+	&queue_fua_entry.attr,
+	&queue_dax_entry.attr,
+	&queue_virt_boundary_mask_entry.attr,
+	&queue_dma_alignment_entry.attr,
+
+	/*
+	 * attributes protected with q->limits_lock
+	 */
+	&queue_max_sectors_entry.attr,
+	&queue_max_discard_sectors_entry.attr,
+	&queue_rotational_entry.attr,
 	&queue_iostats_passthrough_entry.attr,
 	&queue_iostats_entry.attr,
 	&queue_stable_writes_entry.attr,
 	&queue_add_random_entry.attr,
-	&queue_poll_entry.attr,
 	&queue_wc_entry.attr,
-	&queue_fua_entry.attr,
-	&queue_dax_entry.attr,
+
+	/*
+	 * attributes which don't require locking
+	 */
+	&queue_nomerges_entry.attr,
+	&queue_poll_entry.attr,
 	&queue_poll_delay_entry.attr,
-	&queue_virt_boundary_mask_entry.attr,
-	&queue_dma_alignment_entry.attr,
+	&queue_discard_zeroes_data_entry.attr,
+	&queue_write_same_max_entry.attr,
+	&queue_nr_zones_entry.attr,
+
 	NULL,
 };
 
 /* Request-based queue attributes that are not relevant for bio-based queues. */
 static struct attribute *blk_mq_queue_attrs[] = {
+	/*
+	 * attributes protected with q->sysfs_lock
+	 */
 	&queue_requests_entry.attr,
 	&elv_iosched_entry.attr,
-	&queue_rq_affinity_entry.attr,
-	&queue_io_timeout_entry.attr,
 #ifdef CONFIG_BLK_WBT
 	&queue_wb_lat_entry.attr,
 #endif
+	/*
+	 * attributes which don't require locking
+	 */
+	&queue_rq_affinity_entry.attr,
+	&queue_io_timeout_entry.attr,
+
 	NULL,
 };
 
@@ -666,8 +709,12 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
 	ssize_t res;
 
-	if (!entry->show)
+	if (!entry->show && !entry->show_nolock)
 		return -EIO;
+
+	if (entry->show_nolock)
+		return entry->show_nolock(disk, page);
+
 	mutex_lock(&disk->queue->sysfs_lock);
 	res = entry->show(disk, page);
 	mutex_unlock(&disk->queue->sysfs_lock);
@@ -684,7 +731,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
 	unsigned int memflags;
 	ssize_t res;
 
-	if (!entry->store_limit && !entry->store)
+	if (!entry->store_limit && !entry->store_nolock && !entry->store)
 		return -EIO;
 
 	/*
@@ -695,6 +742,13 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
 	if (entry->load_module)
 		entry->load_module(disk, page, length);
 
+	if (entry->store_nolock) {
+		memflags = blk_mq_freeze_queue(q);
+		res = entry->store_nolock(disk, page, length);
+		blk_mq_unfreeze_queue(q, memflags);
+		return res;
+	}
+
 	if (entry->store_limit) {
 		struct queue_limits lim = queue_limits_start_update(q);
 
-- 
2.47.1


  reply	other threads:[~2025-02-18  8:29 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-18  8:28 [PATCHv2 0/6] block: fix lock order and remove redundant locking Nilay Shroff
2025-02-18  8:28 ` Nilay Shroff [this message]
2025-02-18  8:46   ` [PATCHv2 1/6] blk-sysfs: remove q->sysfs_lock for attributes which don't need it Christoph Hellwig
2025-02-18 11:26     ` Nilay Shroff
2025-02-21 14:02       ` Nilay Shroff
2025-02-22 12:44         ` Ming Lei
2025-02-24 13:09           ` Nilay Shroff
2025-02-24 14:49           ` Christoph Hellwig
2025-02-26 12:09             ` Nilay Shroff
2025-02-24  8:41         ` Hannes Reinecke
2025-02-24 13:12           ` Nilay Shroff
2025-02-18 12:10   ` Ming Lei
2025-02-18 13:11     ` Nilay Shroff
2025-02-18 13:45       ` Ming Lei
2025-02-18 16:29         ` Christoph Hellwig
2025-02-19  3:24           ` Ming Lei
2025-02-19  5:42             ` Christoph Hellwig
2025-02-19  8:34             ` Nilay Shroff
2025-02-19  8:56               ` Nilay Shroff
2025-02-19  9:20                 ` Ming Lei
2025-02-18  8:28 ` [PATCHv2 2/6] blk-sysfs: acquire q->limits_lock while reading attributes Nilay Shroff
2025-02-18  8:46   ` Christoph Hellwig
2025-02-18  8:28 ` [PATCHv2 3/6] block: Introduce a dedicated lock for protecting queue elevator updates Nilay Shroff
2025-02-18  9:05   ` Christoph Hellwig
2025-02-18 11:14     ` Nilay Shroff
2025-02-18 16:32       ` Christoph Hellwig
2025-02-19  8:41         ` Nilay Shroff
2025-02-18  8:28 ` [PATCHv2 4/6] blk-sysfs: protect nr_requests update using q->elevator_lock Nilay Shroff
2025-02-18  8:28 ` [PATCHv2 5/6] blk-sysfs: protect wbt_lat_usec " Nilay Shroff
2025-02-18  8:28 ` [PATCHv2 6/6] blk-sysfs: protect read_ahead_kb using q->limits_lock Nilay Shroff
2025-02-18  9:12   ` Christoph Hellwig
2025-02-18 11:27     ` Nilay Shroff
2025-02-18  9:21 ` [PATCHv2 0/6] block: fix lock order and remove redundant locking Christoph Hellwig
2025-02-18 12:09   ` Nilay Shroff

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250218082908.265283-2-nilay@linux.ibm.com \
    --to=nilay@linux.ibm.com \
    --cc=axboe@kernel.dk \
    --cc=dlemoal@kernel.org \
    --cc=gjoyce@ibm.com \
    --cc=hch@lst.de \
    --cc=linux-block@vger.kernel.org \
    --cc=ming.lei@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.