* [PATCH] dm-mpath: Don't grab work_mutex while probing paths
@ 2025-05-16 1:55 Benjamin Marzinski
2025-05-16 11:35 ` Mikulas Patocka
0 siblings, 1 reply; 5+ messages in thread
From: Benjamin Marzinski @ 2025-05-16 1:55 UTC (permalink / raw)
To: Mikulas Patocka, Mike Snitzer
Cc: dm-devel, Kevin Wolf, Hanna Czenczek, Martin Wilck, Paolo Bonzini,
Christoph Hellwig, Hannes Reinecke
Grabbing the work_mutex keeps probe_active_paths() from running at the
same time as multipath_message(). The only messages that could interfere
with probing the paths are "disable_group", "enable_group", and
"switch_group". These messages could force multipath to pick a new
pathgroup while probe_active_paths() was probing the current pathgroup.
If the multipath device has a hardware handler, and it switches active
pathgroups while there is outstanding IO to a path device, it's possible
that IO to the path will fail, even if the path would be usable if it
was in the active pathgroup. To avoid this, do not clear the current
pathgroup for the *_group messages while probe_active_paths() is
running. Instead set a flag, and probe_active_paths() will clear the
current pathgroup when it finishes probing the paths. For this to work
correctly, multipath needs to check current_pg before next_pg in
choose_pgpath(), but before this patch next_pg was only ever set when
current_pg was cleared, so this doesn't change the current behavior when
paths aren't being probed. Even with this change, it is still possible
to switch pathgroups while the probe is running, but only if all the
paths have failed, and the probe function will skip them as well in this
case.
If multiple DM_MPATH_PROBE_PATHS requests are received at once, there is
no point in repeatedly issuing test IOs. Instead, the later probes
should wait for the current probe to complete. If current pathgroup is
still the same as the one that was just checked, the other probes should
skip probing and just check the number of valid paths. Finally, probing
the paths should quit early if the multipath device is trying to
suspend, instead of continuing to issue test IOs, delaying the suspend.
While this patch will not change the behavior of existing multipath
users which don't use the DM_MPATH_PROBE_PATHS ioctl, when that ioctl
is used, the behavior of the "disable_group", "enable_group", and
"switch_group" messages can change subtly. When these messages return,
the next IO to the multipath device will no longer be guaranteed to
choose a new pathgroup. Instead, choosing a new pathgroup could be
delayed by an in-progress DM_MPATH_PROBE_PATHS ioctl. The userspace
multipath tools make no assumptions about what will happen to IOs after
sending these messages, so this change will not effect already released
versions of them, even if the DM_MPATH_PROBE_PATHS ioctl is run
alongside them.
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
drivers/md/dm-mpath.c | 118 ++++++++++++++++++++++++++++--------------
1 file changed, 78 insertions(+), 40 deletions(-)
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 53861ad5dd1d..12b7bcae490c 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -79,6 +79,7 @@ struct multipath {
struct pgpath *current_pgpath;
struct priority_group *current_pg;
struct priority_group *next_pg; /* Switch to this PG if set */
+ struct priority_group *last_probed_pg;
atomic_t nr_valid_paths; /* Total number of usable paths */
unsigned int nr_priority_groups;
@@ -87,6 +88,7 @@ struct multipath {
const char *hw_handler_name;
char *hw_handler_params;
wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */
+ wait_queue_head_t probe_wait; /* Wait for probing paths */
unsigned int pg_init_retries; /* Number of times to retry pg_init */
unsigned int pg_init_delay_msecs; /* Number of msecs before pg_init retry */
atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */
@@ -100,6 +102,7 @@ struct multipath {
struct bio_list queued_bios;
struct timer_list nopath_timer; /* Timeout for queue_if_no_path */
+ bool is_suspending;
};
/*
@@ -132,6 +135,8 @@ static void queue_if_no_path_timeout_work(struct timer_list *t);
#define MPATHF_PG_INIT_DISABLED 4 /* pg_init is not currently allowed */
#define MPATHF_PG_INIT_REQUIRED 5 /* pg_init needs calling? */
#define MPATHF_PG_INIT_DELAY_RETRY 6 /* Delay pg_init retry? */
+#define MPATHF_DELAY_PG_SWITCH 7 /* Delay switching pg if it still has paths */
+#define MPATHF_NEED_PG_SWITCH 8 /* Need to switch pgs after the delay has ended */
static bool mpath_double_check_test_bit(int MPATHF_bit, struct multipath *m)
{
@@ -254,6 +259,7 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
atomic_set(&m->pg_init_count, 0);
m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
init_waitqueue_head(&m->pg_init_wait);
+ init_waitqueue_head(&m->probe_wait);
return 0;
}
@@ -413,13 +419,21 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
goto failed;
}
+ /* Don't change PG until it has no remaining paths */
+ pg = READ_ONCE(m->current_pg);
+ if (pg) {
+ pgpath = choose_path_in_pg(m, pg, nr_bytes);
+ if (!IS_ERR_OR_NULL(pgpath))
+ return pgpath;
+ }
+
/* Were we instructed to switch PG? */
if (READ_ONCE(m->next_pg)) {
spin_lock_irqsave(&m->lock, flags);
pg = m->next_pg;
if (!pg) {
spin_unlock_irqrestore(&m->lock, flags);
- goto check_current_pg;
+ goto check_all_pgs;
}
m->next_pg = NULL;
spin_unlock_irqrestore(&m->lock, flags);
@@ -427,16 +441,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
if (!IS_ERR_OR_NULL(pgpath))
return pgpath;
}
-
- /* Don't change PG until it has no remaining paths */
-check_current_pg:
- pg = READ_ONCE(m->current_pg);
- if (pg) {
- pgpath = choose_path_in_pg(m, pg, nr_bytes);
- if (!IS_ERR_OR_NULL(pgpath))
- return pgpath;
- }
-
+check_all_pgs:
/*
* Loop through priority groups until we find a valid path.
* First time we skip PGs marked 'bypassed'.
@@ -1439,15 +1444,19 @@ static int action_dev(struct multipath *m, dev_t dev, action_fn action)
* Temporarily try to avoid having to use the specified PG
*/
static void bypass_pg(struct multipath *m, struct priority_group *pg,
- bool bypassed)
+ bool bypassed, bool can_be_delayed)
{
unsigned long flags;
spin_lock_irqsave(&m->lock, flags);
pg->bypassed = bypassed;
- m->current_pgpath = NULL;
- m->current_pg = NULL;
+ if (can_be_delayed && test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags))
+ set_bit(MPATHF_NEED_PG_SWITCH, &m->flags);
+ else {
+ m->current_pgpath = NULL;
+ m->current_pg = NULL;
+ }
spin_unlock_irqrestore(&m->lock, flags);
@@ -1476,8 +1485,12 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
if (--pgnum)
continue;
- m->current_pgpath = NULL;
- m->current_pg = NULL;
+ if (test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags))
+ set_bit(MPATHF_NEED_PG_SWITCH, &m->flags);
+ else {
+ m->current_pgpath = NULL;
+ m->current_pg = NULL;
+ }
m->next_pg = pg;
}
spin_unlock_irqrestore(&m->lock, flags);
@@ -1507,7 +1520,7 @@ static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed)
break;
}
- bypass_pg(m, pg, bypassed);
+ bypass_pg(m, pg, bypassed, true);
return 0;
}
@@ -1561,7 +1574,7 @@ static void pg_init_done(void *data, int errors)
* Probably doing something like FW upgrade on the
* controller so try the other pg.
*/
- bypass_pg(m, pg, true);
+ bypass_pg(m, pg, true, false);
break;
case SCSI_DH_RETRY:
/* Wait before retrying. */
@@ -1741,7 +1754,11 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
static void multipath_presuspend(struct dm_target *ti)
{
struct multipath *m = ti->private;
+ unsigned long flags;
+ spin_lock_irqsave(&m->lock, flags);
+ m->is_suspending = true;
+ spin_unlock_irqrestore(&m->lock, flags);
/* FIXME: bio-based shouldn't need to always disable queue_if_no_path */
if (m->queue_mode == DM_TYPE_BIO_BASED || !dm_noflush_suspending(m->ti))
queue_if_no_path(m, false, true, __func__);
@@ -1765,6 +1782,7 @@ static void multipath_resume(struct dm_target *ti)
unsigned long flags;
spin_lock_irqsave(&m->lock, flags);
+ m->is_suspending = false;
if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) {
set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
@@ -1845,10 +1863,10 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
DMEMIT("%u ", m->nr_priority_groups);
- if (m->next_pg)
- pg_num = m->next_pg->pg_num;
- else if (m->current_pg)
+ if (m->current_pg)
pg_num = m->current_pg->pg_num;
+ else if (m->next_pg)
+ pg_num = m->next_pg->pg_num;
else
pg_num = (m->nr_priority_groups ? 1 : 0);
@@ -2077,35 +2095,55 @@ static int probe_path(struct pgpath *pgpath)
static int probe_active_paths(struct multipath *m)
{
struct pgpath *pgpath;
- struct priority_group *pg;
+ struct priority_group *pg = NULL;
unsigned long flags;
int r = 0;
- mutex_lock(&m->work_mutex);
-
spin_lock_irqsave(&m->lock, flags);
- if (test_bit(MPATHF_QUEUE_IO, &m->flags))
- pg = NULL;
- else
- pg = m->current_pg;
+ if (test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags)) {
+ wait_event_lock_irq(m->probe_wait,
+ !test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags),
+ m->lock);
+ /*
+ * if we waited because a probe was already in progress,
+ * and it probed the current active pathgroup, don't
+ * reprobe. Just return the number of valid paths
+ */
+ if (m->current_pg == m->last_probed_pg)
+ goto skip_probe;
+ }
+ if (!m->current_pg || m->is_suspending ||
+ test_bit(MPATHF_QUEUE_IO, &m->flags))
+ goto skip_probe;
+ set_bit(MPATHF_DELAY_PG_SWITCH, &m->flags);
+ pg = m->last_probed_pg = m->current_pg;
spin_unlock_irqrestore(&m->lock, flags);
- if (pg) {
- list_for_each_entry(pgpath, &pg->pgpaths, list) {
- if (!pgpath->is_active)
- continue;
+ list_for_each_entry(pgpath, &pg->pgpaths, list) {
+ if (pg != READ_ONCE(m->current_pg) ||
+ READ_ONCE(m->is_suspending))
+ goto out;
+ if (!pgpath->is_active)
+ continue;
- r = probe_path(pgpath);
- if (r < 0)
- goto out;
- }
+ r = probe_path(pgpath);
+ if (r < 0)
+ goto out;
}
- if (!atomic_read(&m->nr_valid_paths))
- r = -ENOTCONN;
-
out:
- mutex_unlock(&m->work_mutex);
+ spin_lock_irqsave(&m->lock, flags);
+ clear_bit(MPATHF_DELAY_PG_SWITCH, &m->flags);
+ if (test_and_clear_bit(MPATHF_NEED_PG_SWITCH, &m->flags)) {
+ m->current_pgpath = NULL;
+ m->current_pg = NULL;
+ }
+skip_probe:
+ if (r == 0 && !atomic_read(&m->nr_valid_paths))
+ r = -ENOTCONN;
+ spin_unlock_irqrestore(&m->lock, flags);
+ if (pg)
+ wake_up(&m->probe_wait);
return r;
}
--
2.48.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] dm-mpath: Don't grab work_mutex while probing paths
2025-05-16 1:55 [PATCH] dm-mpath: Don't grab work_mutex while probing paths Benjamin Marzinski
@ 2025-05-16 11:35 ` Mikulas Patocka
2025-05-16 19:16 ` Benjamin Marzinski
2025-05-19 13:50 ` [PATCH] dm mpath: replace spin_lock_irqsave with spin_lock_irq Mikulas Patocka
0 siblings, 2 replies; 5+ messages in thread
From: Mikulas Patocka @ 2025-05-16 11:35 UTC (permalink / raw)
To: Benjamin Marzinski
Cc: Mike Snitzer, dm-devel, Kevin Wolf, Hanna Czenczek, Martin Wilck,
Paolo Bonzini, Christoph Hellwig, Hannes Reinecke
On Thu, 15 May 2025, Benjamin Marzinski wrote:
> @@ -2077,35 +2095,55 @@ static int probe_path(struct pgpath *pgpath)
> static int probe_active_paths(struct multipath *m)
> {
> struct pgpath *pgpath;
> - struct priority_group *pg;
> + struct priority_group *pg = NULL;
> unsigned long flags;
> int r = 0;
>
> - mutex_lock(&m->work_mutex);
> -
> spin_lock_irqsave(&m->lock, flags);
Hi
I suggest replacing spin_lock_irqsave/spin_unlock_irqrestore with
spin_lock_irq/spin_unlock_irq here and in some other places where it is
known that interrupts are enabled (for example __map_bio,
process_queued_bios, multipath_ctr, flush_multipath_work,
multipath_resume, multipath_status, multipath_prepare_ioctl, ...).
I accepted this patch, so you can send the spinlock changes in a follow-up
patch.
Mikulas
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] dm-mpath: Don't grab work_mutex while probing paths
2025-05-16 11:35 ` Mikulas Patocka
@ 2025-05-16 19:16 ` Benjamin Marzinski
2025-05-19 13:50 ` [PATCH] dm mpath: replace spin_lock_irqsave with spin_lock_irq Mikulas Patocka
1 sibling, 0 replies; 5+ messages in thread
From: Benjamin Marzinski @ 2025-05-16 19:16 UTC (permalink / raw)
To: Mikulas Patocka
Cc: Mike Snitzer, dm-devel, Kevin Wolf, Hanna Czenczek, Martin Wilck,
Paolo Bonzini, Christoph Hellwig, Hannes Reinecke
On Fri, May 16, 2025 at 01:35:39PM +0200, Mikulas Patocka wrote:
>
>
> On Thu, 15 May 2025, Benjamin Marzinski wrote:
>
> > @@ -2077,35 +2095,55 @@ static int probe_path(struct pgpath *pgpath)
> > static int probe_active_paths(struct multipath *m)
> > {
> > struct pgpath *pgpath;
> > - struct priority_group *pg;
> > + struct priority_group *pg = NULL;
> > unsigned long flags;
> > int r = 0;
> >
> > - mutex_lock(&m->work_mutex);
> > -
> > spin_lock_irqsave(&m->lock, flags);
>
> Hi
>
> I suggest replacing spin_lock_irqsave/spin_unlock_irqrestore with
> spin_lock_irq/spin_unlock_irq here and in some other places where it is
> known that interrupts are enabled (for example __map_bio,
> process_queued_bios, multipath_ctr, flush_multipath_work,
> multipath_resume, multipath_status, multipath_prepare_ioctl, ...).
>
> I accepted this patch, so you can send the spinlock changes in a follow-up
> patch.
Sure. I can do that.
-Ben
>
> Mikulas
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH] dm mpath: replace spin_lock_irqsave with spin_lock_irq
2025-05-16 11:35 ` Mikulas Patocka
2025-05-16 19:16 ` Benjamin Marzinski
@ 2025-05-19 13:50 ` Mikulas Patocka
2025-05-20 3:56 ` Benjamin Marzinski
1 sibling, 1 reply; 5+ messages in thread
From: Mikulas Patocka @ 2025-05-19 13:50 UTC (permalink / raw)
To: Benjamin Marzinski
Cc: Mike Snitzer, dm-devel, Kevin Wolf, Hanna Czenczek, Martin Wilck,
Paolo Bonzini, Christoph Hellwig, Hannes Reinecke
On Fri, 16 May 2025, Mikulas Patocka wrote:
>
>
> On Thu, 15 May 2025, Benjamin Marzinski wrote:
>
> > @@ -2077,35 +2095,55 @@ static int probe_path(struct pgpath *pgpath)
> > static int probe_active_paths(struct multipath *m)
> > {
> > struct pgpath *pgpath;
> > - struct priority_group *pg;
> > + struct priority_group *pg = NULL;
> > unsigned long flags;
> > int r = 0;
> >
> > - mutex_lock(&m->work_mutex);
> > -
> > spin_lock_irqsave(&m->lock, flags);
>
> Hi
>
> I suggest replacing spin_lock_irqsave/spin_unlock_irqrestore with
> spin_lock_irq/spin_unlock_irq here and in some other places where it is
> known that interrupts are enabled (for example __map_bio,
> process_queued_bios, multipath_ctr, flush_multipath_work,
> multipath_resume, multipath_status, multipath_prepare_ioctl, ...).
>
> I accepted this patch, so you can send the spinlock changes in a follow-up
> patch.
>
> Mikulas
Hi
I've created a patch for this. Please test it (with spinlock debugging
enabled), if it passes the tests, I'll stage it.
Mikulas
From: Mikulas Patocka <mpatocka@redhat.com>
Replace spin_lock_irqsave/spin_unlock_irqrestore with
spin_lock_irq/spin_unlock_irq at places where it is known that interrupts
are enabled.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
drivers/md/dm-mpath.c | 61 ++++++++++++++++++++------------------------------
1 file changed, 25 insertions(+), 36 deletions(-)
Index: linux-2.6/drivers/md/dm-mpath.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-mpath.c
+++ linux-2.6/drivers/md/dm-mpath.c
@@ -612,7 +612,6 @@ static void multipath_queue_bio(struct m
static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
{
struct pgpath *pgpath;
- unsigned long flags;
/* Do we need to select a new pgpath? */
pgpath = READ_ONCE(m->current_pgpath);
@@ -620,12 +619,12 @@ static struct pgpath *__map_bio(struct m
pgpath = choose_pgpath(m, bio->bi_iter.bi_size);
if (!pgpath) {
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
__multipath_queue_bio(m, bio);
pgpath = ERR_PTR(-EAGAIN);
}
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
} else if (mpath_double_check_test_bit(MPATHF_QUEUE_IO, m) ||
mpath_double_check_test_bit(MPATHF_PG_INIT_REQUIRED, m)) {
@@ -688,7 +687,6 @@ static void process_queued_io_list(struc
static void process_queued_bios(struct work_struct *work)
{
int r;
- unsigned long flags;
struct bio *bio;
struct bio_list bios;
struct blk_plug plug;
@@ -697,16 +695,16 @@ static void process_queued_bios(struct w
bio_list_init(&bios);
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (bio_list_empty(&m->queued_bios)) {
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
return;
}
bio_list_merge_init(&bios, &m->queued_bios);
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
blk_start_plug(&plug);
while ((bio = bio_list_pop(&bios))) {
@@ -1190,7 +1188,6 @@ static int multipath_ctr(struct dm_targe
struct dm_arg_set as;
unsigned int pg_count = 0;
unsigned int next_pg_num;
- unsigned long flags;
as.argc = argc;
as.argv = argv;
@@ -1255,9 +1252,9 @@ static int multipath_ctr(struct dm_targe
goto bad;
}
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
enable_nopath_timeout(m);
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
ti->num_flush_bios = 1;
ti->num_discard_bios = 1;
@@ -1292,23 +1289,21 @@ static void multipath_wait_for_pg_init_c
static void flush_multipath_work(struct multipath *m)
{
if (m->hw_handler_name) {
- unsigned long flags;
-
if (!atomic_read(&m->pg_init_in_progress))
goto skip;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (atomic_read(&m->pg_init_in_progress) &&
!test_and_set_bit(MPATHF_PG_INIT_DISABLED, &m->flags)) {
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
flush_workqueue(kmpath_handlerd);
multipath_wait_for_pg_init_completion(m);
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
}
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
}
skip:
if (m->queue_mode == DM_TYPE_BIO_BASED)
@@ -1370,11 +1365,10 @@ out:
static int reinstate_path(struct pgpath *pgpath)
{
int r = 0, run_queue = 0;
- unsigned long flags;
struct multipath *m = pgpath->pg->m;
unsigned int nr_valid_paths;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (pgpath->is_active)
goto out;
@@ -1404,7 +1398,7 @@ static int reinstate_path(struct pgpath
schedule_work(&m->trigger_event);
out:
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
if (run_queue) {
dm_table_run_md_queue_async(m->ti->table);
process_queued_io_list(m);
@@ -1461,7 +1455,6 @@ static int switch_pg_num(struct multipat
{
struct priority_group *pg;
unsigned int pgnum;
- unsigned long flags;
char dummy;
if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
@@ -1470,7 +1463,7 @@ static int switch_pg_num(struct multipat
return -EINVAL;
}
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
list_for_each_entry(pg, &m->priority_groups, list) {
pg->bypassed = false;
if (--pgnum)
@@ -1480,7 +1473,7 @@ static int switch_pg_num(struct multipat
m->current_pg = NULL;
m->next_pg = pg;
}
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
schedule_work(&m->trigger_event);
return 0;
@@ -1762,9 +1755,8 @@ static void multipath_postsuspend(struct
static void multipath_resume(struct dm_target *ti)
{
struct multipath *m = ti->private;
- unsigned long flags;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) {
set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
@@ -1775,7 +1767,7 @@ static void multipath_resume(struct dm_t
test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags),
test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags));
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
}
/*
@@ -1798,14 +1790,13 @@ static void multipath_status(struct dm_t
unsigned int status_flags, char *result, unsigned int maxlen)
{
int sz = 0, pg_counter, pgpath_counter;
- unsigned long flags;
struct multipath *m = ti->private;
struct priority_group *pg;
struct pgpath *p;
unsigned int pg_num;
char state;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
/* Features */
if (type == STATUSTYPE_INFO)
@@ -1951,7 +1942,7 @@ static void multipath_status(struct dm_t
break;
}
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
}
static int multipath_message(struct dm_target *ti, unsigned int argc, char **argv,
@@ -1961,7 +1952,6 @@ static int multipath_message(struct dm_t
dev_t dev;
struct multipath *m = ti->private;
action_fn action;
- unsigned long flags;
mutex_lock(&m->work_mutex);
@@ -1973,9 +1963,9 @@ static int multipath_message(struct dm_t
if (argc == 1) {
if (!strcasecmp(argv[0], "queue_if_no_path")) {
r = queue_if_no_path(m, true, false, __func__);
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
enable_nopath_timeout(m);
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
goto out;
} else if (!strcasecmp(argv[0], "fail_if_no_path")) {
r = queue_if_no_path(m, false, false, __func__);
@@ -2026,7 +2016,6 @@ static int multipath_prepare_ioctl(struc
{
struct multipath *m = ti->private;
struct pgpath *pgpath;
- unsigned long flags;
int r;
pgpath = READ_ONCE(m->current_pgpath);
@@ -2044,10 +2033,10 @@ static int multipath_prepare_ioctl(struc
} else {
/* No path is available */
r = -EIO;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
r = -ENOTCONN;
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
}
if (r == -ENOTCONN) {
@@ -2055,10 +2044,10 @@ static int multipath_prepare_ioctl(struc
/* Path status changed, redo selection */
(void) choose_pgpath(m, 0);
}
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
(void) __pg_init_all_paths(m);
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
dm_table_run_md_queue_async(m->ti->table);
process_queued_io_list(m);
}
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] dm mpath: replace spin_lock_irqsave with spin_lock_irq
2025-05-19 13:50 ` [PATCH] dm mpath: replace spin_lock_irqsave with spin_lock_irq Mikulas Patocka
@ 2025-05-20 3:56 ` Benjamin Marzinski
0 siblings, 0 replies; 5+ messages in thread
From: Benjamin Marzinski @ 2025-05-20 3:56 UTC (permalink / raw)
To: Mikulas Patocka
Cc: Mike Snitzer, dm-devel, Kevin Wolf, Hanna Czenczek, Martin Wilck,
Paolo Bonzini, Christoph Hellwig, Hannes Reinecke
On Mon, May 19, 2025 at 03:50:15PM +0200, Mikulas Patocka wrote:
>
>
> On Fri, 16 May 2025, Mikulas Patocka wrote:
>
> >
> >
> > On Thu, 15 May 2025, Benjamin Marzinski wrote:
> >
> > > @@ -2077,35 +2095,55 @@ static int probe_path(struct pgpath *pgpath)
> > > static int probe_active_paths(struct multipath *m)
> > > {
> > > struct pgpath *pgpath;
> > > - struct priority_group *pg;
> > > + struct priority_group *pg = NULL;
> > > unsigned long flags;
> > > int r = 0;
> > >
> > > - mutex_lock(&m->work_mutex);
> > > -
> > > spin_lock_irqsave(&m->lock, flags);
> >
> > Hi
> >
> > I suggest replacing spin_lock_irqsave/spin_unlock_irqrestore with
> > spin_lock_irq/spin_unlock_irq here and in some other places where it is
> > known that interrupts are enabled (for example __map_bio,
> > process_queued_bios, multipath_ctr, flush_multipath_work,
> > multipath_resume, multipath_status, multipath_prepare_ioctl, ...).
> >
> > I accepted this patch, so you can send the spinlock changes in a follow-up
> > patch.
> >
> > Mikulas
>
> Hi
>
> I've created a patch for this. Please test it (with spinlock debugging
> enabled), if it passes the tests, I'll stage it.
I tested this with CONFIG_DEBUG_SPINLOCK=y and everything looks fine.
Actually, I tested a slightly different patch. Your patch applies on top
of dm-6.15, but that means it doesn't clean up the spin_lock_irqsave()
calls in my last patch. I tested using a version of your patch that goes
on top of dm-for-next, and changes the spinlocks in probe_active_paths()
and multipath_presuspend(), both of which can also only be called with
interrupts enabled. Here it is:
From: Mikulas Patocka <mpatocka@redhat.com>
Replace spin_lock_irqsave/spin_unlock_irqrestore with
spin_lock_irq/spin_unlock_irq at places where it is known that interrupts
are enabled.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
drivers/md/dm-mpath.c | 75 ++++++++++++++++++-------------------------
1 file changed, 31 insertions(+), 44 deletions(-)
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 12b7bcae490c..81fec2e1e0ef 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -617,7 +617,6 @@ static void multipath_queue_bio(struct multipath *m, struct bio *bio)
static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
{
struct pgpath *pgpath;
- unsigned long flags;
/* Do we need to select a new pgpath? */
pgpath = READ_ONCE(m->current_pgpath);
@@ -625,12 +624,12 @@ static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
pgpath = choose_pgpath(m, bio->bi_iter.bi_size);
if (!pgpath) {
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
__multipath_queue_bio(m, bio);
pgpath = ERR_PTR(-EAGAIN);
}
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
} else if (mpath_double_check_test_bit(MPATHF_QUEUE_IO, m) ||
mpath_double_check_test_bit(MPATHF_PG_INIT_REQUIRED, m)) {
@@ -693,7 +692,6 @@ static void process_queued_io_list(struct multipath *m)
static void process_queued_bios(struct work_struct *work)
{
int r;
- unsigned long flags;
struct bio *bio;
struct bio_list bios;
struct blk_plug plug;
@@ -702,16 +700,16 @@ static void process_queued_bios(struct work_struct *work)
bio_list_init(&bios);
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (bio_list_empty(&m->queued_bios)) {
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
return;
}
bio_list_merge_init(&bios, &m->queued_bios);
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
blk_start_plug(&plug);
while ((bio = bio_list_pop(&bios))) {
@@ -1195,7 +1193,6 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, char **argv)
struct dm_arg_set as;
unsigned int pg_count = 0;
unsigned int next_pg_num;
- unsigned long flags;
as.argc = argc;
as.argv = argv;
@@ -1260,9 +1257,9 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
enable_nopath_timeout(m);
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
ti->num_flush_bios = 1;
ti->num_discard_bios = 1;
@@ -1297,23 +1294,21 @@ static void multipath_wait_for_pg_init_completion(struct multipath *m)
static void flush_multipath_work(struct multipath *m)
{
if (m->hw_handler_name) {
- unsigned long flags;
-
if (!atomic_read(&m->pg_init_in_progress))
goto skip;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (atomic_read(&m->pg_init_in_progress) &&
!test_and_set_bit(MPATHF_PG_INIT_DISABLED, &m->flags)) {
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
flush_workqueue(kmpath_handlerd);
multipath_wait_for_pg_init_completion(m);
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
}
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
}
skip:
if (m->queue_mode == DM_TYPE_BIO_BASED)
@@ -1375,11 +1370,10 @@ static int fail_path(struct pgpath *pgpath)
static int reinstate_path(struct pgpath *pgpath)
{
int r = 0, run_queue = 0;
- unsigned long flags;
struct multipath *m = pgpath->pg->m;
unsigned int nr_valid_paths;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (pgpath->is_active)
goto out;
@@ -1409,7 +1403,7 @@ static int reinstate_path(struct pgpath *pgpath)
schedule_work(&m->trigger_event);
out:
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
if (run_queue) {
dm_table_run_md_queue_async(m->ti->table);
process_queued_io_list(m);
@@ -1470,7 +1464,6 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
{
struct priority_group *pg;
unsigned int pgnum;
- unsigned long flags;
char dummy;
if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
@@ -1479,7 +1472,7 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
return -EINVAL;
}
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
list_for_each_entry(pg, &m->priority_groups, list) {
pg->bypassed = false;
if (--pgnum)
@@ -1493,7 +1486,7 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
}
m->next_pg = pg;
}
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
schedule_work(&m->trigger_event);
return 0;
@@ -1754,11 +1747,10 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
static void multipath_presuspend(struct dm_target *ti)
{
struct multipath *m = ti->private;
- unsigned long flags;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
m->is_suspending = true;
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
/* FIXME: bio-based shouldn't need to always disable queue_if_no_path */
if (m->queue_mode == DM_TYPE_BIO_BASED || !dm_noflush_suspending(m->ti))
queue_if_no_path(m, false, true, __func__);
@@ -1779,9 +1771,8 @@ static void multipath_postsuspend(struct dm_target *ti)
static void multipath_resume(struct dm_target *ti)
{
struct multipath *m = ti->private;
- unsigned long flags;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
m->is_suspending = false;
if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) {
set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
@@ -1793,7 +1784,7 @@ static void multipath_resume(struct dm_target *ti)
test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags),
test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags));
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
}
/*
@@ -1816,14 +1807,13 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
unsigned int status_flags, char *result, unsigned int maxlen)
{
int sz = 0, pg_counter, pgpath_counter;
- unsigned long flags;
struct multipath *m = ti->private;
struct priority_group *pg;
struct pgpath *p;
unsigned int pg_num;
char state;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
/* Features */
if (type == STATUSTYPE_INFO)
@@ -1969,7 +1959,7 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
break;
}
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
}
static int multipath_message(struct dm_target *ti, unsigned int argc, char **argv,
@@ -1979,7 +1969,6 @@ static int multipath_message(struct dm_target *ti, unsigned int argc, char **arg
dev_t dev;
struct multipath *m = ti->private;
action_fn action;
- unsigned long flags;
mutex_lock(&m->work_mutex);
@@ -1991,9 +1980,9 @@ static int multipath_message(struct dm_target *ti, unsigned int argc, char **arg
if (argc == 1) {
if (!strcasecmp(argv[0], "queue_if_no_path")) {
r = queue_if_no_path(m, true, false, __func__);
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
enable_nopath_timeout(m);
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
goto out;
} else if (!strcasecmp(argv[0], "fail_if_no_path")) {
r = queue_if_no_path(m, false, false, __func__);
@@ -2096,10 +2085,9 @@ static int probe_active_paths(struct multipath *m)
{
struct pgpath *pgpath;
struct priority_group *pg = NULL;
- unsigned long flags;
int r = 0;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags)) {
wait_event_lock_irq(m->probe_wait,
!test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags),
@@ -2117,7 +2105,7 @@ static int probe_active_paths(struct multipath *m)
goto skip_probe;
set_bit(MPATHF_DELAY_PG_SWITCH, &m->flags);
pg = m->last_probed_pg = m->current_pg;
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
list_for_each_entry(pgpath, &pg->pgpaths, list) {
if (pg != READ_ONCE(m->current_pg) ||
@@ -2132,7 +2120,7 @@ static int probe_active_paths(struct multipath *m)
}
out:
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
clear_bit(MPATHF_DELAY_PG_SWITCH, &m->flags);
if (test_and_clear_bit(MPATHF_NEED_PG_SWITCH, &m->flags)) {
m->current_pgpath = NULL;
@@ -2141,7 +2129,7 @@ static int probe_active_paths(struct multipath *m)
skip_probe:
if (r == 0 && !atomic_read(&m->nr_valid_paths))
r = -ENOTCONN;
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
if (pg)
wake_up(&m->probe_wait);
return r;
@@ -2154,7 +2142,6 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
{
struct multipath *m = ti->private;
struct pgpath *pgpath;
- unsigned long flags;
int r;
if (_IOC_TYPE(cmd) == DM_IOCTL) {
@@ -2182,10 +2169,10 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
} else {
/* No path is available */
r = -EIO;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
r = -ENOTCONN;
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
}
if (r == -ENOTCONN) {
@@ -2193,10 +2180,10 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
/* Path status changed, redo selection */
(void) choose_pgpath(m, 0);
}
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
(void) __pg_init_all_paths(m);
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
dm_table_run_md_queue_async(m->ti->table);
process_queued_io_list(m);
}
--
2.46.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2025-05-20 3:56 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-05-16 1:55 [PATCH] dm-mpath: Don't grab work_mutex while probing paths Benjamin Marzinski
2025-05-16 11:35 ` Mikulas Patocka
2025-05-16 19:16 ` Benjamin Marzinski
2025-05-19 13:50 ` [PATCH] dm mpath: replace spin_lock_irqsave with spin_lock_irq Mikulas Patocka
2025-05-20 3:56 ` Benjamin Marzinski
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.