* RE: [PATCH] Handle multipath paths in a path group properly during pg_init
2009-03-30 18:07 [PATCH] Handle multipath paths in a path group properly during pg_init Chandra Seetharaman
@ 2009-03-30 22:14 ` Moger, Babu
2009-03-31 6:33 ` Hannes Reinecke
2009-04-27 22:48 ` Chandra Seetharaman
2 siblings, 0 replies; 8+ messages in thread
From: Moger, Babu @ 2009-03-30 22:14 UTC (permalink / raw)
To: sekharan@linux.vnet.ibm.com, dm-devel; +Cc: Anderson, Mike, Alasdair G Kergon
Reviewed and Tested-by: Babu Moger <babu.moger@lsi.com>
> -----Original Message-----
> From: Chandra Seetharaman [mailto:sekharan@us.ibm.com]
> Sent: Monday, March 30, 2009 1:08 PM
> To: dm-devel
> Cc: Alasdair G Kergon; Moger, Babu; Hannes Reinecke; Mike Anderson
> Subject: [PATCH] Handle multipath paths in a path group properly during
> pg_init
>
> Resending the patch to get in patchwork...
> -------
> The problem reported by Moger Babu was caused due to the architectural
> change made when we moved from dm hardware handler to SCSI hardware
> handler.
>
> Thanks Babu for finding and reporting the bug.
>
> All of the hardware handlers, do have a state now, and they are set to
> active and (some form of) inactive. All of them have prep_fn, which use
> this "state" to fail the I/O without it ever being sent to the device.
>
> As Babu has noted in his email, the pg_init/activate is sent on only one
> path and the "state" of that path is changed appropriately to "active"
> while other paths in the same path group are never changed as they never
> got an "activate".
>
> Attached is a patch (compiled, tested, but not clean yet), which makes
> changes in the dm-multipath layer to send an "activate" on each paths in
> the path groups.
>
> Mike (Anderson) and I had a discussion about whether to implement this
> in the dm-mulitpath layer or in the SCSI hardware handler layer and we
> came to a conclusion that it is best suited to be in the dm-mulitpath
> layer as it is the one that knows the relationship between different
> paths.
>
> If it were to be done at the Hardware handler layer, then the hardware
> handler may end up having a different notion of the path relationship
> and hence may not work as expected by the dm-multipath layer.
>
> This patch has been tested by Hannes in EMC storage. Babu and I tested it
> in LSI storage.
> ----------
>
> Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
>
> ---
> Index: linux-2.6.29/drivers/md/dm-mpath.c
> ===================================================================
> --- linux-2.6.29.orig/drivers/md/dm-mpath.c
> +++ linux-2.6.29/drivers/md/dm-mpath.c
> @@ -36,6 +36,7 @@ struct pgpath {
>
> struct dm_path path;
> struct work_struct deactivate_path;
> + struct work_struct activate_path;
> };
>
> #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
> @@ -65,8 +66,6 @@ struct multipath {
> spinlock_t lock;
>
> const char *hw_handler_name;
> - struct work_struct activate_path;
> - struct pgpath *pgpath_to_activate;
> unsigned nr_priority_groups;
> struct list_head priority_groups;
> unsigned pg_init_required; /* pg_init needs calling? */
> @@ -129,6 +128,7 @@ static struct pgpath *alloc_pgpath(void)
> if (pgpath) {
> pgpath->is_active = 1;
> INIT_WORK(&pgpath->deactivate_path, deactivate_path);
> + INIT_WORK(&pgpath->activate_path, activate_path);
> }
>
> return pgpath;
> @@ -170,10 +170,6 @@ static void free_pgpaths(struct list_hea
> if (m->hw_handler_name)
> scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
> dm_put_device(ti, pgpath->path.dev);
> - spin_lock_irqsave(&m->lock, flags);
> - if (m->pgpath_to_activate == pgpath)
> - m->pgpath_to_activate = NULL;
> - spin_unlock_irqrestore(&m->lock, flags);
> free_pgpath(pgpath);
> }
> }
> @@ -203,7 +199,6 @@ static struct multipath *alloc_multipath
> m->queue_io = 1;
> INIT_WORK(&m->process_queued_ios, process_queued_ios);
> INIT_WORK(&m->trigger_event, trigger_event);
> - INIT_WORK(&m->activate_path, activate_path);
> m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
> if (!m->mpio_pool) {
> kfree(m);
> @@ -428,8 +423,8 @@ static void process_queued_ios(struct wo
> {
> struct multipath *m =
> container_of(work, struct multipath, process_queued_ios);
> - struct pgpath *pgpath = NULL;
> - unsigned init_required = 0, must_queue = 1;
> + struct pgpath *pgpath = NULL, *tmp;
> + unsigned must_queue = 1;
> unsigned long flags;
>
> spin_lock_irqsave(&m->lock, flags);
> @@ -447,19 +442,15 @@ static void process_queued_ios(struct wo
> must_queue = 0;
>
> if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
> - m->pgpath_to_activate = pgpath;
> m->pg_init_count++;
> m->pg_init_required = 0;
> - m->pg_init_in_progress = 1;
> - init_required = 1;
> + list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) {
> + queue_work(kmpath_handlerd, &tmp->activate_path);
> + m->pg_init_in_progress++;
> + }
> }
> -
> out:
> spin_unlock_irqrestore(&m->lock, flags);
> -
> - if (init_required)
> - queue_work(kmpath_handlerd, &m->activate_path);
> -
> if (!must_queue)
> dispatch_queued_ios(m);
> }
> @@ -1111,27 +1102,20 @@ static void pg_init_done(struct dm_path
> pg->bypassed = 0;
> }
>
> - m->pg_init_in_progress = 0;
> - queue_work(kmultipathd, &m->process_queued_ios);
> + m->pg_init_in_progress--;
> + if (!m->pg_init_in_progress)
> + queue_work(kmultipathd, &m->process_queued_ios);
> spin_unlock_irqrestore(&m->lock, flags);
> }
>
> static void activate_path(struct work_struct *work)
> {
> int ret;
> - struct multipath *m =
> - container_of(work, struct multipath, activate_path);
> - struct dm_path *path;
> - unsigned long flags;
> + struct pgpath *pgpath =
> + container_of(work, struct pgpath, activate_path);
>
> - spin_lock_irqsave(&m->lock, flags);
> - path = &m->pgpath_to_activate->path;
> - m->pgpath_to_activate = NULL;
> - spin_unlock_irqrestore(&m->lock, flags);
> - if (!path)
> - return;
> - ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev));
> - pg_init_done(path, ret);
> + ret = scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev));
> + pg_init_done(&pgpath->path, ret);
> }
>
> /*
>
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH] Handle multipath paths in a path group properly during pg_init
2009-03-30 18:07 [PATCH] Handle multipath paths in a path group properly during pg_init Chandra Seetharaman
2009-03-30 22:14 ` Moger, Babu
@ 2009-03-31 6:33 ` Hannes Reinecke
2009-04-27 22:48 ` Chandra Seetharaman
2 siblings, 0 replies; 8+ messages in thread
From: Hannes Reinecke @ 2009-03-31 6:33 UTC (permalink / raw)
To: sekharan; +Cc: Mike Anderson, dm-devel, Alasdair G Kergon, Moger, Babu
Chandra Seetharaman wrote:
> Resending the patch to get in patchwork...
> -------
> The problem reported by Moger Babu was caused due to the architectural
> change made when we moved from dm hardware handler to SCSI hardware
> handler.
>
> Thanks Babu for finding and reporting the bug.
>
> All of the hardware handlers, do have a state now, and they are set to
> active and (some form of) inactive. All of them have prep_fn, which use
> this "state" to fail the I/O without it ever being sent to the device.
>
> As Babu has noted in his email, the pg_init/activate is sent on only one
> path and the "state" of that path is changed appropriately to "active"
> while other paths in the same path group are never changed as they never
> got an "activate".
>
> Attached is a patch (compiled, tested, but not clean yet), which makes
> changes in the dm-multipath layer to send an "activate" on each paths in
> the path groups.
>
> Mike (Anderson) and I had a discussion about whether to implement this
> in the dm-mulitpath layer or in the SCSI hardware handler layer and we
> came to a conclusion that it is best suited to be in the dm-mulitpath
> layer as it is the one that knows the relationship between different
> paths.
>
> If it were to be done at the Hardware handler layer, then the hardware
> handler may end up having a different notion of the path relationship
> and hence may not work as expected by the dm-multipath layer.
>
> This patch has been tested by Hannes in EMC storage. Babu and I tested it
> in LSI storage.
> ----------
>
> Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
>
Acked-by: Hannes Reinecke <hare@suse.de>
Cheers,
Hannes
--
Dr. Hannes Reinecke zSeries & Storage
hare@suse.de +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Markus Rex, HRB 16746 (AG Nürnberg)
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH] Handle multipath paths in a path group properly during pg_init
2009-03-30 18:07 [PATCH] Handle multipath paths in a path group properly during pg_init Chandra Seetharaman
2009-03-30 22:14 ` Moger, Babu
2009-03-31 6:33 ` Hannes Reinecke
@ 2009-04-27 22:48 ` Chandra Seetharaman
2009-04-28 6:30 ` Hannes Reinecke
2009-04-28 19:31 ` Chandra Seetharaman
2 siblings, 2 replies; 8+ messages in thread
From: Chandra Seetharaman @ 2009-04-27 22:48 UTC (permalink / raw)
To: dm-devel; +Cc: Mike Anderson, Alasdair G Kergon, Moger, Babu
Resending the patch after fixing the header, Porting and testing in
2.6.30-rc3, and a bug fix.
---------------
There is a problem which was caused due to the architectural change made
when we moved from dm hardware handler to SCSI hardware handler.
In dm, handler was called to do a pg_init for a path group, and there
was no state maintained in hardware handler code for each path.
In SCSI dh, "state" is maintanined per path, as we wanted to fail I/O
early on that path if it is not the active path.
All of the hardware handlers, do have a state now, and they are set to
active and (some form of) inactive. All of them have prep_fn(), which use
this "state" to fail the I/O without it ever being sent to the device.
So, in effect when dm-multipath calls scsi_dh_activate(), activate is
sent to only one path and the "state" of that path is changed appropriately
to "active" while other paths in the same path group are never changed
as they never got an "activate".
In order make sure all the paths in a path group gets their state set
properly when a pg_init happens, we need to call scsi_dh_activate() on
all paths in a path group.
Doing this at the hardware handler layer is not a good option as we
want the multipath layer to define the relationship between path and path
groups and not the hardware handler.
Attached patch sends an "activate" on each path in a path group when a
path group is switched. It also sends an activate when a path is reinstated.
Patch is applied and tested on 2.6.30-rc3.
----------
Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
---
Index: linux-2.6.30-rc3/drivers/md/dm-mpath.c
===================================================================
--- linux-2.6.30-rc3.orig/drivers/md/dm-mpath.c
+++ linux-2.6.30-rc3/drivers/md/dm-mpath.c
@@ -35,6 +35,7 @@ struct pgpath {
struct dm_path path;
struct work_struct deactivate_path;
+ struct work_struct activate_path;
};
#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
@@ -64,8 +65,6 @@ struct multipath {
spinlock_t lock;
const char *hw_handler_name;
- struct work_struct activate_path;
- struct pgpath *pgpath_to_activate;
unsigned nr_priority_groups;
struct list_head priority_groups;
unsigned pg_init_required; /* pg_init needs calling? */
@@ -128,6 +127,7 @@ static struct pgpath *alloc_pgpath(void)
if (pgpath) {
pgpath->is_active = 1;
INIT_WORK(&pgpath->deactivate_path, deactivate_path);
+ INIT_WORK(&pgpath->activate_path, activate_path);
}
return pgpath;
@@ -169,10 +169,6 @@ static void free_pgpaths(struct list_hea
if (m->hw_handler_name)
scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
dm_put_device(ti, pgpath->path.dev);
- spin_lock_irqsave(&m->lock, flags);
- if (m->pgpath_to_activate == pgpath)
- m->pgpath_to_activate = NULL;
- spin_unlock_irqrestore(&m->lock, flags);
free_pgpath(pgpath);
}
}
@@ -202,7 +198,6 @@ static struct multipath *alloc_multipath
m->queue_io = 1;
INIT_WORK(&m->process_queued_ios, process_queued_ios);
INIT_WORK(&m->trigger_event, trigger_event);
- INIT_WORK(&m->activate_path, activate_path);
m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
if (!m->mpio_pool) {
kfree(m);
@@ -427,8 +422,8 @@ static void process_queued_ios(struct wo
{
struct multipath *m =
container_of(work, struct multipath, process_queued_ios);
- struct pgpath *pgpath = NULL;
- unsigned init_required = 0, must_queue = 1;
+ struct pgpath *pgpath = NULL, *tmp;
+ unsigned must_queue = 1;
unsigned long flags;
spin_lock_irqsave(&m->lock, flags);
@@ -446,19 +441,15 @@ static void process_queued_ios(struct wo
must_queue = 0;
if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
- m->pgpath_to_activate = pgpath;
m->pg_init_count++;
m->pg_init_required = 0;
- m->pg_init_in_progress = 1;
- init_required = 1;
+ list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) {
+ queue_work(kmpath_handlerd, &tmp->activate_path);
+ m->pg_init_in_progress++;
+ }
}
-
out:
spin_unlock_irqrestore(&m->lock, flags);
-
- if (init_required)
- queue_work(kmpath_handlerd, &m->activate_path);
-
if (!must_queue)
dispatch_queued_ios(m);
}
@@ -924,9 +915,13 @@ static int reinstate_path(struct pgpath
pgpath->is_active = 1;
- m->current_pgpath = NULL;
- if (!m->nr_valid_paths++ && m->queue_size)
+ if (!m->nr_valid_paths++ && m->queue_size) {
+ m->current_pgpath = NULL;
queue_work(kmultipathd, &m->process_queued_ios);
+ } else if (m->hw_handler_name) {
+ m->pg_init_in_progress++;
+ queue_work(kmpath_handlerd, &pgpath->activate_path);
+ }
dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
pgpath->path.dev->name, m->nr_valid_paths);
@@ -1102,35 +1097,30 @@ static void pg_init_done(struct dm_path
spin_lock_irqsave(&m->lock, flags);
if (errors) {
- DMERR("Could not failover device. Error %d.", errors);
- m->current_pgpath = NULL;
- m->current_pg = NULL;
+ if (pgpath == m->current_pgpath) {
+ DMERR("Could not failover device. Error %d.", errors);
+ m->current_pgpath = NULL;
+ m->current_pg = NULL;
+ }
} else if (!m->pg_init_required) {
m->queue_io = 0;
pg->bypassed = 0;
}
- m->pg_init_in_progress = 0;
- queue_work(kmultipathd, &m->process_queued_ios);
+ m->pg_init_in_progress--;
+ if (!m->pg_init_in_progress)
+ queue_work(kmultipathd, &m->process_queued_ios);
spin_unlock_irqrestore(&m->lock, flags);
}
static void activate_path(struct work_struct *work)
{
int ret;
- struct multipath *m =
- container_of(work, struct multipath, activate_path);
- struct dm_path *path;
- unsigned long flags;
+ struct pgpath *pgpath =
+ container_of(work, struct pgpath, activate_path);
- spin_lock_irqsave(&m->lock, flags);
- path = &m->pgpath_to_activate->path;
- m->pgpath_to_activate = NULL;
- spin_unlock_irqrestore(&m->lock, flags);
- if (!path)
- return;
- ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev));
- pg_init_done(path, ret);
+ ret = scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev));
+ pg_init_done(&pgpath->path, ret);
}
/*
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH] Handle multipath paths in a path group properly during pg_init
2009-04-27 22:48 ` Chandra Seetharaman
@ 2009-04-28 6:30 ` Hannes Reinecke
2009-04-28 19:31 ` Chandra Seetharaman
1 sibling, 0 replies; 8+ messages in thread
From: Hannes Reinecke @ 2009-04-28 6:30 UTC (permalink / raw)
To: sekharan; +Cc: Mike Anderson, dm-devel, Alasdair G Kergon, Moger, Babu
Chandra Seetharaman wrote:
> Resending the patch after fixing the header, Porting and testing in
> 2.6.30-rc3, and a bug fix.
> ---------------
> There is a problem which was caused due to the architectural change made
> when we moved from dm hardware handler to SCSI hardware handler.
>
> In dm, handler was called to do a pg_init for a path group, and there
> was no state maintained in hardware handler code for each path.
>
> In SCSI dh, "state" is maintanined per path, as we wanted to fail I/O
> early on that path if it is not the active path.
>
> All of the hardware handlers, do have a state now, and they are set to
> active and (some form of) inactive. All of them have prep_fn(), which use
> this "state" to fail the I/O without it ever being sent to the device.
>
> So, in effect when dm-multipath calls scsi_dh_activate(), activate is
> sent to only one path and the "state" of that path is changed appropriately
> to "active" while other paths in the same path group are never changed
> as they never got an "activate".
>
> In order make sure all the paths in a path group gets their state set
> properly when a pg_init happens, we need to call scsi_dh_activate() on
> all paths in a path group.
>
> Doing this at the hardware handler layer is not a good option as we
> want the multipath layer to define the relationship between path and path
> groups and not the hardware handler.
>
> Attached patch sends an "activate" on each path in a path group when a
> path group is switched. It also sends an activate when a path is reinstated.
>
> Patch is applied and tested on 2.6.30-rc3.
>
> ----------
>
> Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
Acked-by: Hannes Reinecke <hare@suse.de>
--
Dr. Hannes Reinecke zSeries & Storage
hare@suse.de +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Markus Rex, HRB 16746 (AG Nürnberg)
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH] Handle multipath paths in a path group properly during pg_init
2009-04-27 22:48 ` Chandra Seetharaman
2009-04-28 6:30 ` Hannes Reinecke
@ 2009-04-28 19:31 ` Chandra Seetharaman
2009-06-06 0:04 ` Chandra Seetharaman
1 sibling, 1 reply; 8+ messages in thread
From: Chandra Seetharaman @ 2009-04-28 19:31 UTC (permalink / raw)
To: device-mapper development; +Cc: Mike Anderson, Alasdair G Kergon, Moger, Babu
Fixed a problem while reinstating passive paths.
----------
There is a problem which was caused due to the architectural change made
when we moved from dm hardware handler to SCSI hardware handler.
In dm, handler was called to do a pg_init for a path group, and there
was no state maintained in hardware handler code for each path.
In SCSI dh, "state" is maintanined per path, as we wanted to fail I/O
early on that path if it is not the active path.
All of the hardware handlers, do have a state now, and they are set to
active and (some form of) inactive. All of them have prep_fn(), which use
this "state" to fail the I/O without it ever being sent to the device.
So, in effect when dm-multipath calls scsi_dh_activate(), activate is
sent to only one path and the "state" of that path is changed appropriately
to "active" while other paths in the same path group are never changed
as they never got an "activate".
In order make sure all the paths in a path group gets their state set
properly when a pg_init happens, we need to call scsi_dh_activate() on
all paths in a path group.
Doing this at the hardware handler layer is not a good option as we
want the multipath layer to define the relationship between path and path
groups and not the hardware handler.
Attached patch sends an "activate" on each path in a path group when a
path group is switched. It also sends an activate when a path is reinstated.
Patch is applied and tested on 2.6.30-rc3.
----------
Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
---
Index: linux-2.6.30-rc3/drivers/md/dm-mpath.c
===================================================================
--- linux-2.6.30-rc3.orig/drivers/md/dm-mpath.c
+++ linux-2.6.30-rc3/drivers/md/dm-mpath.c
@@ -35,6 +35,7 @@ struct pgpath {
struct dm_path path;
struct work_struct deactivate_path;
+ struct work_struct activate_path;
};
#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
@@ -64,8 +65,6 @@ struct multipath {
spinlock_t lock;
const char *hw_handler_name;
- struct work_struct activate_path;
- struct pgpath *pgpath_to_activate;
unsigned nr_priority_groups;
struct list_head priority_groups;
unsigned pg_init_required; /* pg_init needs calling? */
@@ -128,6 +127,7 @@ static struct pgpath *alloc_pgpath(void)
if (pgpath) {
pgpath->is_active = 1;
INIT_WORK(&pgpath->deactivate_path, deactivate_path);
+ INIT_WORK(&pgpath->activate_path, activate_path);
}
return pgpath;
@@ -160,7 +160,6 @@ static struct priority_group *alloc_prio
static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
{
- unsigned long flags;
struct pgpath *pgpath, *tmp;
struct multipath *m = ti->private;
@@ -169,10 +168,6 @@ static void free_pgpaths(struct list_hea
if (m->hw_handler_name)
scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
dm_put_device(ti, pgpath->path.dev);
- spin_lock_irqsave(&m->lock, flags);
- if (m->pgpath_to_activate == pgpath)
- m->pgpath_to_activate = NULL;
- spin_unlock_irqrestore(&m->lock, flags);
free_pgpath(pgpath);
}
}
@@ -202,7 +197,6 @@ static struct multipath *alloc_multipath
m->queue_io = 1;
INIT_WORK(&m->process_queued_ios, process_queued_ios);
INIT_WORK(&m->trigger_event, trigger_event);
- INIT_WORK(&m->activate_path, activate_path);
m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
if (!m->mpio_pool) {
kfree(m);
@@ -427,8 +421,8 @@ static void process_queued_ios(struct wo
{
struct multipath *m =
container_of(work, struct multipath, process_queued_ios);
- struct pgpath *pgpath = NULL;
- unsigned init_required = 0, must_queue = 1;
+ struct pgpath *pgpath = NULL, *tmp;
+ unsigned must_queue = 1;
unsigned long flags;
spin_lock_irqsave(&m->lock, flags);
@@ -446,19 +440,15 @@ static void process_queued_ios(struct wo
must_queue = 0;
if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
- m->pgpath_to_activate = pgpath;
m->pg_init_count++;
m->pg_init_required = 0;
- m->pg_init_in_progress = 1;
- init_required = 1;
+ list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) {
+ queue_work(kmpath_handlerd, &tmp->activate_path);
+ m->pg_init_in_progress++;
+ }
}
-
out:
spin_unlock_irqrestore(&m->lock, flags);
-
- if (init_required)
- queue_work(kmpath_handlerd, &m->activate_path);
-
if (!must_queue)
dispatch_queued_ios(m);
}
@@ -924,9 +914,13 @@ static int reinstate_path(struct pgpath
pgpath->is_active = 1;
- m->current_pgpath = NULL;
- if (!m->nr_valid_paths++ && m->queue_size)
+ if (!m->nr_valid_paths++ && m->queue_size) {
+ m->current_pgpath = NULL;
queue_work(kmultipathd, &m->process_queued_ios);
+ } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
+ m->pg_init_in_progress++;
+ queue_work(kmpath_handlerd, &pgpath->activate_path);
+ }
dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
pgpath->path.dev->name, m->nr_valid_paths);
@@ -1102,35 +1096,30 @@ static void pg_init_done(struct dm_path
spin_lock_irqsave(&m->lock, flags);
if (errors) {
- DMERR("Could not failover device. Error %d.", errors);
- m->current_pgpath = NULL;
- m->current_pg = NULL;
+ if (pgpath == m->current_pgpath) {
+ DMERR("Could not failover device. Error %d.", errors);
+ m->current_pgpath = NULL;
+ m->current_pg = NULL;
+ }
} else if (!m->pg_init_required) {
m->queue_io = 0;
pg->bypassed = 0;
}
- m->pg_init_in_progress = 0;
- queue_work(kmultipathd, &m->process_queued_ios);
+ m->pg_init_in_progress--;
+ if (!m->pg_init_in_progress)
+ queue_work(kmultipathd, &m->process_queued_ios);
spin_unlock_irqrestore(&m->lock, flags);
}
static void activate_path(struct work_struct *work)
{
int ret;
- struct multipath *m =
- container_of(work, struct multipath, activate_path);
- struct dm_path *path;
- unsigned long flags;
+ struct pgpath *pgpath =
+ container_of(work, struct pgpath, activate_path);
- spin_lock_irqsave(&m->lock, flags);
- path = &m->pgpath_to_activate->path;
- m->pgpath_to_activate = NULL;
- spin_unlock_irqrestore(&m->lock, flags);
- if (!path)
- return;
- ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev));
- pg_init_done(path, ret);
+ ret = scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev));
+ pg_init_done(&pgpath->path, ret);
}
/*
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH] Handle multipath paths in a path group properly during pg_init
2009-04-28 19:31 ` Chandra Seetharaman
@ 2009-06-06 0:04 ` Chandra Seetharaman
2009-06-15 22:16 ` Moger, Babu
0 siblings, 1 reply; 8+ messages in thread
From: Chandra Seetharaman @ 2009-06-06 0:04 UTC (permalink / raw)
To: device-mapper development; +Cc: Alasdair G Kergon, Moger, Babu
Found a race condition where pg_init_in_progress being incremented when
queue_work() returns without queuing the work.
Changed the code to increment pg_init_in_progress only when queue_work
returns success.
Tested in 2.6.30-rc8
---------------------------------------------------
From: Chandra Seetharaman <sekharan@us.ibm.com>
Fixed a problem affecting reinstatement of passive paths and another
problem with io lockup during device offline/online.
Before we moved the hardware handler from dm to SCSI, it performed a pg_init
for a path group and didn't maintain any state about each path in hardware
handler code.
But in SCSI dh, such state is now maintained, as we want to fail I/O early on a
path if it is not the active path.
All the hardware handlers have a state now and set to active or some form of
inactive. They have prep_fn() which uses this state to fail the I/O without
it ever being sent to the device.
So in effect when dm-multipath calls scsi_dh_activate(), activate is
sent to only one path and the "state" of that path is changed appropriately
to "active" while other paths in the same path group are never changed
as they never got an "activate".
In order make sure all the paths in a path group gets their state set
properly when a pg_init happens, we need to call scsi_dh_activate() on
all paths in a path group.
Doing this at the hardware handler layer is not a good option as we
want the multipath layer to define the relationship between path and path
groups and not the hardware handler.
Attached patch sends an "activate" on each path in a path group when a
path group is switched. It also sends an activate when a path is reinstated.
Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
drivers/md/dm-mpath.c | 63 +++++++++++++++++++++-----------------------------
1 file changed, 26 insertions(+), 37 deletions(-)
Index: linux-2.6.29/drivers/md/dm-mpath.c
===================================================================
--- linux-2.6.29.orig/drivers/md/dm-mpath.c
+++ linux-2.6.29/drivers/md/dm-mpath.c
@@ -35,6 +35,7 @@ struct pgpath {
struct dm_path path;
struct work_struct deactivate_path;
+ struct work_struct activate_path;
};
#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
@@ -64,8 +65,6 @@ struct multipath {
spinlock_t lock;
const char *hw_handler_name;
- struct work_struct activate_path;
- struct pgpath *pgpath_to_activate;
unsigned nr_priority_groups;
struct list_head priority_groups;
unsigned pg_init_required; /* pg_init needs calling? */
@@ -128,6 +127,7 @@ static struct pgpath *alloc_pgpath(void)
if (pgpath) {
pgpath->is_active = 1;
INIT_WORK(&pgpath->deactivate_path, deactivate_path);
+ INIT_WORK(&pgpath->activate_path, activate_path);
}
return pgpath;
@@ -160,7 +160,6 @@ static struct priority_group *alloc_prio
static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
{
- unsigned long flags;
struct pgpath *pgpath, *tmp;
struct multipath *m = ti->private;
@@ -169,10 +168,6 @@ static void free_pgpaths(struct list_hea
if (m->hw_handler_name)
scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
dm_put_device(ti, pgpath->path.dev);
- spin_lock_irqsave(&m->lock, flags);
- if (m->pgpath_to_activate == pgpath)
- m->pgpath_to_activate = NULL;
- spin_unlock_irqrestore(&m->lock, flags);
free_pgpath(pgpath);
}
}
@@ -202,7 +197,6 @@ static struct multipath *alloc_multipath
m->queue_io = 1;
INIT_WORK(&m->process_queued_ios, process_queued_ios);
INIT_WORK(&m->trigger_event, trigger_event);
- INIT_WORK(&m->activate_path, activate_path);
m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
if (!m->mpio_pool) {
kfree(m);
@@ -427,8 +421,8 @@ static void process_queued_ios(struct wo
{
struct multipath *m =
container_of(work, struct multipath, process_queued_ios);
- struct pgpath *pgpath = NULL;
- unsigned init_required = 0, must_queue = 1;
+ struct pgpath *pgpath = NULL, *tmp;
+ unsigned must_queue = 1;
unsigned long flags;
spin_lock_irqsave(&m->lock, flags);
@@ -446,19 +440,15 @@ static void process_queued_ios(struct wo
must_queue = 0;
if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
- m->pgpath_to_activate = pgpath;
m->pg_init_count++;
m->pg_init_required = 0;
- m->pg_init_in_progress = 1;
- init_required = 1;
+ list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) {
+ if (queue_work(kmpath_handlerd, &tmp->activate_path))
+ m->pg_init_in_progress++;
+ }
}
-
out:
spin_unlock_irqrestore(&m->lock, flags);
-
- if (init_required)
- queue_work(kmpath_handlerd, &m->activate_path);
-
if (!must_queue)
dispatch_queued_ios(m);
}
@@ -924,9 +914,13 @@ static int reinstate_path(struct pgpath
pgpath->is_active = 1;
- m->current_pgpath = NULL;
- if (!m->nr_valid_paths++ && m->queue_size)
+ if (!m->nr_valid_paths++ && m->queue_size) {
+ m->current_pgpath = NULL;
queue_work(kmultipathd, &m->process_queued_ios);
+ } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
+ if (queue_work(kmpath_handlerd, &pgpath->activate_path))
+ m->pg_init_in_progress++;
+ }
dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
pgpath->path.dev->name, m->nr_valid_paths);
@@ -1102,35 +1096,30 @@ static void pg_init_done(struct dm_path
spin_lock_irqsave(&m->lock, flags);
if (errors) {
- DMERR("Could not failover device. Error %d.", errors);
- m->current_pgpath = NULL;
- m->current_pg = NULL;
+ if (pgpath == m->current_pgpath) {
+ DMERR("Could not failover device. Error %d.", errors);
+ m->current_pgpath = NULL;
+ m->current_pg = NULL;
+ }
} else if (!m->pg_init_required) {
m->queue_io = 0;
pg->bypassed = 0;
}
- m->pg_init_in_progress = 0;
- queue_work(kmultipathd, &m->process_queued_ios);
+ m->pg_init_in_progress--;
+ if (!m->pg_init_in_progress)
+ queue_work(kmultipathd, &m->process_queued_ios);
spin_unlock_irqrestore(&m->lock, flags);
}
static void activate_path(struct work_struct *work)
{
int ret;
- struct multipath *m =
- container_of(work, struct multipath, activate_path);
- struct dm_path *path;
- unsigned long flags;
+ struct pgpath *pgpath =
+ container_of(work, struct pgpath, activate_path);
- spin_lock_irqsave(&m->lock, flags);
- path = &m->pgpath_to_activate->path;
- m->pgpath_to_activate = NULL;
- spin_unlock_irqrestore(&m->lock, flags);
- if (!path)
- return;
- ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev));
- pg_init_done(path, ret);
+ ret = scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev));
+ pg_init_done(&pgpath->path, ret);
}
/*
^ permalink raw reply [flat|nested] 8+ messages in thread* RE: [PATCH] Handle multipath paths in a path group properly during pg_init
2009-06-06 0:04 ` Chandra Seetharaman
@ 2009-06-15 22:16 ` Moger, Babu
0 siblings, 0 replies; 8+ messages in thread
From: Moger, Babu @ 2009-06-15 22:16 UTC (permalink / raw)
To: sekharan@linux.vnet.ibm.com, device-mapper development; +Cc: Alasdair G Kergon
I have seen this race condition earlier on my system. Problem is resolved after testing with this patch. Thanks.
--Babu Moger
> -----Original Message-----
> From: Chandra Seetharaman [mailto:sekharan@us.ibm.com]
> Sent: Friday, June 05, 2009 7:05 PM
> To: device-mapper development
> Cc: Alasdair G Kergon; Moger, Babu
> Subject: Re: [dm-devel] [PATCH] Handle multipath paths in a path group
> properly during pg_init
>
> Found a race condition where pg_init_in_progress being incremented when
> queue_work() returns without queuing the work.
>
> Changed the code to increment pg_init_in_progress only when queue_work
> returns success.
>
> Tested in 2.6.30-rc8
>
> ---------------------------------------------------
> From: Chandra Seetharaman <sekharan@us.ibm.com>
>
> Fixed a problem affecting reinstatement of passive paths and another
> problem with io lockup during device offline/online.
>
> Before we moved the hardware handler from dm to SCSI, it performed a
> pg_init
> for a path group and didn't maintain any state about each path in hardware
> handler code.
>
> But in SCSI dh, such state is now maintained, as we want to fail I/O early
> on a
> path if it is not the active path.
>
> All the hardware handlers have a state now and set to active or some form
> of
> inactive. They have prep_fn() which uses this state to fail the I/O
> without
> it ever being sent to the device.
>
> So in effect when dm-multipath calls scsi_dh_activate(), activate is
> sent to only one path and the "state" of that path is changed
> appropriately
> to "active" while other paths in the same path group are never changed
> as they never got an "activate".
>
> In order make sure all the paths in a path group gets their state set
> properly when a pg_init happens, we need to call scsi_dh_activate() on
> all paths in a path group.
>
> Doing this at the hardware handler layer is not a good option as we
> want the multipath layer to define the relationship between path and path
> groups and not the hardware handler.
>
> Attached patch sends an "activate" on each path in a path group when a
> path group is switched. It also sends an activate when a path is
> reinstated.
>
> Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
> Signed-off-by: Alasdair G Kergon <agk@redhat.com>
>
> ---
> drivers/md/dm-mpath.c | 63 +++++++++++++++++++++-----------------------
> ------
> 1 file changed, 26 insertions(+), 37 deletions(-)
>
> Index: linux-2.6.29/drivers/md/dm-mpath.c
> ===================================================================
> --- linux-2.6.29.orig/drivers/md/dm-mpath.c
> +++ linux-2.6.29/drivers/md/dm-mpath.c
> @@ -35,6 +35,7 @@ struct pgpath {
>
> struct dm_path path;
> struct work_struct deactivate_path;
> + struct work_struct activate_path;
> };
>
> #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
> @@ -64,8 +65,6 @@ struct multipath {
> spinlock_t lock;
>
> const char *hw_handler_name;
> - struct work_struct activate_path;
> - struct pgpath *pgpath_to_activate;
> unsigned nr_priority_groups;
> struct list_head priority_groups;
> unsigned pg_init_required; /* pg_init needs calling? */
> @@ -128,6 +127,7 @@ static struct pgpath *alloc_pgpath(void)
> if (pgpath) {
> pgpath->is_active = 1;
> INIT_WORK(&pgpath->deactivate_path, deactivate_path);
> + INIT_WORK(&pgpath->activate_path, activate_path);
> }
>
> return pgpath;
> @@ -160,7 +160,6 @@ static struct priority_group *alloc_prio
>
> static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
> {
> - unsigned long flags;
> struct pgpath *pgpath, *tmp;
> struct multipath *m = ti->private;
>
> @@ -169,10 +168,6 @@ static void free_pgpaths(struct list_hea
> if (m->hw_handler_name)
> scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
> dm_put_device(ti, pgpath->path.dev);
> - spin_lock_irqsave(&m->lock, flags);
> - if (m->pgpath_to_activate == pgpath)
> - m->pgpath_to_activate = NULL;
> - spin_unlock_irqrestore(&m->lock, flags);
> free_pgpath(pgpath);
> }
> }
> @@ -202,7 +197,6 @@ static struct multipath *alloc_multipath
> m->queue_io = 1;
> INIT_WORK(&m->process_queued_ios, process_queued_ios);
> INIT_WORK(&m->trigger_event, trigger_event);
> - INIT_WORK(&m->activate_path, activate_path);
> m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
> if (!m->mpio_pool) {
> kfree(m);
> @@ -427,8 +421,8 @@ static void process_queued_ios(struct wo
> {
> struct multipath *m =
> container_of(work, struct multipath, process_queued_ios);
> - struct pgpath *pgpath = NULL;
> - unsigned init_required = 0, must_queue = 1;
> + struct pgpath *pgpath = NULL, *tmp;
> + unsigned must_queue = 1;
> unsigned long flags;
>
> spin_lock_irqsave(&m->lock, flags);
> @@ -446,19 +440,15 @@ static void process_queued_ios(struct wo
> must_queue = 0;
>
> if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
> - m->pgpath_to_activate = pgpath;
> m->pg_init_count++;
> m->pg_init_required = 0;
> - m->pg_init_in_progress = 1;
> - init_required = 1;
> + list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) {
> + if (queue_work(kmpath_handlerd, &tmp->activate_path))
> + m->pg_init_in_progress++;
> + }
> }
> -
> out:
> spin_unlock_irqrestore(&m->lock, flags);
> -
> - if (init_required)
> - queue_work(kmpath_handlerd, &m->activate_path);
> -
> if (!must_queue)
> dispatch_queued_ios(m);
> }
> @@ -924,9 +914,13 @@ static int reinstate_path(struct pgpath
>
> pgpath->is_active = 1;
>
> - m->current_pgpath = NULL;
> - if (!m->nr_valid_paths++ && m->queue_size)
> + if (!m->nr_valid_paths++ && m->queue_size) {
> + m->current_pgpath = NULL;
> queue_work(kmultipathd, &m->process_queued_ios);
> + } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
> + if (queue_work(kmpath_handlerd, &pgpath->activate_path))
> + m->pg_init_in_progress++;
> + }
>
> dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
> pgpath->path.dev->name, m->nr_valid_paths);
> @@ -1102,35 +1096,30 @@ static void pg_init_done(struct dm_path
>
> spin_lock_irqsave(&m->lock, flags);
> if (errors) {
> - DMERR("Could not failover device. Error %d.", errors);
> - m->current_pgpath = NULL;
> - m->current_pg = NULL;
> + if (pgpath == m->current_pgpath) {
> + DMERR("Could not failover device. Error %d.", errors);
> + m->current_pgpath = NULL;
> + m->current_pg = NULL;
> + }
> } else if (!m->pg_init_required) {
> m->queue_io = 0;
> pg->bypassed = 0;
> }
>
> - m->pg_init_in_progress = 0;
> - queue_work(kmultipathd, &m->process_queued_ios);
> + m->pg_init_in_progress--;
> + if (!m->pg_init_in_progress)
> + queue_work(kmultipathd, &m->process_queued_ios);
> spin_unlock_irqrestore(&m->lock, flags);
> }
>
> static void activate_path(struct work_struct *work)
> {
> int ret;
> - struct multipath *m =
> - container_of(work, struct multipath, activate_path);
> - struct dm_path *path;
> - unsigned long flags;
> + struct pgpath *pgpath =
> + container_of(work, struct pgpath, activate_path);
>
> - spin_lock_irqsave(&m->lock, flags);
> - path = &m->pgpath_to_activate->path;
> - m->pgpath_to_activate = NULL;
> - spin_unlock_irqrestore(&m->lock, flags);
> - if (!path)
> - return;
> - ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev));
> - pg_init_done(path, ret);
> + ret = scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev));
> + pg_init_done(&pgpath->path, ret);
> }
>
> /*
>
>
>
^ permalink raw reply [flat|nested] 8+ messages in thread