* [PATCH] fix to serialize unload and discovery
@ 2006-10-19 3:23 malahal
2006-10-19 20:58 ` Luben Tuikov
0 siblings, 1 reply; 4+ messages in thread
From: malahal @ 2006-10-19 3:23 UTC (permalink / raw)
To: linux-scsi
sas_discover_domain() and sas_deform_port() assume that they are single
threaded and never run in parallel. That is mostly true, as we have a
single-threaded work queue to handle discovery and other events. The
only race I can find is that unloading a module calls sas_deform_port()
without checking whether a discovery event is in progress. This
patch stops queuing further events, flushes the queue, and then calls
sas_deform_port() to avoid the race.
Signed-off-by: Malahal Naineni <malahal@us.ibm.com>
diff -r 8fe3b862f968 drivers/scsi/libsas/sas_discover.c
--- a/drivers/scsi/libsas/sas_discover.c Tue Oct 17 13:24:04 2006 -0700
+++ b/drivers/scsi/libsas/sas_discover.c Tue Oct 17 13:25:28 2006 -0700
@@ -722,7 +722,7 @@ int sas_discover_event(struct asd_sas_po
BUG_ON(ev >= DISC_NUM_EVENTS);
sas_queue_event(ev, &disc->disc_event_lock, &disc->pending,
- &disc->disc_work[ev], port->ha->core.shost);
+ &disc->disc_work[ev], port->ha);
return 0;
}
diff -r 8fe3b862f968 drivers/scsi/libsas/sas_event.c
--- a/drivers/scsi/libsas/sas_event.c Tue Oct 17 13:24:04 2006 -0700
+++ b/drivers/scsi/libsas/sas_event.c Tue Oct 17 13:25:28 2006 -0700
@@ -31,7 +31,7 @@ static void notify_ha_event(struct sas_h
BUG_ON(event >= HA_NUM_EVENTS);
sas_queue_event(event, &sas_ha->event_lock, &sas_ha->pending,
- &sas_ha->ha_events[event], sas_ha->core.shost);
+ &sas_ha->ha_events[event], sas_ha);
}
static void notify_port_event(struct asd_sas_phy *phy, enum port_event event)
@@ -41,7 +41,7 @@ static void notify_port_event(struct asd
BUG_ON(event >= PORT_NUM_EVENTS);
sas_queue_event(event, &ha->event_lock, &phy->port_events_pending,
- &phy->port_events[event], ha->core.shost);
+ &phy->port_events[event], ha);
}
static void notify_phy_event(struct asd_sas_phy *phy, enum phy_event event)
@@ -51,7 +51,7 @@ static void notify_phy_event(struct asd_
BUG_ON(event >= PHY_NUM_EVENTS);
sas_queue_event(event, &ha->event_lock, &phy->phy_events_pending,
- &phy->phy_events[event], ha->core.shost);
+ &phy->phy_events[event], ha);
}
int sas_init_events(struct sas_ha_struct *sas_ha)
diff -r 8fe3b862f968 drivers/scsi/libsas/sas_init.c
--- a/drivers/scsi/libsas/sas_init.c Tue Oct 17 13:24:04 2006 -0700
+++ b/drivers/scsi/libsas/sas_init.c Tue Oct 17 13:29:02 2006 -0700
@@ -85,6 +85,9 @@ int sas_register_ha(struct sas_ha_struct
else if (sas_ha->lldd_queue_size == -1)
sas_ha->lldd_queue_size = 128; /* Sanity */
+ sas_ha->state = SAS_HA_REGISTERED;
+ spin_lock_init(&sas_ha->state_lock);
+
error = sas_register_phys(sas_ha);
if (error) {
printk(KERN_NOTICE "couldn't register sas phys:%d\n", error);
@@ -123,9 +126,18 @@ Undo_phys:
int sas_unregister_ha(struct sas_ha_struct *sas_ha)
{
+ unsigned long flags;
+
if (sas_ha->lldd_max_execute_num > 1) {
sas_shutdown_queue(sas_ha);
}
+
+ /* Set the state to unregistered to avoid further
+ * events to be queued */
+ spin_lock_irqsave(&sas_ha->state_lock, flags);
+ sas_ha->state = SAS_HA_UNREGISTERED;
+ spin_unlock_irqrestore(&sas_ha->state_lock, flags);
+ scsi_flush_work(sas_ha->core.shost);
sas_unregister_ports(sas_ha);
diff -r 8fe3b862f968 drivers/scsi/libsas/sas_internal.h
--- a/drivers/scsi/libsas/sas_internal.h Tue Oct 17 13:24:04 2006 -0700
+++ b/drivers/scsi/libsas/sas_internal.h Tue Oct 17 13:25:28 2006 -0700
@@ -80,7 +80,7 @@ static inline void sas_queue_event(int e
static inline void sas_queue_event(int event, spinlock_t *lock,
unsigned long *pending,
struct work_struct *work,
- struct Scsi_Host *shost)
+ struct sas_ha_struct *sas_ha)
{
unsigned long flags;
@@ -91,7 +91,12 @@ static inline void sas_queue_event(int e
}
__set_bit(event, pending);
spin_unlock_irqrestore(lock, flags);
- scsi_queue_work(shost, work);
+
+ spin_lock_irqsave(&sas_ha->state_lock, flags);
+ if (sas_ha->state != SAS_HA_UNREGISTERED) {
+ scsi_queue_work(sas_ha->core.shost, work);
+ }
+ spin_unlock_irqrestore(&sas_ha->state_lock, flags);
}
static inline void sas_begin_event(int event, spinlock_t *lock,
diff -r 8fe3b862f968 include/scsi/libsas.h
--- a/include/scsi/libsas.h Tue Oct 17 13:24:04 2006 -0700
+++ b/include/scsi/libsas.h Tue Oct 17 13:25:28 2006 -0700
@@ -307,11 +307,19 @@ struct scsi_core {
int queue_thread_kill;
};
+enum sas_ha_state {
+ SAS_HA_REGISTERED,
+ SAS_HA_UNREGISTERED
+};
+
struct sas_ha_struct {
/* private: */
spinlock_t event_lock;
struct work_struct ha_events[HA_NUM_EVENTS];
unsigned long pending;
+
+ enum sas_ha_state state;
+ spinlock_t state_lock;
struct scsi_core core;
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] fix to serialize unload and discovery
2006-10-19 3:23 [PATCH] fix to serialize unload and discovery malahal
@ 2006-10-19 20:58 ` Luben Tuikov
2006-10-19 21:20 ` malahal
0 siblings, 1 reply; 4+ messages in thread
From: Luben Tuikov @ 2006-10-19 20:58 UTC (permalink / raw)
To: malahal, linux-scsi
--- malahal@us.ibm.com wrote:
> sas_discover_domain() and sas_deform_port() assume that they are single
> threaded and never run in parallel.
This is indeed not true.
> That is mostly true as we have a
> single threaded work queue to handle discovery and other events.
Hint: when the port is gone, it is gone...
> The
> only race I find is unloading a module will call sas_deform_port()
> without watching if there is a discovery event is in progress. This
> patch will stop queuing further events, flush the queue, and then call
> sas_deform_port() to avoid the race.
You need a much more elaborate (read: general solution, aka algorithm)
framework to solve this.
Needless to say, I don't observe these bugs and errors in my version
of the SAS Stack.
Luben
>
> Signed-off-by: Malahal Naineni <malahal@us.ibm.com>
>
>
> diff -r 8fe3b862f968 drivers/scsi/libsas/sas_discover.c
> --- a/drivers/scsi/libsas/sas_discover.c Tue Oct 17 13:24:04 2006 -0700
> +++ b/drivers/scsi/libsas/sas_discover.c Tue Oct 17 13:25:28 2006 -0700
> @@ -722,7 +722,7 @@ int sas_discover_event(struct asd_sas_po
> BUG_ON(ev >= DISC_NUM_EVENTS);
>
> sas_queue_event(ev, &disc->disc_event_lock, &disc->pending,
> - &disc->disc_work[ev], port->ha->core.shost);
> + &disc->disc_work[ev], port->ha);
>
> return 0;
> }
> diff -r 8fe3b862f968 drivers/scsi/libsas/sas_event.c
> --- a/drivers/scsi/libsas/sas_event.c Tue Oct 17 13:24:04 2006 -0700
> +++ b/drivers/scsi/libsas/sas_event.c Tue Oct 17 13:25:28 2006 -0700
> @@ -31,7 +31,7 @@ static void notify_ha_event(struct sas_h
> BUG_ON(event >= HA_NUM_EVENTS);
>
> sas_queue_event(event, &sas_ha->event_lock, &sas_ha->pending,
> - &sas_ha->ha_events[event], sas_ha->core.shost);
> + &sas_ha->ha_events[event], sas_ha);
> }
>
> static void notify_port_event(struct asd_sas_phy *phy, enum port_event event)
> @@ -41,7 +41,7 @@ static void notify_port_event(struct asd
> BUG_ON(event >= PORT_NUM_EVENTS);
>
> sas_queue_event(event, &ha->event_lock, &phy->port_events_pending,
> - &phy->port_events[event], ha->core.shost);
> + &phy->port_events[event], ha);
> }
>
> static void notify_phy_event(struct asd_sas_phy *phy, enum phy_event event)
> @@ -51,7 +51,7 @@ static void notify_phy_event(struct asd_
> BUG_ON(event >= PHY_NUM_EVENTS);
>
> sas_queue_event(event, &ha->event_lock, &phy->phy_events_pending,
> - &phy->phy_events[event], ha->core.shost);
> + &phy->phy_events[event], ha);
> }
>
> int sas_init_events(struct sas_ha_struct *sas_ha)
> diff -r 8fe3b862f968 drivers/scsi/libsas/sas_init.c
> --- a/drivers/scsi/libsas/sas_init.c Tue Oct 17 13:24:04 2006 -0700
> +++ b/drivers/scsi/libsas/sas_init.c Tue Oct 17 13:29:02 2006 -0700
> @@ -85,6 +85,9 @@ int sas_register_ha(struct sas_ha_struct
> else if (sas_ha->lldd_queue_size == -1)
> sas_ha->lldd_queue_size = 128; /* Sanity */
>
> + sas_ha->state = SAS_HA_REGISTERED;
> + spin_lock_init(&sas_ha->state_lock);
> +
> error = sas_register_phys(sas_ha);
> if (error) {
> printk(KERN_NOTICE "couldn't register sas phys:%d\n", error);
> @@ -123,9 +126,18 @@ Undo_phys:
>
> int sas_unregister_ha(struct sas_ha_struct *sas_ha)
> {
> + unsigned long flags;
> +
> if (sas_ha->lldd_max_execute_num > 1) {
> sas_shutdown_queue(sas_ha);
> }
> +
> + /* Set the state to unregistered to avoid further
> + * events to be queued */
> + spin_lock_irqsave(&sas_ha->state_lock, flags);
> + sas_ha->state = SAS_HA_UNREGISTERED;
> + spin_unlock_irqrestore(&sas_ha->state_lock, flags);
> + scsi_flush_work(sas_ha->core.shost);
>
> sas_unregister_ports(sas_ha);
>
> diff -r 8fe3b862f968 drivers/scsi/libsas/sas_internal.h
> --- a/drivers/scsi/libsas/sas_internal.h Tue Oct 17 13:24:04 2006 -0700
> +++ b/drivers/scsi/libsas/sas_internal.h Tue Oct 17 13:25:28 2006 -0700
> @@ -80,7 +80,7 @@ static inline void sas_queue_event(int e
> static inline void sas_queue_event(int event, spinlock_t *lock,
> unsigned long *pending,
> struct work_struct *work,
> - struct Scsi_Host *shost)
> + struct sas_ha_struct *sas_ha)
> {
> unsigned long flags;
>
> @@ -91,7 +91,12 @@ static inline void sas_queue_event(int e
> }
> __set_bit(event, pending);
> spin_unlock_irqrestore(lock, flags);
> - scsi_queue_work(shost, work);
> +
> + spin_lock_irqsave(&sas_ha->state_lock, flags);
> + if (sas_ha->state != SAS_HA_UNREGISTERED) {
> + scsi_queue_work(sas_ha->core.shost, work);
> + }
> + spin_unlock_irqrestore(&sas_ha->state_lock, flags);
> }
>
> static inline void sas_begin_event(int event, spinlock_t *lock,
> diff -r 8fe3b862f968 include/scsi/libsas.h
> --- a/include/scsi/libsas.h Tue Oct 17 13:24:04 2006 -0700
> +++ b/include/scsi/libsas.h Tue Oct 17 13:25:28 2006 -0700
> @@ -307,11 +307,19 @@ struct scsi_core {
> int queue_thread_kill;
> };
>
> +enum sas_ha_state {
> + SAS_HA_REGISTERED,
> + SAS_HA_UNREGISTERED
> +};
> +
> struct sas_ha_struct {
> /* private: */
> spinlock_t event_lock;
> struct work_struct ha_events[HA_NUM_EVENTS];
> unsigned long pending;
> +
> + enum sas_ha_state state;
> + spinlock_t state_lock;
>
> struct scsi_core core;
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] fix to serialize unload and discovery
2006-10-19 20:58 ` Luben Tuikov
@ 2006-10-19 21:20 ` malahal
2006-10-19 22:41 ` Luben Tuikov
0 siblings, 1 reply; 4+ messages in thread
From: malahal @ 2006-10-19 21:20 UTC (permalink / raw)
To: Luben Tuikov; +Cc: linux-scsi
Luben, I am not sure if you are referring to your version of the driver
or the mainline version! If I read the mainline version correctly,
sas_deform_port() actually removes a phy from a port and also removes
all attached devices if it is the last phy in the port.
When a phy/port is gone, an event is posted to the work queue to
eventually call sas_deform_port(). Let me know if I misunderstood the
mainline version.
Thanks, Malahal.
Luben Tuikov [ltuikov@yahoo.com] wrote:
> --- malahal@us.ibm.com wrote:
> > sas_discover_domain() and sas_deform_port() assume that they are single
> > threaded and never run in parallel.
>
> This is indeed not true.
>
> > That is mostly true as we have a
> > single threaded work queue to handle discovery and other events.
>
> Hint: when the port is gone, it is gone...
>
> > The
> > only race I find is unloading a module will call sas_deform_port()
> > without watching if there is a discovery event is in progress. This
> > patch will stop queuing further events, flush the queue, and then call
> > sas_deform_port() to avoid the race.
>
> You need a much more elaborate (read: general solution, aka algorithm)
> framework to solve this.
>
> Needless to say, I don't observe these bugs and errors in my version
> of the SAS Stack.
>
> Luben
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] fix to serialize unload and discovery
2006-10-19 21:20 ` malahal
@ 2006-10-19 22:41 ` Luben Tuikov
0 siblings, 0 replies; 4+ messages in thread
From: Luben Tuikov @ 2006-10-19 22:41 UTC (permalink / raw)
To: malahal; +Cc: linux-scsi
--- malahal@us.ibm.com wrote:
> Luben, I am not sure if you are referring to your version of the driver
> or the mainline version!
Malahal,
I'm not referring to the bottomleyised version of my code.
> If I read the mainline version correctly,
> sas_deform_port() actually removes a phy from a port and also removes
> all attached devices if it is the last phy in the port.
>
> When a phy/port is gone, an event is posted to the work queue to
> eventually call sas_deform_port(). Let me know if I misunderstood the
> mainline version.
I'm not sure if you misunderstood the mainline version of my code --
you have to ask bottomley for that, since this code has been appropriated
by bottomley who has also done some "architectural" changes.
Good luck!
Luben
>
> Thanks, Malahal.
>
> Luben Tuikov [ltuikov@yahoo.com] wrote:
> > --- malahal@us.ibm.com wrote:
> > > sas_discover_domain() and sas_deform_port() assume that they are single
> > > threaded and never run in parallel.
> >
> > This is indeed not true.
> >
> > > That is mostly true as we have a
> > > single threaded work queue to handle discovery and other events.
> >
> > Hint: when the port is gone, it is gone...
> >
> > > The
> > > only race I find is unloading a module will call sas_deform_port()
> > > without watching if there is a discovery event is in progress. This
> > > patch will stop queuing further events, flush the queue, and then call
> > > sas_deform_port() to avoid the race.
> >
> > You need a much more elaborate (read: general solution, aka algorithm)
> > framework to solve this.
> >
> > Needless to say, I don't observe these bugs and errors in my version
> > of the SAS Stack.
> >
> > Luben
> >
> -
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2006-10-19 22:41 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-10-19 3:23 [PATCH] fix to serialize unload and discovery malahal
2006-10-19 20:58 ` Luben Tuikov
2006-10-19 21:20 ` malahal
2006-10-19 22:41 ` Luben Tuikov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox