[PATCH 6/6] staging: hv: Gracefully handle SCSI resets

virtualization.lists.linux-foundation.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 6/6] staging: hv: Gracefully handle SCSI resets
@ 2010-08-03 17:31 Hank Janssen
  2010-08-03 17:47 ` Greg KH
  0 siblings, 1 reply; 3+ messages in thread
From: Hank Janssen @ 2010-08-03 17:31 UTC (permalink / raw)
  To: 'linux-kernel@vger.kernel.org',
	'devel@driverdev.osuosl.org',
	'virtualization@lists.osdl.org'
  Cc: Haiyang Zhang, 'gregkh@suse.de'

From: Hank Janssen <hjanssen@microsoft.com>

If we get a SCSI host bus reset we now gracefully handle it, and we take the device offline. 
This before sometimes caused hangs.

Signed-off-by:Hank Janssen <hjanssen@microsoft.com>
Signed-off-by:Haiyang Zhang <haiyangz@microsoft.com>


---
 drivers/staging/hv/storvsc.c |   36 +++++++++++++++++++++++++++++++++++-
 1 files changed, 35 insertions(+), 1 deletions(-)

diff --git a/drivers/staging/hv/storvsc.c b/drivers/staging/hv/storvsc.c index 6bd2ff1..5f222cf 100644
--- a/drivers/staging/hv/storvsc.c
+++ b/drivers/staging/hv/storvsc.c
@@ -48,7 +48,9 @@ struct storvsc_device {
 
 	/* 0 indicates the device is being destroyed */
 	atomic_t RefCount;
-
+	
+	int reset;
+	spinlock_t lock;
 	atomic_t NumOutstandingRequests;
 
 	/*
@@ -93,6 +95,9 @@ static inline struct storvsc_device *AllocStorDevice(struct hv_device *Device)
 	atomic_cmpxchg(&storDevice->RefCount, 0, 2);
 
 	storDevice->Device = Device;
+	storDevice->reset  = 0;
+	spin_lock_init(&storDevice->lock);
+
 	Device->Extension = storDevice;
 
 	return storDevice;
@@ -101,6 +106,7 @@ static inline struct storvsc_device *AllocStorDevice(struct hv_device *Device)  static inline void FreeStorDevice(struct storvsc_device *Device)  {
 	/* ASSERT(atomic_read(&Device->RefCount) == 0); */
+	/*kfree(Device->lock);*/
 	kfree(Device);
 }
 
@@ -108,13 +114,24 @@ static inline void FreeStorDevice(struct storvsc_device *Device)  static inline struct storvsc_device *GetStorDevice(struct hv_device *Device)  {
 	struct storvsc_device *storDevice;
+	unsigned long flags;
 
 	storDevice = (struct storvsc_device *)Device->Extension;
+
+	spin_lock_irqsave(&storDevice->lock, flags);
+
+	if (storDevice->reset == 1) {
+		spin_unlock_irqrestore(&storDevice->lock, flags);
+		return NULL;
+	}
+
 	if (storDevice && atomic_read(&storDevice->RefCount) > 1)
 		atomic_inc(&storDevice->RefCount);
 	else
 		storDevice = NULL;
 
+	spin_unlock_irqrestore(&storDevice->lock, flags);
+
 	return storDevice;
 }
 
@@ -122,13 +139,19 @@ static inline struct storvsc_device *GetStorDevice(struct hv_device *Device)  static inline struct storvsc_device *MustGetStorDevice(struct hv_device *Device)  {
 	struct storvsc_device *storDevice;
+	unsigned long flags;
 
 	storDevice = (struct storvsc_device *)Device->Extension;
+
+	spin_lock_irqsave(&storDevice->lock, flags);
+
 	if (storDevice && atomic_read(&storDevice->RefCount))
 		atomic_inc(&storDevice->RefCount);
 	else
 		storDevice = NULL;
 
+	spin_unlock_irqrestore(&storDevice->lock, flags);
+
 	return storDevice;
 }
 
@@ -614,6 +637,7 @@ int StorVscOnHostReset(struct hv_device *Device)
 	struct storvsc_device *storDevice;
 	struct storvsc_request_extension *request;
 	struct vstor_packet *vstorPacket;
+	unsigned long flags;
 	int ret;
 
 	DPRINT_INFO(STORVSC, "resetting host adapter..."); @@ -625,6 +649,16 @@ int StorVscOnHostReset(struct hv_device *Device)
 		return -1;
 	}
 
+	spin_lock_irqsave(&storDevice->lock, flags);
+	storDevice->reset = 1;
+	spin_unlock_irqrestore(&storDevice->lock, flags);
+
+	/*
+	 * Wait for traffic in transit to complete
+	 */
+	while (atomic_read(&storDevice->NumOutstandingRequests))
+		udelay(1000);
+
 	request = &storDevice->ResetRequest;
 	vstorPacket = &request->VStorPacket;
 
--
1.6.0.2

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 6/6] staging: hv: Gracefully handle SCSI resets
  2010-08-03 17:31 [PATCH 6/6] staging: hv: Gracefully handle SCSI resets Hank Janssen
@ 2010-08-03 17:47 ` Greg KH
  2010-08-04  4:28   ` Hank Janssen
  0 siblings, 1 reply; 3+ messages in thread
From: Greg KH @ 2010-08-03 17:47 UTC (permalink / raw)
  To: Hank Janssen
  Cc: 'linux-kernel@vger.kernel.org',
	'devel@driverdev.osuosl.org',
	'virtualization@lists.osdl.org', Haiyang Zhang

On Tue, Aug 03, 2010 at 05:31:56PM +0000, Hank Janssen wrote:
> From: Hank Janssen <hjanssen@microsoft.com>
> 
> If we get a SCSI host bus reset we now gracefully handle it, and we take the device offline. 
> This before sometimes caused hangs.

Is this a problem for all older versions as well?  If so, should it be
backported to the -stable kernel releases?

> 
> Signed-off-by:Hank Janssen <hjanssen@microsoft.com>
> Signed-off-by:Haiyang Zhang <haiyangz@microsoft.com>
> 
> 
> ---
>  drivers/staging/hv/storvsc.c |   36 +++++++++++++++++++++++++++++++++++-
>  1 files changed, 35 insertions(+), 1 deletions(-)
> 
> diff --git a/drivers/staging/hv/storvsc.c b/drivers/staging/hv/storvsc.c index 6bd2ff1..5f222cf 100644
> --- a/drivers/staging/hv/storvsc.c
> +++ b/drivers/staging/hv/storvsc.c
> @@ -48,7 +48,9 @@ struct storvsc_device {
>  
>  	/* 0 indicates the device is being destroyed */
>  	atomic_t RefCount;
> -
> +	

Trailing whitespace :(

> +	int reset;

Can't this be a bool?

> +	spinlock_t lock;
>  	atomic_t NumOutstandingRequests;
>  
>  	/*
> @@ -93,6 +95,9 @@ static inline struct storvsc_device *AllocStorDevice(struct hv_device *Device)
>  	atomic_cmpxchg(&storDevice->RefCount, 0, 2);
>  
>  	storDevice->Device = Device;
> +	storDevice->reset  = 0;
> +	spin_lock_init(&storDevice->lock);
> +
>  	Device->Extension = storDevice;
>  
>  	return storDevice;
> @@ -101,6 +106,7 @@ static inline struct storvsc_device *AllocStorDevice(struct hv_device *Device)  static inline void FreeStorDevice(struct storvsc_device *Device)  {
>  	/* ASSERT(atomic_read(&Device->RefCount) == 0); */
> +	/*kfree(Device->lock);*/

Why add a commented out line?  Especially one that is incorrect?  :)

>  	kfree(Device);
>  }
>  
> @@ -108,13 +114,24 @@ static inline void FreeStorDevice(struct storvsc_device *Device)  static inline struct storvsc_device *GetStorDevice(struct hv_device *Device)  {
>  	struct storvsc_device *storDevice;
> +	unsigned long flags;
>  
>  	storDevice = (struct storvsc_device *)Device->Extension;
> +
> +	spin_lock_irqsave(&storDevice->lock, flags);
> +
> +	if (storDevice->reset == 1) {
> +		spin_unlock_irqrestore(&storDevice->lock, flags);
> +		return NULL;

Don't return here, jump to the end of the function and return there.
That way you only have one lock/unlock pair and it's much easier to
maintain and audit over time that you got everything correct.

> +	}
> +
>  	if (storDevice && atomic_read(&storDevice->RefCount) > 1)
>  		atomic_inc(&storDevice->RefCount);
>  	else
>  		storDevice = NULL;
>  
> +	spin_unlock_irqrestore(&storDevice->lock, flags);
> +
>  	return storDevice;
>  }
>  
> @@ -122,13 +139,19 @@ static inline struct storvsc_device *GetStorDevice(struct hv_device *Device)  static inline struct storvsc_device *MustGetStorDevice(struct hv_device *Device)  {
>  	struct storvsc_device *storDevice;
> +	unsigned long flags;
>  
>  	storDevice = (struct storvsc_device *)Device->Extension;
> +
> +	spin_lock_irqsave(&storDevice->lock, flags);
> +
>  	if (storDevice && atomic_read(&storDevice->RefCount))
>  		atomic_inc(&storDevice->RefCount);
>  	else
>  		storDevice = NULL;
>  
> +	spin_unlock_irqrestore(&storDevice->lock, flags);
> +
>  	return storDevice;
>  }
>  
> @@ -614,6 +637,7 @@ int StorVscOnHostReset(struct hv_device *Device)
>  	struct storvsc_device *storDevice;
>  	struct storvsc_request_extension *request;
>  	struct vstor_packet *vstorPacket;
> +	unsigned long flags;
>  	int ret;
>  
>  	DPRINT_INFO(STORVSC, "resetting host adapter..."); @@ -625,6 +649,16 @@ int StorVscOnHostReset(struct hv_device *Device)
>  		return -1;
>  	}
>  
> +	spin_lock_irqsave(&storDevice->lock, flags);
> +	storDevice->reset = 1;
> +	spin_unlock_irqrestore(&storDevice->lock, flags);
> +
> +	/*
> +	 * Wait for traffic in transit to complete
> +	 */
> +	while (atomic_read(&storDevice->NumOutstandingRequests))
> +		udelay(1000);

What's ever going to get us out of this loop?  You need a fall-back in
case this read never succeeds.

And why an atomic value if you have a lock protecting it?  That's major
overkill and is probably not needed.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 3+ messages in thread

* RE: [PATCH 6/6] staging: hv: Gracefully handle SCSI resets
  2010-08-03 17:47 ` Greg KH
@ 2010-08-04  4:28   ` Hank Janssen
  0 siblings, 0 replies; 3+ messages in thread
From: Hank Janssen @ 2010-08-04  4:28 UTC (permalink / raw)
  To: Greg KH
  Cc: 'linux-kernel@vger.kernel.org',
	'devel@driverdev.osuosl.org',
	'virtualization@lists.osdl.org', Haiyang Zhang

And then Greg spoke;
>>On Tue, Aug 03, 2010 at 05:31:56PM +0000, Hank Janssen wrote:
>> From: Hank Janssen <hjanssen@microsoft.com>
>> 
>> If we get a SCSI host bus reset we now gracefully handle it, and we take the device offline. 
>> This before sometimes caused hangs.
>
>Is this a problem for all older versions as well?  If so, should it be backported to the -stable kernel releases?

Yes, this should be backported to all stable kernel releases that have the Hyper-V drivers
In them.  This fixes a VM freeze.

All patches I submitted in the batch of 6 (Except for the TODO patch) fix bugs. This one
Fixes the worst one.

Thanks,

Hank.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2010-08-04  4:28 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-08-03 17:31 [PATCH 6/6] staging: hv: Gracefully handle SCSI resets Hank Janssen
2010-08-03 17:47 ` Greg KH
2010-08-04  4:28   ` Hank Janssen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).