linux-scsi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 5/7] lpfc 8.3.13: BSG management fixes
@ 2010-05-12 13:32 James Smart
  2010-05-12 17:38 ` Mike Christie
  2010-05-14  9:29 ` FUJITA Tomonori
  0 siblings, 2 replies; 9+ messages in thread
From: James Smart @ 2010-05-12 13:32 UTC (permalink / raw)
  To: linux-scsi


- Add reference counting to prevent module removal when
  there are outstanding BSG requests.
- Clean up some duplicate code.

 Signed-off-by: Alex Iannicelli <alex.iannicelli@emulex.com>
 Signed-off-by: James Smart <james.smart@emulex.com>

 ---

 lpfc_bsg.c |  103 +++++++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 91 insertions(+), 12 deletions(-)


diff -upNr a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
--- a/drivers/scsi/lpfc/lpfc_bsg.c	2010-05-05 08:52:38.000000000 -0400
+++ b/drivers/scsi/lpfc/lpfc_bsg.c	2010-05-12 09:04:43.000000000 -0400
@@ -141,6 +141,47 @@ struct lpfc_dmabufext {
 };
 
 /**
+ * lpfc_bsg_get_kboject - gets a reference on the drivers module kobject
+ * @phba: Pointer to HBA context object
+ *
+ * This function bumps the reference on the drivers module kobject preventing
+ * the driver from being removed. Prevents the sysfs tree from being
+ * corrupted because the driver was removed while bsg jobs are unfinished,
+ * this would be the case for registered events or any command sent to hardware
+ * and the driver needs to wait for the command to complete.
+ **/
+static void
+lpfc_bsg_get_kboject(struct lpfc_hba *phba)
+{
+	struct lpfc_vport *vport = phba->pport;
+	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
+	unsigned long flags;
+
+	spin_lock_irqsave(shost->host_lock, flags);
+	__module_get(shost->dma_dev->driver->owner);
+	spin_unlock_irqrestore(shost->host_lock, flags);
+}
+
+/**
+ * lpfc_bsg_put_kboject - puts back a reference the drivers module kobject
+ * @phba: Pointer to HBA context object
+ *
+ * This function decrements the reference on the drivers module kobject
+ * allowing the module to be removed if no other references are outstanding.
+ **/
+static void
+lpfc_bsg_put_kboject(struct lpfc_hba *phba)
+{
+	struct lpfc_vport *vport = phba->pport;
+	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
+	unsigned long flags;
+
+	spin_lock_irqsave(shost->host_lock, flags);
+	module_put(shost->dma_dev->driver->owner);
+	spin_unlock_irqrestore(shost->host_lock, flags);
+}
+
+/**
  * lpfc_bsg_send_mgmt_cmd_cmp - lpfc_bsg_send_mgmt_cmd's completion handler
  * @phba: Pointer to HBA context object.
  * @cmdiocbq: Pointer to command iocb.
@@ -230,6 +271,7 @@ lpfc_bsg_send_mgmt_cmd_cmp(struct lpfc_h
 	/* complete the job back to userspace */
 	job->job_done(job);
 	spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
+	lpfc_bsg_put_kboject(phba);
 	return;
 }
 
@@ -400,6 +442,7 @@ no_dd_data:
 	/* make error code available to userspace */
 	job->reply->result = rc;
 	job->dd_data = NULL;
+	lpfc_bsg_put_kboject(phba);
 	return rc;
 }
 
@@ -490,6 +533,7 @@ lpfc_bsg_rport_els_cmp(struct lpfc_hba *
 	/* complete the job back to userspace */
 	job->job_done(job);
 	spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
+	lpfc_bsg_put_kboject(phba);
 	return;
 }
 
@@ -645,6 +689,7 @@ no_dd_data:
 	/* make error code available to userspace */
 	job->reply->result = rc;
 	job->dd_data = NULL;
+	lpfc_bsg_put_kboject(phba);
 	return rc;
 }
 
@@ -981,6 +1026,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba 
 			/* complete the job back to userspace */
 			spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
 			job->job_done(job);
+			lpfc_bsg_put_kboject(phba);
 			spin_lock_irqsave(&phba->ct_ev_lock, flags);
 		}
 	}
@@ -997,6 +1043,11 @@ error_ct_unsol_exit:
 /**
  * lpfc_bsg_hba_set_event - process a SET_EVENT bsg vendor command
  * @job: SET_EVENT fc_bsg_job
+ *
+ * On error decrement the kobject count, otherwise we are holding
+ * the job in the event structure to complete later.
+ * The unsolicited event handler will decrement the count when it finds
+ * this job on the ct_ev_waiters queue.
  **/
 static int
 lpfc_bsg_hba_set_event(struct fc_bsg_job *job)
@@ -1074,12 +1125,15 @@ job_error:
 		kfree(dd_data);
 
 	job->dd_data = NULL;
+	lpfc_bsg_put_kboject(phba);
 	return rc;
 }
 
 /**
  * lpfc_bsg_hba_get_event - process a GET_EVENT bsg vendor command
  * @job: GET_EVENT fc_bsg_job
+ *
+ * This job is finished with or without an error so decrement the kobject.
  **/
 static int
 lpfc_bsg_hba_get_event(struct fc_bsg_job *job)
@@ -1160,11 +1214,13 @@ lpfc_bsg_hba_get_event(struct fc_bsg_job
 	job->dd_data = NULL;
 	job->reply->result = 0;
 	job->job_done(job);
+	lpfc_bsg_put_kboject(phba);
 	return 0;
 
 job_error:
 	job->dd_data = NULL;
 	job->reply->result = rc;
+	lpfc_bsg_put_kboject(phba);
 	return rc;
 }
 
@@ -1244,6 +1300,7 @@ lpfc_issue_ct_rsp_cmp(struct lpfc_hba *p
 	/* complete the job back to userspace */
 	job->job_done(job);
 	spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
+	lpfc_bsg_put_kboject(phba);
 	return;
 }
 
@@ -1365,6 +1422,9 @@ no_dd_data:
 /**
  * lpfc_bsg_send_mgmt_rsp - process a SEND_MGMT_RESP bsg vendor command
  * @job: SEND_MGMT_RESP fc_bsg_job
+ *
+ * This job is completed with or without an error so decrement the kobject
+ * count.
  **/
 static int
 lpfc_bsg_send_mgmt_rsp(struct fc_bsg_job *job)
@@ -1435,6 +1495,7 @@ send_mgmt_rsp_exit:
 	/* make error code available to userspace */
 	job->reply->result = rc;
 	job->dd_data = NULL;
+	lpfc_bsg_put_kboject(phba);
 	return rc;
 }
 
@@ -1594,6 +1655,7 @@ job_error:
 	/* complete the job back to userspace if no error */
 	if (rc == 0)
 		job->job_done(job);
+	lpfc_bsg_put_kboject(phba);
 	return rc;
 }
 
@@ -2328,6 +2390,7 @@ loopback_test_exit:
 	/* complete the job back to userspace if no error */
 	if (rc == 0)
 		job->job_done(job);
+	lpfc_bsg_put_kboject(phba);
 	return rc;
 }
 
@@ -2374,6 +2437,7 @@ job_error:
 	job->reply->result = rc;
 	if (rc == 0)
 		job->job_done(job);
+	lpfc_bsg_put_kboject(phba);
 	return rc;
 }
 
@@ -2477,6 +2541,7 @@ lpfc_bsg_wake_mbox_wait(struct lpfc_hba 
 		kfree(dd_data->context_un.mbox.rxbmp);
 	}
 	kfree(dd_data);
+	lpfc_bsg_put_kboject(phba);
 	return;
 }
 
@@ -2688,15 +2753,6 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phb
 
 		pmboxq->context2 = ext;
 		pmboxq->in_ext_byte_len =
-			mbox_req->inExtWLen *
-			sizeof(uint32_t);
-		pmboxq->out_ext_byte_len =
-			mbox_req->outExtWLen *
-			sizeof(uint32_t);
-		pmboxq->mbox_offset_word =
-			mbox_req->mbOffset;
-		pmboxq->context2 = ext;
-		pmboxq->in_ext_byte_len =
 			mbox_req->inExtWLen * sizeof(uint32_t);
 		pmboxq->out_ext_byte_len =
 			mbox_req->outExtWLen * sizeof(uint32_t);
@@ -2945,7 +3001,7 @@ job_done:
 		kfree(rxbmp);
 	}
 	kfree(dd_data);
-
+	lpfc_bsg_put_kboject(phba);
 	return rc;
 }
 
@@ -2994,13 +3050,15 @@ job_error:
 		job->reply->result = 0;
 		job->dd_data = NULL;
 		job->job_done(job);
+		lpfc_bsg_put_kboject(phba);
 	} else if (rc == 1)
-		/* job submitted, will complete later*/
+		/* will complete the job later so hold onto the kobject */
 		rc = 0; /* return zero, no error */
 	else {
 		/* some error occurred */
 		job->reply->result = rc;
 		job->dd_data = NULL;
+		lpfc_bsg_put_kboject(phba);
 	}
 
 	return rc;
@@ -3100,6 +3158,7 @@ lpfc_bsg_menlo_cmd_cmp(struct lpfc_hba *
 	/* complete the job back to userspace */
 	job->job_done(job);
 	spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
+	lpfc_bsg_put_kboject(phba);
 	return;
 }
 
@@ -3295,6 +3354,7 @@ no_dd_data:
 	/* make error code available to userspace */
 	job->reply->result = rc;
 	job->dd_data = NULL;
+	lpfc_bsg_put_kboject(phba);
 	return rc;
 }
 /**
@@ -3304,6 +3364,8 @@ no_dd_data:
 static int
 lpfc_bsg_hst_vendor(struct fc_bsg_job *job)
 {
+	struct lpfc_vport *vport = (struct lpfc_vport *)job->shost->hostdata;
+	struct lpfc_hba *phba = vport->phba;
 	int command = job->request->rqst_data.h_vendor.vendor_cmd[0];
 	int rc;
 
@@ -3338,6 +3400,7 @@ lpfc_bsg_hst_vendor(struct fc_bsg_job *j
 		job->reply->reply_payload_rcv_len = 0;
 		/* make error code available to userspace */
 		job->reply->result = rc;
+		lpfc_bsg_put_kboject(phba);
 		break;
 	}
 
@@ -3347,12 +3410,21 @@ lpfc_bsg_hst_vendor(struct fc_bsg_job *j
 /**
  * lpfc_bsg_request - handle a bsg request from the FC transport
  * @job: fc_bsg_job to handle
+ *
+ * We increment the scsi host drivers kobject until we are completely
+ * finished with the job with or without error. By doing so the module
+ * cannot be removed preventing the scsi transport sysfs entries from
+ * becoming corrupt. This mimics the older ioctl device file.
  **/
 int
 lpfc_bsg_request(struct fc_bsg_job *job)
 {
 	uint32_t msgcode;
 	int rc;
+	struct lpfc_vport *vport = (struct lpfc_vport *)job->shost->hostdata;
+	struct lpfc_hba *phba = vport->phba;
+
+	lpfc_bsg_get_kboject(phba);
 
 	msgcode = job->request->msgcode;
 	switch (msgcode) {
@@ -3370,6 +3442,7 @@ lpfc_bsg_request(struct fc_bsg_job *job)
 		job->reply->reply_payload_rcv_len = 0;
 		/* make error code available to userspace */
 		job->reply->result = rc;
+		lpfc_bsg_put_kboject(phba);
 		break;
 	}
 
@@ -3429,6 +3502,7 @@ lpfc_bsg_timeout(struct fc_bsg_job *job)
 		job->reply->result = -EAGAIN;
 		spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
 		job->job_done(job);
+		lpfc_bsg_put_kboject(phba);
 		break;
 	case TYPE_MBOX:
 		mbox = &dd_data->context_un.mbox;
@@ -3437,9 +3511,14 @@ lpfc_bsg_timeout(struct fc_bsg_job *job)
 		job->dd_data = NULL;
 		job->reply->reply_payload_rcv_len = 0;
 		job->reply->result = -EAGAIN;
-		/* the mbox completion handler can now be run */
+		/* the mbox completion handler can now run but it wont
+		 * find a job so it will just cleanup, we have a ref count
+		 * on the module while the command is outstanding.
+		 */
 		spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
 		job->job_done(job);
+		/* decrement count as we are done with this job */
+		lpfc_bsg_put_kboject(phba);
 		break;
 	case TYPE_MENLO:
 		menlo = &dd_data->context_un.menlo;



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 5/7] lpfc 8.3.13: BSG management fixes
  2010-05-12 13:32 [PATCH 5/7] lpfc 8.3.13: BSG management fixes James Smart
@ 2010-05-12 17:38 ` Mike Christie
  2010-05-12 19:00   ` James Smart
  2010-05-14  9:29 ` FUJITA Tomonori
  1 sibling, 1 reply; 9+ messages in thread
From: Mike Christie @ 2010-05-12 17:38 UTC (permalink / raw)
  To: james.smart; +Cc: linux-scsi

On 05/12/2010 08:32 AM, James Smart wrote:
> + * lpfc_bsg_get_kboject - gets a reference on the drivers module kobject


I think you wanted to name this lpfc_bsg_get_kobject and name the put 
function lpfc_bsg_put_kobject?



> + * @phba: Pointer to HBA context object
> + *
> + * This function bumps the reference on the drivers module kobject preventing
> + * the driver from being removed. Prevents the sysfs tree from being
> + * corrupted because the driver was removed while bsg jobs are unfinished,
> + * this would be the case for registered events or any command sent to hardware
> + * and the driver needs to wait for the command to complete.
> + **/
> +static void
> +lpfc_bsg_get_kboject(struct lpfc_hba *phba)
> +{
> +	struct lpfc_vport *vport = phba->pport;
> +	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(shost->host_lock, flags);
> +	__module_get(shost->dma_dev->driver->owner);
> +	spin_unlock_irqrestore(shost->host_lock, flags);
> +}

> +

What is the host_lock use for?

With this patch is is still possible for the module to unload right 
before lpfc_bsg_request is called? Should this use a try_module_get and 
if that fails fail the request?

Why not just add the module get() and put() calls to the fc class (would 
it just push the problem up?)?

Or what about in bsg.c, have bsg_open do a module_get? Could we add a 
module pointer to the bsg's devices request_queue and have bsg do the 
get when the device is opened?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 5/7] lpfc 8.3.13: BSG management fixes
  2010-05-12 17:38 ` Mike Christie
@ 2010-05-12 19:00   ` James Smart
  0 siblings, 0 replies; 9+ messages in thread
From: James Smart @ 2010-05-12 19:00 UTC (permalink / raw)
  To: Mike Christie; +Cc: linux-scsi@vger.kernel.org



Mike Christie wrote:
> What is the host_lock use for?
> 
> With this patch is is still possible for the module to unload right 
> before lpfc_bsg_request is called? Should this use a try_module_get and 
> if that fails fail the request?
> 
> Why not just add the module get() and put() calls to the fc class (would 
> it just push the problem up?)?
> 
> Or what about in bsg.c, have bsg_open do a module_get? Could we add a 
> module pointer to the bsg's devices request_queue and have bsg do the 
> get when the device is opened?
> 

The correct thing to do would be to have bsg_open do a class-object get and 
bsg_close a class-object put - whatever object the bsg queue is bound to. 
I'll look into what this means (how to get to the object).

-- james s

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 5/7] lpfc 8.3.13: BSG management fixes
  2010-05-12 13:32 [PATCH 5/7] lpfc 8.3.13: BSG management fixes James Smart
  2010-05-12 17:38 ` Mike Christie
@ 2010-05-14  9:29 ` FUJITA Tomonori
  2010-05-14 13:46   ` James Smart
  1 sibling, 1 reply; 9+ messages in thread
From: FUJITA Tomonori @ 2010-05-14  9:29 UTC (permalink / raw)
  To: james.smart; +Cc: linux-scsi, michaelc

On Wed, 12 May 2010 09:32:14 -0400
James Smart <james.smart@emulex.com> wrote:

> - Add reference counting to prevent module removal when
>   there are outstanding BSG requests.

Didn't I worked on it about scsi_transport_sas?

bsg holds the ref count on the device that passed in
bsg_register_queue() while there are outstanding bsg requests. It is
supposed to prevent the module of the hold device from be removed.

Do you get the oops message?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 5/7] lpfc 8.3.13: BSG management fixes
  2010-05-14  9:29 ` FUJITA Tomonori
@ 2010-05-14 13:46   ` James Smart
  2010-05-14 14:30     ` James Smart
  0 siblings, 1 reply; 9+ messages in thread
From: James Smart @ 2010-05-14 13:46 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: linux-scsi@vger.kernel.org, michaelc@cs.wisc.edu

Slightly different. I believe it's ok relative to when there's a BSG request 
in flight. But in this case there isn't, but there is a program w/ the bsg 
node open, and the driver/transport/etc can all be unloaded as bsg_open 
doesn't take a reference anywhere.

I look at it the other day, and it appears to be an issue with bsg itself. It 
should just take a reference to the bsg class dev in open, and release it in 
release. But the bsg_unregister_queue() path doesn't track whether the node 
was open or not and the teardown needs to be split up so that it really is 
reference based.

-- james s


FUJITA Tomonori wrote:
> On Wed, 12 May 2010 09:32:14 -0400
> James Smart <james.smart@emulex.com> wrote:
> 
>> - Add reference counting to prevent module removal when
>>   there are outstanding BSG requests.
> 
> Didn't I worked on it about scsi_transport_sas?
> 
> bsg holds the ref count on the device that passed in
> bsg_register_queue() while there are outstanding bsg requests. It is
> supposed to prevent the module of the hold device from be removed.
> 
> Do you get the oops message?
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 5/7] lpfc 8.3.13: BSG management fixes
  2010-05-14 13:46   ` James Smart
@ 2010-05-14 14:30     ` James Smart
  2010-05-17  6:24       ` FUJITA Tomonori
  0 siblings, 1 reply; 9+ messages in thread
From: James Smart @ 2010-05-14 14:30 UTC (permalink / raw)
  To: James Smart
  Cc: FUJITA Tomonori, linux-scsi@vger.kernel.org, michaelc@cs.wisc.edu

Actually, it may be just checking the file->private_data pointer for NULL at 
the file entry points.  Although, the minor should not be deallocated from bsg 
until all releases are called.

-- james s


James Smart wrote:
> Slightly different. I believe it's ok relative to when there's a BSG request 
> in flight. But in this case there isn't, but there is a program w/ the bsg 
> node open, and the driver/transport/etc can all be unloaded as bsg_open 
> doesn't take a reference anywhere.
> 
> I look at it the other day, and it appears to be an issue with bsg itself. It 
> should just take a reference to the bsg class dev in open, and release it in 
> release. But the bsg_unregister_queue() path doesn't track whether the node 
> was open or not and the teardown needs to be split up so that it really is 
> reference based.
> 
> -- james s
> 
> 
> FUJITA Tomonori wrote:
>> On Wed, 12 May 2010 09:32:14 -0400
>> James Smart <james.smart@emulex.com> wrote:
>>
>>> - Add reference counting to prevent module removal when
>>>   there are outstanding BSG requests.
>> Didn't I worked on it about scsi_transport_sas?
>>
>> bsg holds the ref count on the device that passed in
>> bsg_register_queue() while there are outstanding bsg requests. It is
>> supposed to prevent the module of the hold device from be removed.
>>
>> Do you get the oops message?
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 5/7] lpfc 8.3.13: BSG management fixes
  2010-05-14 14:30     ` James Smart
@ 2010-05-17  6:24       ` FUJITA Tomonori
  2010-05-19 18:35         ` James Smart
  0 siblings, 1 reply; 9+ messages in thread
From: FUJITA Tomonori @ 2010-05-17  6:24 UTC (permalink / raw)
  To: james.smart; +Cc: fujita.tomonori, linux-scsi, michaelc

On Fri, 14 May 2010 10:30:47 -0400
James Smart <james.smart@emulex.com> wrote:

> Actually, it may be just checking the file->private_data pointer for NULL at 
> the file entry points.

bsg clears file->private_data in bsg_release() so nobody should not
see file->private_data NULL pointer...


>  Although, the minor should not be deallocated from bsg 
> until all releases are called.

bsg deallocate minor in bsg_unregister_queue(). It means, for example,
after a sas LLD frees a remote port, user-space application can't
access to it. I guess that it's the right thing.

The tricky part is, when a sas LLD frees a remote port, there might be
some user-space applications that still open a bsg device. So you
can't call blk_cleanup_queue() at that time.

You might hit the similar problem to the commit
93c20a59af4624aedf53f8320606b355aa951bc1. The following fix works?

=
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Subject: [PATCH] scsi_transport_fc: fix the lifetime of sas bsg objects

fc_bsg_remove can't call blk_cleanup_queue() since there might be
applications that open a fc_host (or rport).

The commit 93c20a59af4624aedf53f8320606b355aa951bc1 fixed the same
lifetime problem of scsi_transport_sas.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
---
 drivers/scsi/scsi_transport_fc.c |   17 +++++++++++++----
 1 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 0681378..885b26b 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -1897,6 +1897,10 @@ static int fc_target_match(struct attribute_container *cont,
 static void fc_rport_dev_release(struct device *dev)
 {
 	struct fc_rport *rport = dev_to_rport(dev);
+
+	if (rport->rqst_q)
+		blk_cleanup_queue(rport->rqst_q);
+
 	put_device(dev->parent);
 	kfree(rport);
 }
@@ -3944,6 +3948,13 @@ fc_bsg_rport_handler(struct request_queue *q)
 	fc_bsg_request_handler(q, shost, rport, &rport->dev);
 }
 
+static void fc_host_bsg_release(struct device *dev)
+{
+	struct fc_host_attrs *fc_host = shost_to_fc_host(dev_to_shost(dev));
+
+	if (fc_host->rqst_q)
+		blk_cleanup_queue(fc_host->rqst_q);
+}
 
 /**
  * fc_bsg_hostadd - Create and add the bsg hooks so we can receive requests
@@ -3981,7 +3992,7 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host)
 	blk_queue_rq_timed_out(q, fc_bsg_job_timeout);
 	blk_queue_rq_timeout(q, FC_DEFAULT_BSG_TIMEOUT);
 
-	err = bsg_register_queue(q, dev, bsg_name, NULL);
+	err = bsg_register_queue(q, dev, bsg_name, fc_host_bsg_release);
 	if (err) {
 		printk(KERN_ERR "fc_host%d: bsg interface failed to "
 				"initialize - register queue\n",
@@ -4048,10 +4059,8 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport)
 static void
 fc_bsg_remove(struct request_queue *q)
 {
-	if (q) {
+	if (q)
 		bsg_unregister_queue(q);
-		blk_cleanup_queue(q);
-	}
 }
 
 
-- 
1.6.5


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 5/7] lpfc 8.3.13: BSG management fixes
  2010-05-17  6:24       ` FUJITA Tomonori
@ 2010-05-19 18:35         ` James Smart
  2010-05-20 11:41           ` FUJITA Tomonori
  0 siblings, 1 reply; 9+ messages in thread
From: James Smart @ 2010-05-19 18:35 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: linux-scsi@vger.kernel.org, michaelc@cs.wisc.edu



FUJITA Tomonori wrote:
> The tricky part is, when a sas LLD frees a remote port, there might be
> some user-space applications that still open a bsg device. So you
> can't call blk_cleanup_queue() at that time.
> 
> You might hit the similar problem to the commit
> 93c20a59af4624aedf53f8320606b355aa951bc1. The following fix works?

It is similar to this problem....   There is an application with the bsg 
device open, but no request outstanding, when a request is made to the driver 
to unload.  The driver detaches, with the transport calling 
bsg_unregister_queue() (which succeeds happily) and the driver fully unloads.

But, at some point the app does do a bsg request.

However - things are all messed up at this point...   the bd->queue is no 
longer valid as the owner of the queue (the transport) has returned the memory 
to the kernel. Depending on what else is allocating, it may be used by 
something else.  Thus you get extremely weird/bad behavior when the queue is 
referenced later in the bsg request processing.

I don't know how the patch, which changes when blk_cleanup_queue() is done, 
would fix things, as the larger issue of the queue structure possibly being 
realloc'd by someone else is the killer.  I did test it in our scenario, and 
it failed too.

If I have identified this issue properly (agree ?) I see a couple of options:
a) Set bd->queue pointer to NULL on bsg_unregister_queue(). Then add checks in 
the file op calls.

b) Track opens against the queue, and fail the deregister if open count > 0. 
Rather ugly as the interface is a void right now, and the callers need to deal 
with it too. I know in the fc_transport, we're ill equipped to deal with the 
deregister failing and trying to remove it sometime later.

c) Is all we need to do is take another parent reference in bsg_open, and 
remove it in bsg_release ?  Given the multiple pieces that must play well on 
this, I'm not sure it really works.

Insight appreciated...

-- james s


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 5/7] lpfc 8.3.13: BSG management fixes
  2010-05-19 18:35         ` James Smart
@ 2010-05-20 11:41           ` FUJITA Tomonori
  0 siblings, 0 replies; 9+ messages in thread
From: FUJITA Tomonori @ 2010-05-20 11:41 UTC (permalink / raw)
  To: james.smart; +Cc: fujita.tomonori, linux-scsi, michaelc

On Wed, 19 May 2010 14:35:29 -0400
James Smart <james.smart@emulex.com> wrote:

> 
> 
> FUJITA Tomonori wrote:
> > The tricky part is, when a sas LLD frees a remote port, there might be
> > some user-space applications that still open a bsg device. So you
> > can't call blk_cleanup_queue() at that time.
> > 
> > You might hit the similar problem to the commit
> > 93c20a59af4624aedf53f8320606b355aa951bc1. The following fix works?
> 
> It is similar to this problem....   There is an application with the bsg 
> device open, but no request outstanding, when a request is made to the driver 
> to unload.  The driver detaches, with the transport calling 
> bsg_unregister_queue() (which succeeds happily) and the driver fully unloads.
> 
> But, at some point the app does do a bsg request.
> 
> However - things are all messed up at this point...   the bd->queue is no 
> longer valid as the owner of the queue (the transport) has returned the memory 
> to the kernel.

That's the root problem that the patch fixes?

blk_put_queue() frees a queue. So the owner can't call blk_put_queue()
as long as there are still applications that open the bsg device of
the queue.


> Depending on what else is allocating, it may be used by 
> something else.  Thus you get extremely weird/bad behavior when the queue is 
> referenced later in the bsg request processing.
> 
> I don't know how the patch, which changes when blk_cleanup_queue() is done, 

The patch is supposed to call blk_put_queue() when the last
application closes the bsg device (that guarantees that no more
requests come via the queue).

The release function pointer in bsg_register_queue() is supposed to be
called when the the last application closes the bsg device.


> would fix things, as the larger issue of the queue structure possibly being 
> realloc'd by someone else is the killer.  I did test it in our scenario, and 
> it failed too.

Oh, sorry. Seems that I miss something...

Can you tell me how I can reproduce your scenario?


> If I have identified this issue properly (agree ?) I see a couple of options:
> a) Set bd->queue pointer to NULL on bsg_unregister_queue(). Then add checks in 
> the file op calls.
> 
> b) Track opens against the queue, and fail the deregister if open count > 0. 
> Rather ugly as the interface is a void right now, and the callers need to deal 
> with it too. I know in the fc_transport, we're ill equipped to deal with the 
> deregister failing and trying to remove it sometime later.
> 
> c) Is all we need to do is take another parent reference in bsg_open, and 
> remove it in bsg_release ?  Given the multiple pieces that must play well on 
> this, I'm not sure it really works.
> 
> Insight appreciated...
> 
> -- james s
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2010-05-20 11:41 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-05-12 13:32 [PATCH 5/7] lpfc 8.3.13: BSG management fixes James Smart
2010-05-12 17:38 ` Mike Christie
2010-05-12 19:00   ` James Smart
2010-05-14  9:29 ` FUJITA Tomonori
2010-05-14 13:46   ` James Smart
2010-05-14 14:30     ` James Smart
2010-05-17  6:24       ` FUJITA Tomonori
2010-05-19 18:35         ` James Smart
2010-05-20 11:41           ` FUJITA Tomonori

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).