linux-scsi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] drivers/scsi/st.c: add reference count and related fixes
@ 2005-07-05 17:10 Dailey, Nate
  2005-07-06 20:01 ` Brian King
  2005-07-07 11:11 ` Kai Makisara
  0 siblings, 2 replies; 10+ messages in thread
From: Dailey, Nate @ 2005-07-05 17:10 UTC (permalink / raw)
  To: Kai.Makisara; +Cc: linux-scsi

[-- Attachment #1: Type: text/plain, Size: 2538 bytes --]

These patches (against a 2.6.9 kernel, Redhat AS 4) for st add a
reference count, and fix a few bugs I hit during testing (administrative
removes and cable pulls with IO in progress).

The primary change is adding a kref to the Scsi_Tape structure, to avoid
an oops when the tape drive is removed while open. This includes
scsi_tape_get/put routines and scsi_tape_release, and changes to st_open
and st_release. This is all based on the SCSI disk & CD reference
counting code.


I hit some bugs while I was testing this code. Fixes for these are also
included in the patch:

- When an IO gets a "dead device" error, we need to check the rq_status
to determine if the request actually completed (scsi_wait_req does
this)... else there's no notification to the caller that the IO failed.
There are two changes for this, one in st_do_scsi and one in
write_behind_check.
  
- When an async IO gets a "dead device" error, we're notified of the
completion by the block layer (end_that_request_last), so last_SRpnt
doesn't get set (as it would if the complete were done by
st_sleep_done). So I changed how last_SRpnt is used to get around
this... it's now set only for async IOs, just before the IO is issued
(and nulled out when the IO comes back). 


I also hit the following problem when I briefly tested on a SUSE kernel
(it wasn't in the Redhat kernel I did most of my testing with, but it is
in 2.6.12):

- In these kernels, we no longer get a "complete" from
end_that_request_last when a device goes away and an IO is rejected. I
therefore added to st_do_scsi (but it's commented in this patch) setting
SRpnt->sr_request->end_io to blk_end_sync_rq, which will do the
complete. Since blk_end_sync_rq nulls out rq->waiting, I changed
st_do_scsi to wait on a local variable, and added a check for null in
st_sleep_done (just in case).


And just one more thing:

- I noticed that in 2.6.12, there is what appears to be a bogus use of
last_SRpnt in st_chk_result (this doesn't exist in the kernels I tested
with). This would cause problems if these patches are ported to 2.6.12,
because of how I changed the use of last_SRpnt. Either my code has to
change, or st_chk_result shouldn't use last_SRpnt.


Anyway... as I said, these are against a 2.6.9 kernel (Redhat AS 4). If
you're interested, I could port these changes to 2.6.12... can't promise
I'll be able to test with that kernel, though.

Nate Dailey
Stratus Technologies


Signed-off-by: Nate Dailey <nate.dailey@stratus.com>

[-- Attachment #2: st.c.patch --]
[-- Type: application/octet-stream, Size: 7397 bytes --]

--- st.c.orig	2005-06-22 15:08:33.142895016 -0400
+++ st.c	2005-06-22 16:01:03.563099838 -0400
@@ -212,6 +212,12 @@ static int switch_partition(Scsi_Tape *)
 
 static int st_int_ioctl(Scsi_Tape *, unsigned int, unsigned long);
 
+static void scsi_tape_release(struct kref *kref);
+
+#define to_scsi_tape(obj) container_of(obj,Scsi_Tape,kref)
+
+static DECLARE_MUTEX(st_ref_sem);
+
 \f
 #include "osst_detect.h"
 #ifndef SIGS_FROM_OSST
@@ -223,6 +229,46 @@ static int st_int_ioctl(Scsi_Tape *, uns
 	{"OnStream", "FW-", "", "osst"}
 #endif
 
+static Scsi_Tape *scsi_tape_get(int dev)
+{
+	Scsi_Tape *STp = NULL;
+
+	down(&st_ref_sem);
+	write_lock(&st_dev_arr_lock);
+
+	if (dev < st_dev_max && scsi_tapes != NULL)
+		STp = scsi_tapes[dev];
+	if (!STp) goto out;
+
+	kref_get(&STp->kref);
+
+	if (!STp->device)
+		goto out_put;
+
+	if (scsi_device_get(STp->device))
+		goto out_put;
+
+	goto out;
+
+out_put:
+	kref_put(&STp->kref, scsi_tape_release);
+	STp = NULL;
+out:
+	write_unlock(&st_dev_arr_lock);
+	up(&st_ref_sem);
+	return STp;
+}
+
+static void scsi_tape_put(Scsi_Tape *STp)
+{
+	struct scsi_device *sdev = STp->device;
+
+	down(&st_ref_sem);
+	kref_put(&STp->kref, scsi_tape_release);
+	scsi_device_put(sdev);
+	up(&st_ref_sem);
+}
+
 struct st_reject_data {
 	char *vendor;
 	char *model;
@@ -376,10 +422,10 @@ static void st_sleep_done(Scsi_Cmnd * SC
 	} else
 		(STp->buffer)->midlevel_result = SCpnt->result;
 	SCpnt->request->rq_status = RQ_SCSI_DONE;
-	(STp->buffer)->last_SRpnt = SCpnt->sc_request;
 	DEB( STp->write_pending = 0; )
 
-	complete(SCpnt->request->waiting);
+	if (SCpnt->request->waiting)
+		complete(SCpnt->request->waiting);
 }
 
 /* Do the scsi command. Waits until command performed if do_wait is true.
@@ -389,8 +435,17 @@ static Scsi_Request *
  st_do_scsi(Scsi_Request * SRpnt, Scsi_Tape * STp, unsigned char *cmd, int bytes,
 	    int direction, int timeout, int retries, int do_wait)
 {
+	struct completion *waiting;
 	unsigned char *bp;
 
+	/* if async, make sure there's no command outstanding */
+	if (!do_wait && ((STp->buffer)->last_SRpnt)) {
+		printk(KERN_ERR "%s: Async command already active.\n",
+		       tape_name(STp));
+		(STp->buffer)->syscall_result = (-EBUSY);
+		return NULL;
+	}
+
 	if (SRpnt == NULL) {
 		SRpnt = scsi_allocate_request(STp->device, GFP_ATOMIC);
 		if (SRpnt == NULL) {
@@ -404,7 +459,11 @@ static Scsi_Request *
 		}
 	}
 
-	init_completion(&STp->wait);
+	if (!do_wait)
+		(STp->buffer)->last_SRpnt = SRpnt;
+
+	waiting = &STp->wait;
+	init_completion(waiting);
 	SRpnt->sr_use_sg = STp->buffer->do_dio || (bytes > (STp->buffer)->frp[0].length);
 	if (SRpnt->sr_use_sg) {
 		if (!STp->buffer->do_dio)
@@ -415,16 +474,19 @@ static Scsi_Request *
 		bp = (STp->buffer)->b_data;
 	SRpnt->sr_data_direction = direction;
 	SRpnt->sr_cmd_len = 0;
-	SRpnt->sr_request->waiting = &(STp->wait);
+	SRpnt->sr_request->waiting = waiting;
 	SRpnt->sr_request->rq_status = RQ_SCSI_BUSY;
 	SRpnt->sr_request->rq_disk = STp->disk;
+/*	SRpnt->sr_request->end_io = blk_end_sync_rq; */
 
 	scsi_do_req(SRpnt, (void *) cmd, bp, bytes,
 		    st_sleep_done, timeout, retries);
 
 	if (do_wait) {
-		wait_for_completion(SRpnt->sr_request->waiting);
+		wait_for_completion(waiting);
 		SRpnt->sr_request->waiting = NULL;
+		if (SRpnt->sr_request->rq_status != RQ_SCSI_DONE)
+			SRpnt->sr_result |= (DRIVER_ERROR << 24);
 		(STp->buffer)->syscall_result = st_chk_result(STp, SRpnt);
 	}
 	return SRpnt;
@@ -436,8 +498,11 @@ static void write_behind_check(Scsi_Tape
 {
 	ST_buffer *STbuffer;
 	ST_partstat *STps;
+	Scsi_Request * SRpnt;
 
 	STbuffer = STp->buffer;
+	SRpnt = STbuffer->last_SRpnt;
+	STbuffer->last_SRpnt = NULL;
 
         DEB(
 	if (STp->write_pending)
@@ -447,10 +512,11 @@ static void write_behind_check(Scsi_Tape
         ) /* end DEB */
 
 	wait_for_completion(&(STp->wait));
-	(STp->buffer)->last_SRpnt->sr_request->waiting = NULL;
-
-	(STp->buffer)->syscall_result = st_chk_result(STp, (STp->buffer)->last_SRpnt);
-	scsi_release_request((STp->buffer)->last_SRpnt);
+	SRpnt->sr_request->waiting = NULL;
+	if (SRpnt->sr_request->rq_status != RQ_SCSI_DONE)
+		SRpnt->sr_result |= (DRIVER_ERROR << 24);
+	(STp->buffer)->syscall_result = st_chk_result(STp, SRpnt);
+	scsi_release_request(SRpnt);
 
 	STbuffer->buffer_bytes -= STbuffer->writing;
 	STps = &(STp->ps[STp->partition]);
@@ -1002,25 +1068,21 @@ static int st_open(struct inode *inode, 
 	char *name;
 
 	nonseekable_open(inode, filp);
+
+	if (!(STp = scsi_tape_get(dev)))
+		return -ENXIO;
+
 	write_lock(&st_dev_arr_lock);
-	if (dev >= st_dev_max || scsi_tapes == NULL ||
-	    ((STp = scsi_tapes[dev]) == NULL)) {
-		write_unlock(&st_dev_arr_lock);
-		return (-ENXIO);
-	}
 	filp->private_data = STp;
 	name = tape_name(STp);
 
 	if (STp->in_use) {
 		write_unlock(&st_dev_arr_lock);
+		scsi_tape_put(STp);
 		DEB( printk(ST_DEB_MSG "%s: Device already in use.\n", name); )
 		return (-EBUSY);
 	}
 
-	if(scsi_device_get(STp->device)) {
-		write_unlock(&st_dev_arr_lock);
-		return (-ENXIO);
-	}
 	STp->in_use = 1;
 	write_unlock(&st_dev_arr_lock);
 	STp->rew_at_close = STp->autorew_dev = (iminor(inode) & 0x80) == 0;
@@ -1065,7 +1127,7 @@ static int st_open(struct inode *inode, 
  err_out:
 	normalize_buffer(STp->buffer);
 	STp->in_use = 0;
-	scsi_device_put(STp->device);
+	scsi_tape_put(STp);
 	return retval;
 
 }
@@ -1198,7 +1260,7 @@ static int st_release(struct inode *inod
 	write_lock(&st_dev_arr_lock);
 	STp->in_use = 0;
 	write_unlock(&st_dev_arr_lock);
-	scsi_device_put(STp->device);
+	scsi_tape_put(STp);
 
 	return result;
 }
@@ -3799,6 +3861,7 @@ static int st_probe(struct device *dev)
 		goto out_put_disk;
 	}
 	memset(tpnt, 0, sizeof(Scsi_Tape));
+	kref_init(&tpnt->kref);
 	tpnt->disk = disk;
 	sprintf(disk->disk_name, "st%d", i);
 	disk->private_data = &tpnt->driver;
@@ -3814,6 +3877,7 @@ static int st_probe(struct device *dev)
 		tpnt->tape_type = MT_ISSCSI2;
 
 	tpnt->buffer = buffer;
+	tpnt->buffer->last_SRpnt = NULL;
 
 	tpnt->inited = 0;
 	tpnt->dirty = 0;
@@ -3986,15 +4050,10 @@ static int st_remove(struct device *dev)
 					tpnt->modes[mode].cdevs[j] = NULL;
 				}
 			}
-			tpnt->device = NULL;
 
-			if (tpnt->buffer) {
-				tpnt->buffer->orig_frp_segs = 0;
-				normalize_buffer(tpnt->buffer);
-				kfree(tpnt->buffer);
-			}
-			put_disk(tpnt->disk);
-			kfree(tpnt);
+			down(&st_ref_sem);
+			kref_put(&tpnt->kref, scsi_tape_release);
+			up(&st_ref_sem);
 			return 0;
 		}
 	}
@@ -4003,6 +4062,34 @@ static int st_remove(struct device *dev)
 	return 0;
 }
 
+/**
+ *      scsi_tape_release - Called to free the Scsi_Tape structure
+ *      @kref: pointer to embedded kref
+ *
+ *      st_ref_sem must be held entering this routine.  Because it is
+ *      called on last put, you should always use the scsi_tape_get()
+ *      scsi_tape_put() helpers which manipulate the semaphore directly
+ *      and never do a direct kref_put().
+ **/
+static void scsi_tape_release(struct kref *kref)
+{
+	Scsi_Tape *tpnt = to_scsi_tape(kref);
+	struct gendisk *disk = tpnt->disk;
+
+	tpnt->device = NULL;
+
+	if (tpnt->buffer) {
+		tpnt->buffer->orig_frp_segs = 0;
+		normalize_buffer(tpnt->buffer);
+		kfree(tpnt->buffer);
+	}
+
+	disk->private_data = NULL;
+	put_disk(disk);
+	kfree(tpnt);
+	return;
+}
+
 static void st_intr(struct scsi_cmnd *SCpnt)
 {
 	scsi_io_completion(SCpnt, (SCpnt->result ? 0: SCpnt->bufflen), 1);

[-- Attachment #3: st.h.patch --]
[-- Type: application/octet-stream, Size: 418 bytes --]

--- st.h.orig	2005-06-22 15:08:36.596454229 -0400
+++ st.h	2005-06-22 15:03:25.078170293 -0400
@@ -6,6 +6,7 @@
 #include "scsi.h"
 #endif
 #include <linux/completion.h>
+#include <linux/kref.h>
 
 /* The tape buffer descriptor. */
 typedef struct {
@@ -146,6 +147,7 @@ typedef struct {
 	unsigned char last_sense[16];
 #endif
 	struct gendisk *disk;
+	struct kref     kref;
 } Scsi_Tape;
 
 /* Bit masks for use_pf */

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drivers/scsi/st.c: add reference count and related fixes
  2005-07-05 17:10 Dailey, Nate
@ 2005-07-06 20:01 ` Brian King
  2005-07-07 11:11 ` Kai Makisara
  1 sibling, 0 replies; 10+ messages in thread
From: Brian King @ 2005-07-06 20:01 UTC (permalink / raw)
  To: Dailey, Nate; +Cc: Kai.Makisara, linux-scsi

Dailey, Nate wrote:
> These patches (against a 2.6.9 kernel, Redhat AS 4) for st add a
> reference count, and fix a few bugs I hit during testing (administrative
> removes and cable pulls with IO in progress).
> 
> The primary change is adding a kref to the Scsi_Tape structure, to avoid
> an oops when the tape drive is removed while open. This includes
> scsi_tape_get/put routines and scsi_tape_release, and changes to st_open
> and st_release. This is all based on the SCSI disk & CD reference
> counting code.

I've actually been working on a similar patch for mainline.

I'd be happy to send out my patch, which has the kref added to Scsi_Tape
and has been tested on current mainline. I haven't hit the additional
bugs you mentioned below, but am still testing different scenarios.

-Brian


> 
> 
> I hit some bugs while I was testing this code. Fixes for these are also
> included in the patch:
> 
> - When an IO gets a "dead device" error, we need to check the rq_status
> to determine if the request actually completed (scsi_wait_req does
> this)... else there's no notification to the caller that the IO failed.
> There are two changes for this, one in st_do_scsi and one in
> write_behind_check.
>   
> - When an async IO gets a "dead device" error, we're notified of the
> completion by the block layer (end_that_request_last), so last_SRpnt
> doesn't get set (as it would if the complete were done by
> st_sleep_done). So I changed how last_SRpnt is used to get around
> this... it's now set only for async IOs, just before the IO is issued
> (and nulled out when the IO comes back). 
> 
> 
> I also hit the following problem when I briefly tested on a SUSE kernel
> (it wasn't in the Redhat kernel I did most of my testing with, but it is
> in 2.6.12):
> 
> - In these kernels, we no longer get a "complete" from
> end_that_request_last when a device goes away and an IO is rejected. I
> therefore added to st_do_scsi (but it's commented in this patch) setting
> SRpnt->sr_request->end_io to blk_end_sync_rq, which will do the
> complete. Since blk_end_sync_rq nulls out rq->waiting, I changed
> st_do_scsi to wait on a local variable, and added a check for null in
> st_sleep_done (just in case).
> 
> 
> And just one more thing:
> 
> - I noticed that in 2.6.12, there is what appears to be a bogus use of
> last_SRpnt in st_chk_result (this doesn't exist in the kernels I tested
> with). This would cause problems if these patches are ported to 2.6.12,
> because of how I changed the use of last_SRpnt. Either my code has to
> change, or st_chk_result shouldn't use last_SRpnt.
> 
> 
> Anyway... as I said, these are against a 2.6.9 kernel (Redhat AS 4). If
> you're interested, I could port these changes to 2.6.12... can't promise
> I'll be able to test with that kernel, though.
> 
> Nate Dailey
> Stratus Technologies
> 
> 
> Signed-off-by: Nate Dailey <nate.dailey@stratus.com>


-- 
Brian King
eServer Storage I/O
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drivers/scsi/st.c: add reference count and related fixes
  2005-07-05 17:10 Dailey, Nate
  2005-07-06 20:01 ` Brian King
@ 2005-07-07 11:11 ` Kai Makisara
  2005-07-07 13:24   ` Brian King
  1 sibling, 1 reply; 10+ messages in thread
From: Kai Makisara @ 2005-07-07 11:11 UTC (permalink / raw)
  To: Dailey, Nate; +Cc: Brian King, linux-scsi

This is an area where st can be improved. I am happy to see that people 
(probably) having resources to test the changes are working on this.

On Tue, 5 Jul 2005, Dailey, Nate wrote:

> These patches (against a 2.6.9 kernel, Redhat AS 4) for st add a
> reference count, and fix a few bugs I hit during testing (administrative
> removes and cable pulls with IO in progress).
> 
> The primary change is adding a kref to the Scsi_Tape structure, to avoid
> an oops when the tape drive is removed while open. This includes
> scsi_tape_get/put routines and scsi_tape_release, and changes to st_open
> and st_release. This is all based on the SCSI disk & CD reference
> counting code.
> 
You have added a new kref for this. Have you considered using the 
existing cdev refcounting instead of a separate kref?

(When the new character device code was introduced, I got the impression 
that the proper way for the st would be to embed the struct into struct 
scsi_tape and make the destructor. The scsi_tapes[] array would not be 
necessary any more because the struct scsi_tape would be accessible through 
containerof(inode->i_cdev, struct scsi_tape, cdev).
(An array would be still necessary for tape number allocation.)
I did not implement this at that time because there was no real need for 
the change and it seemed too complex to test just for fun.)

> 
> I hit some bugs while I was testing this code. Fixes for these are also
> included in the patch:
> 
> - When an IO gets a "dead device" error, we need to check the rq_status
> to determine if the request actually completed (scsi_wait_req does
> this)... else there's no notification to the caller that the IO failed.
> There are two changes for this, one in st_do_scsi and one in
> write_behind_check.
>   
OK

> - When an async IO gets a "dead device" error, we're notified of the
> completion by the block layer (end_that_request_last), so last_SRpnt
> doesn't get set (as it would if the complete were done by
> st_sleep_done). So I changed how last_SRpnt is used to get around
> this... it's now set only for async IOs, just before the IO is issued
> (and nulled out when the IO comes back). 
> 
Fine. Could you add a comment somewhere documenting when last_SRpnt is 
valid?

> 
> I also hit the following problem when I briefly tested on a SUSE kernel
> (it wasn't in the Redhat kernel I did most of my testing with, but it is
> in 2.6.12):
> 
> - In these kernels, we no longer get a "complete" from
> end_that_request_last when a device goes away and an IO is rejected. I
> therefore added to st_do_scsi (but it's commented in this patch) setting
> SRpnt->sr_request->end_io to blk_end_sync_rq, which will do the
> complete. Since blk_end_sync_rq nulls out rq->waiting, I changed
> st_do_scsi to wait on a local variable, and added a check for null in
> st_sleep_done (just in case).
> 
Looks OK as far as I can see. This area will change if/when all ULDs are 
converted to using blk_* functions directly (a change some people are 
pushing for 2.6.13).

> 
> And just one more thing:
> 
> - I noticed that in 2.6.12, there is what appears to be a bogus use of
> last_SRpnt in st_chk_result (this doesn't exist in the kernels I tested
> with). This would cause problems if these patches are ported to 2.6.12,
> because of how I changed the use of last_SRpnt. Either my code has to
> change, or st_chk_result shouldn't use last_SRpnt.
> 
This use is not bogus unless I have missed something. Handling of sense 
data in st was changed to use the generic functions introduced by Doug 
Gilbert. At the same time I moved pieces of error handling around so that 
as much as possible was done in the context of the calling process.

last_SRpnt is used to access the sense data from the previous command. It 
is currently used with scsi_request_normalize_sense() that requires a 
struct scsi_request pointer but scsi_normalize_sense() could be used as 
well if the sense data is accessible otherwise.

> 
> Anyway... as I said, these are against a 2.6.9 kernel (Redhat AS 4). If
> you're interested, I could port these changes to 2.6.12... can't promise
> I'll be able to test with that kernel, though.
> 
Testing in any fairly recent kernel helps a lot.

> Nate Dailey
> Stratus Technologies
> 
> 
> Signed-off-by: Nate Dailey <nate.dailey@stratus.com>
> 

-- 
Kai

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drivers/scsi/st.c: add reference count and related fixes
  2005-07-07 11:11 ` Kai Makisara
@ 2005-07-07 13:24   ` Brian King
  2005-07-07 17:06     ` Kai Makisara
  0 siblings, 1 reply; 10+ messages in thread
From: Brian King @ 2005-07-07 13:24 UTC (permalink / raw)
  To: Kai Makisara; +Cc: Dailey, Nate, linux-scsi

Kai Makisara wrote:
>>The primary change is adding a kref to the Scsi_Tape structure, to avoid
>>an oops when the tape drive is removed while open. This includes
>>scsi_tape_get/put routines and scsi_tape_release, and changes to st_open
>>and st_release. This is all based on the SCSI disk & CD reference
>>counting code.
>>
> 
> You have added a new kref for this. Have you considered using the 
> existing cdev refcounting instead of a separate kref?
> 
> (When the new character device code was introduced, I got the impression 
> that the proper way for the st would be to embed the struct into struct 
> scsi_tape and make the destructor. The scsi_tapes[] array would not be 
> necessary any more because the struct scsi_tape would be accessible through 
> containerof(inode->i_cdev, struct scsi_tape, cdev).
> (An array would be still necessary for tape number allocation.)
> I did not implement this at that time because there was no real need for 
> the change and it seemed too complex to test just for fun.)

I'm not sure how you would do this since there is currently no way to
embed data in a cdev. Also, since there are multiple cdevs per scsi_tape,
it seems cleaner to use a new kref.


-- 
Brian King
eServer Storage I/O
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drivers/scsi/st.c: add reference count and related fixes
  2005-07-07 13:24   ` Brian King
@ 2005-07-07 17:06     ` Kai Makisara
  0 siblings, 0 replies; 10+ messages in thread
From: Kai Makisara @ 2005-07-07 17:06 UTC (permalink / raw)
  To: Brian King; +Cc: Dailey, Nate, linux-scsi

On Thu, 7 Jul 2005, Brian King wrote:

> Kai Makisara wrote:
> >>The primary change is adding a kref to the Scsi_Tape structure, to avoid
> >>an oops when the tape drive is removed while open. This includes
> >>scsi_tape_get/put routines and scsi_tape_release, and changes to st_open
> >>and st_release. This is all based on the SCSI disk & CD reference
> >>counting code.
> >>
> > 
> > You have added a new kref for this. Have you considered using the 
> > existing cdev refcounting instead of a separate kref?
> > 
> > (When the new character device code was introduced, I got the impression 
> > that the proper way for the st would be to embed the struct into struct 
> > scsi_tape and make the destructor. The scsi_tapes[] array would not be 
> > necessary any more because the struct scsi_tape would be accessible through 
> > containerof(inode->i_cdev, struct scsi_tape, cdev).
> > (An array would be still necessary for tape number allocation.)
> > I did not implement this at that time because there was no real need for 
> > the change and it seemed too complex to test just for fun.)
> 
> I'm not sure how you would do this since there is currently no way to
> embed data in a cdev.

True. This is why it has to be done the other way: embed cdev(s) into 
struct scsi_tape. The file linux/drivers/char/snsc.c is an example how 
this works except for the release (deallocation) part. I just searched
linux/fs/char_dev.c for a solution for this part but did not find one. The 
URL http://lkml.org/lkml/2003/9/22/220 quite well describes what the 
code looks like. Seems best to forget using cdevs for deallocation.

The tape devices now appear in the sysfs class scsi_tape. One possibility 
might be to use refcounting in these objects as suggested by the URL. This 
does not remove the search loop through scsi_tapes[] in st_open() and the 
added code and data would be only for deallocation.

> Also, since there are multiple cdevs per scsi_tape,
> it seems cleaner to use a new kref.
> 
The new kref may well be the best way overall. I just want to explore the 
existing possibilities that don't need updating when the world around st 
changes.

-- 
Kai

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH] drivers/scsi/st.c: add reference count and related fixes
@ 2005-07-12 15:10 Dailey, Nate
  2005-07-12 20:07 ` Kai Makisara
  0 siblings, 1 reply; 10+ messages in thread
From: Dailey, Nate @ 2005-07-12 15:10 UTC (permalink / raw)
  To: Kai Makisara; +Cc: Brian King, linux-scsi

I'll try porting my changes to a recent kernel, verify that the bugs I
found are still bugs, and get back to you with a new patch.

In the meantime, responses to your comments:

- using a new kref in the scsi_tape structure: I've given this some
thought, and I think using a new kref is defintely the best way to go.
If nothing else, this is the same way sd and sr do reference counting...
seems easier to maintain if the code in st is as similar as possible to
these other drivers.

- my changes to how last_SRpnt is used: I'll add a comment documenting
when last_SRpnt is valid (it's valid only for async IOs that have been
issued, nulled out by write_behind_check when they complete)

- Here's why I think the use of last_SRpnt in st_chk_result is bogus:
one of the inputs to st_chk_result is "struct scsi_request * SRpnt", and
it seems like this is the SRpnt you want to use, rather than last_SRpnt.
This input will be the command that st_do_scsi or write_behind_check
just waited on. This might not cause any problems with the current st
code, but it will definitely cause problems with the changes I made to
last_SRpnt. Let me know if I'm wrong about this, though, and I can
change my last_SRpnt code.

Nate Dailey



> -----Original Message-----
> From: Kai Makisara [mailto:Kai.Makisara@kolumbus.fi] 
> Sent: Thursday, July 07, 2005 7:12 AM
> To: Dailey, Nate
> Cc: Brian King; linux-scsi@vger.kernel.org
> Subject: Re: [PATCH] drivers/scsi/st.c: add reference count 
> and related fixes
> 
> 
> This is an area where st can be improved. I am happy to see 
> that people 
> (probably) having resources to test the changes are working on this.
> 
> On Tue, 5 Jul 2005, Dailey, Nate wrote:
> 
> > These patches (against a 2.6.9 kernel, Redhat AS 4) for st add a
> > reference count, and fix a few bugs I hit during testing 
> (administrative
> > removes and cable pulls with IO in progress).
> > 
> > The primary change is adding a kref to the Scsi_Tape 
> structure, to avoid
> > an oops when the tape drive is removed while open. This includes
> > scsi_tape_get/put routines and scsi_tape_release, and 
> changes to st_open
> > and st_release. This is all based on the SCSI disk & CD reference
> > counting code.
> > 
> You have added a new kref for this. Have you considered using the 
> existing cdev refcounting instead of a separate kref?
> 
> (When the new character device code was introduced, I got the 
> impression 
> that the proper way for the st would be to embed the struct 
> into struct 
> scsi_tape and make the destructor. The scsi_tapes[] array 
> would not be 
> necessary any more because the struct scsi_tape would be 
> accessible through 
> containerof(inode->i_cdev, struct scsi_tape, cdev).
> (An array would be still necessary for tape number allocation.)
> I did not implement this at that time because there was no 
> real need for 
> the change and it seemed too complex to test just for fun.)
> 
> > 
> > I hit some bugs while I was testing this code. Fixes for 
> these are also
> > included in the patch:
> > 
> > - When an IO gets a "dead device" error, we need to check 
> the rq_status
> > to determine if the request actually completed (scsi_wait_req does
> > this)... else there's no notification to the caller that 
> the IO failed.
> > There are two changes for this, one in st_do_scsi and one in
> > write_behind_check.
> >   
> OK
> 
> > - When an async IO gets a "dead device" error, we're notified of the
> > completion by the block layer (end_that_request_last), so last_SRpnt
> > doesn't get set (as it would if the complete were done by
> > st_sleep_done). So I changed how last_SRpnt is used to get around
> > this... it's now set only for async IOs, just before the IO 
> is issued
> > (and nulled out when the IO comes back). 
> > 
> Fine. Could you add a comment somewhere documenting when 
> last_SRpnt is 
> valid?
> 
> > 
> > I also hit the following problem when I briefly tested on a 
> SUSE kernel
> > (it wasn't in the Redhat kernel I did most of my testing 
> with, but it is
> > in 2.6.12):
> > 
> > - In these kernels, we no longer get a "complete" from
> > end_that_request_last when a device goes away and an IO is 
> rejected. I
> > therefore added to st_do_scsi (but it's commented in this 
> patch) setting
> > SRpnt->sr_request->end_io to blk_end_sync_rq, which will do the
> > complete. Since blk_end_sync_rq nulls out rq->waiting, I changed
> > st_do_scsi to wait on a local variable, and added a check 
> for null in
> > st_sleep_done (just in case).
> > 
> Looks OK as far as I can see. This area will change if/when 
> all ULDs are 
> converted to using blk_* functions directly (a change some people are 
> pushing for 2.6.13).
> 
> > 
> > And just one more thing:
> > 
> > - I noticed that in 2.6.12, there is what appears to be a 
> bogus use of
> > last_SRpnt in st_chk_result (this doesn't exist in the 
> kernels I tested
> > with). This would cause problems if these patches are 
> ported to 2.6.12,
> > because of how I changed the use of last_SRpnt. Either my 
> code has to
> > change, or st_chk_result shouldn't use last_SRpnt.
> > 
> This use is not bogus unless I have missed something. 
> Handling of sense 
> data in st was changed to use the generic functions 
> introduced by Doug 
> Gilbert. At the same time I moved pieces of error handling 
> around so that 
> as much as possible was done in the context of the calling process.
> 
> last_SRpnt is used to access the sense data from the previous 
> command. It 
> is currently used with scsi_request_normalize_sense() that requires a 
> struct scsi_request pointer but scsi_normalize_sense() could 
> be used as 
> well if the sense data is accessible otherwise.
> 
> > 
> > Anyway... as I said, these are against a 2.6.9 kernel 
> (Redhat AS 4). If
> > you're interested, I could port these changes to 2.6.12... 
> can't promise
> > I'll be able to test with that kernel, though.
> > 
> Testing in any fairly recent kernel helps a lot.
> 
> > Nate Dailey
> > Stratus Technologies
> > 
> > 
> > Signed-off-by: Nate Dailey <nate.dailey@stratus.com>
> > 
> 
> -- 
> Kai
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH] drivers/scsi/st.c: add reference count and related fixes
  2005-07-12 15:10 Dailey, Nate
@ 2005-07-12 20:07 ` Kai Makisara
  0 siblings, 0 replies; 10+ messages in thread
From: Kai Makisara @ 2005-07-12 20:07 UTC (permalink / raw)
  To: Dailey, Nate; +Cc: Brian King, linux-scsi

On Tue, 12 Jul 2005, Dailey, Nate wrote:

> I'll try porting my changes to a recent kernel, verify that the bugs I
> found are still bugs, and get back to you with a new patch.
> 
> In the meantime, responses to your comments:
> 
> - using a new kref in the scsi_tape structure: I've given this some
> thought, and I think using a new kref is defintely the best way to go.
> If nothing else, this is the same way sd and sr do reference counting...
> seems easier to maintain if the code in st is as similar as possible to
> these other drivers.
> 
st is a character device and it should use the character device 
refcounting if convenient. However, I now agree that the new kref is the 
simplest solution.

> - my changes to how last_SRpnt is used: I'll add a comment documenting
> when last_SRpnt is valid (it's valid only for async IOs that have been
> issued, nulled out by write_behind_check when they complete)
> 
Good.

> - Here's why I think the use of last_SRpnt in st_chk_result is bogus:
> one of the inputs to st_chk_result is "struct scsi_request * SRpnt", and
> it seems like this is the SRpnt you want to use, rather than last_SRpnt.
> This input will be the command that st_do_scsi or write_behind_check
> just waited on. This might not cause any problems with the current st
> code, but it will definitely cause problems with the changes I made to
> last_SRpnt. Let me know if I'm wrong about this, though, and I can
> change my last_SRpnt code.
> 
OK. I now see your point and it seems that the solution is simple: use the 
SRpnt argument of st_chk_result(). The reason why the function is using 
last_SRpnt is moving code from one place to another and not being careful 
enough to change everything (I knew that in that code last_SRpnt was valid 
always and so this did not look like an error ;-).

-- 
Kai

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH] drivers/scsi/st.c: add reference count and related fixes
@ 2005-07-13 19:12 Dailey, Nate
  2005-07-25 19:54 ` Brian King
  2005-08-02 10:40 ` Kai Makisara
  0 siblings, 2 replies; 10+ messages in thread
From: Dailey, Nate @ 2005-07-13 19:12 UTC (permalink / raw)
  To: Kai Makisara; +Cc: Brian King, linux-scsi

[-- Attachment #1: Type: text/plain, Size: 3123 bytes --]

I've attached patches against 2.6.13rc2. These are basically identical
to my earlier patches, as I found that all issues I'd seen in earlier
kernels still existed in this kernel.

To summarize, the changes are: (more details in my original email)

- add a kref to the scsi_tape structure, and associate reference
counting stuff

- set sr_request->end_io = blk_end_sync_rq so we get notified when an IO
is rejected when the device goes away

- check rq_status when IOs complete, else we don't know that IOs
rejected for a dead device in fact did not complete

- change last_SRpnt so it's set before an async IO is issued (in case
st_sleep_done is bypassed)

- fix a bogus use of last_SRpnt in st_chk_result


Nate Dailey
Stratus Technologies

Signed-off-by: Nate Dailey <nate.dailey@stratus.com>


> -----Original Message-----
> From: Kai Makisara [mailto:Kai.Makisara@kolumbus.fi] 
> Sent: Tuesday, July 12, 2005 4:08 PM
> To: Dailey, Nate
> Cc: Brian King; linux-scsi@vger.kernel.org
> Subject: RE: [PATCH] drivers/scsi/st.c: add reference count 
> and related fixes
> 
> 
> On Tue, 12 Jul 2005, Dailey, Nate wrote:
> 
> > I'll try porting my changes to a recent kernel, verify that 
> the bugs I
> > found are still bugs, and get back to you with a new patch.
> > 
> > In the meantime, responses to your comments:
> > 
> > - using a new kref in the scsi_tape structure: I've given this some
> > thought, and I think using a new kref is defintely the best 
> way to go.
> > If nothing else, this is the same way sd and sr do 
> reference counting...
> > seems easier to maintain if the code in st is as similar as 
> possible to
> > these other drivers.
> > 
> st is a character device and it should use the character device 
> refcounting if convenient. However, I now agree that the new 
> kref is the 
> simplest solution.
> 
> > - my changes to how last_SRpnt is used: I'll add a comment 
> documenting
> > when last_SRpnt is valid (it's valid only for async IOs 
> that have been
> > issued, nulled out by write_behind_check when they complete)
> > 
> Good.
> 
> > - Here's why I think the use of last_SRpnt in st_chk_result 
> is bogus:
> > one of the inputs to st_chk_result is "struct scsi_request 
> * SRpnt", and
> > it seems like this is the SRpnt you want to use, rather 
> than last_SRpnt.
> > This input will be the command that st_do_scsi or write_behind_check
> > just waited on. This might not cause any problems with the 
> current st
> > code, but it will definitely cause problems with the 
> changes I made to
> > last_SRpnt. Let me know if I'm wrong about this, though, and I can
> > change my last_SRpnt code.
> > 
> OK. I now see your point and it seems that the solution is 
> simple: use the 
> SRpnt argument of st_chk_result(). The reason why the 
> function is using 
> last_SRpnt is moving code from one place to another and not 
> being careful 
> enough to change everything (I knew that in that code 
> last_SRpnt was valid 
> always and so this did not look like an error ;-).
> 
> -- 
> Kai
> 

[-- Attachment #2: st.c2613rc2nd.patch --]
[-- Type: application/octet-stream, Size: 8090 bytes --]

--- st.c.orig	2005-07-13 14:29:01.000000000 -0400
+++ st.c	2005-07-13 14:28:22.000000000 -0400
@@ -219,6 +219,12 @@ static int switch_partition(struct scsi_
 
 static int st_int_ioctl(struct scsi_tape *, unsigned int, unsigned long);
 
+static void scsi_tape_release(struct kref *kref);
+
+#define to_scsi_tape(obj) container_of(obj,struct scsi_tape,kref)
+
+static DECLARE_MUTEX(st_ref_sem);
+
 \f
 #include "osst_detect.h"
 #ifndef SIGS_FROM_OSST
@@ -230,6 +236,46 @@ static int st_int_ioctl(struct scsi_tape
 	{"OnStream", "FW-", "", "osst"}
 #endif
 
+static struct scsi_tape *scsi_tape_get(int dev)
+{
+	struct scsi_tape *STp = NULL;
+
+	down(&st_ref_sem);
+	write_lock(&st_dev_arr_lock);
+
+	if (dev < st_dev_max && scsi_tapes != NULL)
+		STp = scsi_tapes[dev];
+	if (!STp) goto out;
+
+	kref_get(&STp->kref);
+
+	if (!STp->device)
+		goto out_put;
+
+	if (scsi_device_get(STp->device))
+		goto out_put;
+
+	goto out;
+
+out_put:
+	kref_put(&STp->kref, scsi_tape_release);
+	STp = NULL;
+out:
+	write_unlock(&st_dev_arr_lock);
+	up(&st_ref_sem);
+	return STp;
+}
+
+static void scsi_tape_put(struct scsi_tape *STp)
+{
+	struct scsi_device *sdev = STp->device;
+
+	down(&st_ref_sem);
+	kref_put(&STp->kref, scsi_tape_release);
+	scsi_device_put(sdev);
+	up(&st_ref_sem);
+}
+
 struct st_reject_data {
 	char *vendor;
 	char *model;
@@ -311,7 +357,7 @@ static int st_chk_result(struct scsi_tap
 		return 0;
 
 	cmdstatp = &STp->buffer->cmdstat;
-	st_analyze_sense(STp->buffer->last_SRpnt, cmdstatp);
+	st_analyze_sense(SRpnt, cmdstatp);
 
 	if (cmdstatp->have_sense)
 		scode = STp->buffer->cmdstat.sense_hdr.sense_key;
@@ -399,10 +445,10 @@ static void st_sleep_done(struct scsi_cm
 
 	(STp->buffer)->cmdstat.midlevel_result = SCpnt->result;
 	SCpnt->request->rq_status = RQ_SCSI_DONE;
-	(STp->buffer)->last_SRpnt = SCpnt->sc_request;
 	DEB( STp->write_pending = 0; )
 
-	complete(SCpnt->request->waiting);
+	if (SCpnt->request->waiting)
+		complete(SCpnt->request->waiting);
 }
 
 /* Do the scsi command. Waits until command performed if do_wait is true.
@@ -412,8 +458,20 @@ static struct scsi_request *
 st_do_scsi(struct scsi_request * SRpnt, struct scsi_tape * STp, unsigned char *cmd,
 	   int bytes, int direction, int timeout, int retries, int do_wait)
 {
+	struct completion *waiting;
 	unsigned char *bp;
 
+	/* if async, make sure there's no command outstanding */
+	if (!do_wait && ((STp->buffer)->last_SRpnt)) {
+		printk(KERN_ERR "%s: Async command already active.\n",
+		       tape_name(STp));
+		if (signal_pending(current))
+			(STp->buffer)->syscall_result = (-EINTR);
+		else
+			(STp->buffer)->syscall_result = (-EBUSY);
+		return NULL;
+	}
+
 	if (SRpnt == NULL) {
 		SRpnt = scsi_allocate_request(STp->device, GFP_ATOMIC);
 		if (SRpnt == NULL) {
@@ -427,7 +485,13 @@ st_do_scsi(struct scsi_request * SRpnt, 
 		}
 	}
 
-	init_completion(&STp->wait);
+	/* If async IO, set last_SRpnt. This ptr tells write_behind_check
+	   which IO is outstanding. It's nulled out when the IO completes. */
+	if (!do_wait)
+		(STp->buffer)->last_SRpnt = SRpnt;
+
+	waiting = &STp->wait;
+	init_completion(waiting);
 	SRpnt->sr_use_sg = STp->buffer->do_dio || (bytes > (STp->buffer)->frp[0].length);
 	if (SRpnt->sr_use_sg) {
 		if (!STp->buffer->do_dio)
@@ -438,17 +502,20 @@ st_do_scsi(struct scsi_request * SRpnt, 
 		bp = (STp->buffer)->b_data;
 	SRpnt->sr_data_direction = direction;
 	SRpnt->sr_cmd_len = 0;
-	SRpnt->sr_request->waiting = &(STp->wait);
+	SRpnt->sr_request->waiting = waiting;
 	SRpnt->sr_request->rq_status = RQ_SCSI_BUSY;
 	SRpnt->sr_request->rq_disk = STp->disk;
+	SRpnt->sr_request->end_io = blk_end_sync_rq;
 	STp->buffer->cmdstat.have_sense = 0;
 
 	scsi_do_req(SRpnt, (void *) cmd, bp, bytes,
 		    st_sleep_done, timeout, retries);
 
 	if (do_wait) {
-		wait_for_completion(SRpnt->sr_request->waiting);
+		wait_for_completion(waiting);
 		SRpnt->sr_request->waiting = NULL;
+		if (SRpnt->sr_request->rq_status != RQ_SCSI_DONE)
+			SRpnt->sr_result |= (DRIVER_ERROR << 24);
 		(STp->buffer)->syscall_result = st_chk_result(STp, SRpnt);
 	}
 	return SRpnt;
@@ -465,6 +532,7 @@ static int write_behind_check(struct scs
 	struct st_buffer *STbuffer;
 	struct st_partstat *STps;
 	struct st_cmdstatus *cmdstatp;
+	struct scsi_request *SRpnt;
 
 	STbuffer = STp->buffer;
 	if (!STbuffer->writing)
@@ -478,10 +546,14 @@ static int write_behind_check(struct scs
         ) /* end DEB */
 
 	wait_for_completion(&(STp->wait));
-	(STp->buffer)->last_SRpnt->sr_request->waiting = NULL;
+	SRpnt = STbuffer->last_SRpnt;
+	STbuffer->last_SRpnt = NULL;
+	SRpnt->sr_request->waiting = NULL;
+	if (SRpnt->sr_request->rq_status != RQ_SCSI_DONE)
+		SRpnt->sr_result |= (DRIVER_ERROR << 24);
 
-	(STp->buffer)->syscall_result = st_chk_result(STp, (STp->buffer)->last_SRpnt);
-	scsi_release_request((STp->buffer)->last_SRpnt);
+	(STp->buffer)->syscall_result = st_chk_result(STp, SRpnt);
+	scsi_release_request(SRpnt);
 
 	STbuffer->buffer_bytes -= STbuffer->writing;
 	STps = &(STp->ps[STp->partition]);
@@ -1055,25 +1127,20 @@ static int st_open(struct inode *inode, 
 	 */
 	filp->f_mode &= ~(FMODE_PREAD | FMODE_PWRITE);
 
+	if (!(STp = scsi_tape_get(dev)))
+		return -ENXIO;
+
 	write_lock(&st_dev_arr_lock);
-	if (dev >= st_dev_max || scsi_tapes == NULL ||
-	    ((STp = scsi_tapes[dev]) == NULL)) {
-		write_unlock(&st_dev_arr_lock);
-		return (-ENXIO);
-	}
 	filp->private_data = STp;
 	name = tape_name(STp);
 
 	if (STp->in_use) {
 		write_unlock(&st_dev_arr_lock);
+		scsi_tape_put(STp);
 		DEB( printk(ST_DEB_MSG "%s: Device already in use.\n", name); )
 		return (-EBUSY);
 	}
 
-	if(scsi_device_get(STp->device)) {
-		write_unlock(&st_dev_arr_lock);
-		return (-ENXIO);
-	}
 	STp->in_use = 1;
 	write_unlock(&st_dev_arr_lock);
 	STp->rew_at_close = STp->autorew_dev = (iminor(inode) & 0x80) == 0;
@@ -1118,7 +1185,7 @@ static int st_open(struct inode *inode, 
  err_out:
 	normalize_buffer(STp->buffer);
 	STp->in_use = 0;
-	scsi_device_put(STp->device);
+	scsi_tape_put(STp);
 	return retval;
 
 }
@@ -1250,7 +1317,7 @@ static int st_release(struct inode *inod
 	write_lock(&st_dev_arr_lock);
 	STp->in_use = 0;
 	write_unlock(&st_dev_arr_lock);
-	scsi_device_put(STp->device);
+	scsi_tape_put(STp);
 
 	return result;
 }
@@ -3887,6 +3954,7 @@ static int st_probe(struct device *dev)
 		goto out_put_disk;
 	}
 	memset(tpnt, 0, sizeof(struct scsi_tape));
+	kref_init(&tpnt->kref);
 	tpnt->disk = disk;
 	sprintf(disk->disk_name, "st%d", i);
 	disk->private_data = &tpnt->driver;
@@ -3902,6 +3970,7 @@ static int st_probe(struct device *dev)
 		tpnt->tape_type = MT_ISSCSI2;
 
 	tpnt->buffer = buffer;
+	tpnt->buffer->last_SRpnt = NULL;
 
 	tpnt->inited = 0;
 	tpnt->dirty = 0;
@@ -4076,15 +4145,10 @@ static int st_remove(struct device *dev)
 					tpnt->modes[mode].cdevs[j] = NULL;
 				}
 			}
-			tpnt->device = NULL;
 
-			if (tpnt->buffer) {
-				tpnt->buffer->orig_frp_segs = 0;
-				normalize_buffer(tpnt->buffer);
-				kfree(tpnt->buffer);
-			}
-			put_disk(tpnt->disk);
-			kfree(tpnt);
+			down(&st_ref_sem);
+			kref_put(&tpnt->kref, scsi_tape_release);
+			up(&st_ref_sem);
 			return 0;
 		}
 	}
@@ -4093,6 +4157,34 @@ static int st_remove(struct device *dev)
 	return 0;
 }
 
+/**
+ *      scsi_tape_release - Called to free the Scsi_Tape structure
+ *      @kref: pointer to embedded kref
+ *
+ *      st_ref_sem must be held entering this routine.  Because it is
+ *      called on last put, you should always use the scsi_tape_get()
+ *      scsi_tape_put() helpers which manipulate the semaphore directly
+ *      and never do a direct kref_put().
+ **/
+static void scsi_tape_release(struct kref *kref)
+{
+	struct scsi_tape *tpnt = to_scsi_tape(kref);
+	struct gendisk *disk = tpnt->disk;
+
+	tpnt->device = NULL;
+
+	if (tpnt->buffer) {
+		tpnt->buffer->orig_frp_segs = 0;
+		normalize_buffer(tpnt->buffer);
+		kfree(tpnt->buffer);
+	}
+
+	disk->private_data = NULL;
+	put_disk(disk);
+	kfree(tpnt);
+	return;
+}
+
 static void st_intr(struct scsi_cmnd *SCpnt)
 {
 	scsi_io_completion(SCpnt, (SCpnt->result ? 0: SCpnt->bufflen), 1);

[-- Attachment #3: st.h2613rc2nd.patch --]
[-- Type: application/octet-stream, Size: 414 bytes --]

--- st.h.orig	2005-07-13 14:28:56.000000000 -0400
+++ st.h	2005-07-13 14:28:31.000000000 -0400
@@ -3,7 +3,7 @@
 #define _ST_H
 
 #include <linux/completion.h>
-
+#include <linux/kref.h>
 
 /* Descriptor for analyzed sense data */
 struct st_cmdstatus {
@@ -156,6 +156,7 @@ struct scsi_tape {
 	unsigned char last_sense[16];
 #endif
 	struct gendisk *disk;
+	struct kref     kref;
 };
 
 /* Bit masks for use_pf */

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drivers/scsi/st.c: add reference count and related fixes
  2005-07-13 19:12 [PATCH] drivers/scsi/st.c: add reference count and related fixes Dailey, Nate
@ 2005-07-25 19:54 ` Brian King
  2005-08-02 10:40 ` Kai Makisara
  1 sibling, 0 replies; 10+ messages in thread
From: Brian King @ 2005-07-25 19:54 UTC (permalink / raw)
  To: Dailey, Nate; +Cc: Kai Makisara, linux-scsi

I just tested out your patch on the latest git tree and this patch
fixes the scsi tape hot unplug problems I was seeing as well.


Brian

Dailey, Nate wrote:
> I've attached patches against 2.6.13rc2. These are basically identical
> to my earlier patches, as I found that all issues I'd seen in earlier
> kernels still existed in this kernel.
> 
> To summarize, the changes are: (more details in my original email)
> 
> - add a kref to the scsi_tape structure, and associate reference
> counting stuff
> 
> - set sr_request->end_io = blk_end_sync_rq so we get notified when an IO
> is rejected when the device goes away
> 
> - check rq_status when IOs complete, else we don't know that IOs
> rejected for a dead device in fact did not complete
> 
> - change last_SRpnt so it's set before an async IO is issued (in case
> st_sleep_done is bypassed)
> 
> - fix a bogus use of last_SRpnt in st_chk_result
> 
> 
> Nate Dailey
> Stratus Technologies
> 
> Signed-off-by: Nate Dailey <nate.dailey@stratus.com>
> 
> 
>>-----Original Message-----
>>From: Kai Makisara [mailto:Kai.Makisara@kolumbus.fi] 
>>Sent: Tuesday, July 12, 2005 4:08 PM
>>To: Dailey, Nate
>>Cc: Brian King; linux-scsi@vger.kernel.org
>>Subject: RE: [PATCH] drivers/scsi/st.c: add reference count 
>>and related fixes
>>
>>
>>On Tue, 12 Jul 2005, Dailey, Nate wrote:
>>
>>
>>>I'll try porting my changes to a recent kernel, verify that 
>>
>>the bugs I
>>
>>>found are still bugs, and get back to you with a new patch.
>>>
>>>In the meantime, responses to your comments:
>>>
>>>- using a new kref in the scsi_tape structure: I've given this some
>>>thought, and I think using a new kref is defintely the best 
>>
>>way to go.
>>
>>>If nothing else, this is the same way sd and sr do 
>>
>>reference counting...
>>
>>>seems easier to maintain if the code in st is as similar as 
>>
>>possible to
>>
>>>these other drivers.
>>>
>>
>>st is a character device and it should use the character device 
>>refcounting if convenient. However, I now agree that the new 
>>kref is the 
>>simplest solution.
>>
>>
>>>- my changes to how last_SRpnt is used: I'll add a comment 
>>
>>documenting
>>
>>>when last_SRpnt is valid (it's valid only for async IOs 
>>
>>that have been
>>
>>>issued, nulled out by write_behind_check when they complete)
>>>
>>
>>Good.
>>
>>
>>>- Here's why I think the use of last_SRpnt in st_chk_result 
>>
>>is bogus:
>>
>>>one of the inputs to st_chk_result is "struct scsi_request 
>>
>>* SRpnt", and
>>
>>>it seems like this is the SRpnt you want to use, rather 
>>
>>than last_SRpnt.
>>
>>>This input will be the command that st_do_scsi or write_behind_check
>>>just waited on. This might not cause any problems with the 
>>
>>current st
>>
>>>code, but it will definitely cause problems with the 
>>
>>changes I made to
>>
>>>last_SRpnt. Let me know if I'm wrong about this, though, and I can
>>>change my last_SRpnt code.
>>>
>>
>>OK. I now see your point and it seems that the solution is 
>>simple: use the 
>>SRpnt argument of st_chk_result(). The reason why the 
>>function is using 
>>last_SRpnt is moving code from one place to another and not 
>>being careful 
>>enough to change everything (I knew that in that code 
>>last_SRpnt was valid 
>>always and so this did not look like an error ;-).
>>
>>-- 
>>Kai
>>


-- 
Brian King
eServer Storage I/O
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH] drivers/scsi/st.c: add reference count and related fixes
  2005-07-13 19:12 [PATCH] drivers/scsi/st.c: add reference count and related fixes Dailey, Nate
  2005-07-25 19:54 ` Brian King
@ 2005-08-02 10:40 ` Kai Makisara
  1 sibling, 0 replies; 10+ messages in thread
From: Kai Makisara @ 2005-08-02 10:40 UTC (permalink / raw)
  To: Dailey, Nate; +Cc: Brian King, linux-scsi

I have rediffed the patch against 2.6.13-rc5, done a couple of cosmetic
cleanups, and run some tests.  Brian King has acknowledged that it fixes the
problems he has seen. Seems mature enough for inclusion into 2.6.14 (or
later)?

Nate's explanation of the changes:

I've attached patches against 2.6.13rc2. These are basically identical
to my earlier patches, as I found that all issues I'd seen in earlier
kernels still existed in this kernel.

To summarize, the changes are: (more details in my original email)

- add a kref to the scsi_tape structure, and associate reference
counting stuff

- set sr_request->end_io = blk_end_sync_rq so we get notified when an IO
is rejected when the device goes away

- check rq_status when IOs complete, else we don't know that IOs
rejected for a dead device in fact did not complete

- change last_SRpnt so it's set before an async IO is issued (in case
st_sleep_done is bypassed)

- fix a bogus use of last_SRpnt in st_chk_result


Nate Dailey
Stratus Technologies

Signed-off-by: Nate Dailey <nate.dailey@stratus.com>
Signed-off-by: Kai Makisara <kai.makisara@kolumbus.fi>

--- linux-2.6.13-rc5-k1/drivers/scsi/st.c	2005-08-02 12:34:48.000000000 +0300
+++ linux-2.6.13-rc5-k2/drivers/scsi/st.c	2005-08-02 12:32:21.000000000 +0300
@@ -17,7 +17,7 @@
    Last modified: 18-JAN-1998 Richard Gooch <rgooch@atnf.csiro.au> Devfs support
  */
 
-static char *verstr = "20050501";
+static char *verstr = "20050802";
 
 #include <linux/module.h>
 
@@ -219,6 +219,12 @@ static int switch_partition(struct scsi_
 
 static int st_int_ioctl(struct scsi_tape *, unsigned int, unsigned long);
 
+static void scsi_tape_release(struct kref *);
+
+#define to_scsi_tape(obj) container_of(obj, struct scsi_tape, kref)
+
+static DECLARE_MUTEX(st_ref_sem);
+
 \f

 #include "osst_detect.h"
 #ifndef SIGS_FROM_OSST
@@ -230,6 +236,46 @@ static int st_int_ioctl(struct scsi_tape
 	{"OnStream", "FW-", "", "osst"}
 #endif
 
+static struct scsi_tape *scsi_tape_get(int dev)
+{
+	struct scsi_tape *STp = NULL;
+
+	down(&st_ref_sem);
+	write_lock(&st_dev_arr_lock);
+
+	if (dev < st_dev_max && scsi_tapes != NULL)
+		STp = scsi_tapes[dev];
+	if (!STp) goto out;
+
+	kref_get(&STp->kref);
+
+	if (!STp->device)
+		goto out_put;
+
+	if (scsi_device_get(STp->device))
+		goto out_put;
+
+	goto out;
+
+out_put:
+	kref_put(&STp->kref, scsi_tape_release);
+	STp = NULL;
+out:
+	write_unlock(&st_dev_arr_lock);
+	up(&st_ref_sem);
+	return STp;
+}
+
+static void scsi_tape_put(struct scsi_tape *STp)
+{
+	struct scsi_device *sdev = STp->device;
+
+	down(&st_ref_sem);
+	kref_put(&STp->kref, scsi_tape_release);
+	scsi_device_put(sdev);
+	up(&st_ref_sem);
+}
+
 struct st_reject_data {
 	char *vendor;
 	char *model;
@@ -311,7 +357,7 @@ static int st_chk_result(struct scsi_tap
 		return 0;
 
 	cmdstatp = &STp->buffer->cmdstat;
-	st_analyze_sense(STp->buffer->last_SRpnt, cmdstatp);
+	st_analyze_sense(SRpnt, cmdstatp);
 
 	if (cmdstatp->have_sense)
 		scode = STp->buffer->cmdstat.sense_hdr.sense_key;
@@ -399,10 +445,10 @@ static void st_sleep_done(struct scsi_cm
 
 	(STp->buffer)->cmdstat.midlevel_result = SCpnt->result;
 	SCpnt->request->rq_status = RQ_SCSI_DONE;
-	(STp->buffer)->last_SRpnt = SCpnt->sc_request;
 	DEB( STp->write_pending = 0; )
 
-	complete(SCpnt->request->waiting);
+	if (SCpnt->request->waiting)
+		complete(SCpnt->request->waiting);
 }
 
 /* Do the scsi command. Waits until command performed if do_wait is true.
@@ -412,8 +458,20 @@ static struct scsi_request *
 st_do_scsi(struct scsi_request * SRpnt, struct scsi_tape * STp, unsigned char *cmd,
 	   int bytes, int direction, int timeout, int retries, int do_wait)
 {
+	struct completion *waiting;
 	unsigned char *bp;
 
+	/* if async, make sure there's no command outstanding */
+	if (!do_wait && ((STp->buffer)->last_SRpnt)) {
+		printk(KERN_ERR "%s: Async command already active.\n",
+		       tape_name(STp));
+		if (signal_pending(current))
+			(STp->buffer)->syscall_result = (-EINTR);
+		else
+			(STp->buffer)->syscall_result = (-EBUSY);
+		return NULL;
+	}
+
 	if (SRpnt == NULL) {
 		SRpnt = scsi_allocate_request(STp->device, GFP_ATOMIC);
 		if (SRpnt == NULL) {
@@ -427,7 +485,13 @@ st_do_scsi(struct scsi_request * SRpnt, 
 		}
 	}
 
-	init_completion(&STp->wait);
+	/* If async IO, set last_SRpnt. This ptr tells write_behind_check
+	   which IO is outstanding. It's nulled out when the IO completes. */
+	if (!do_wait)
+		(STp->buffer)->last_SRpnt = SRpnt;
+
+	waiting = &STp->wait;
+	init_completion(waiting);
 	SRpnt->sr_use_sg = STp->buffer->do_dio || (bytes > (STp->buffer)->frp[0].length);
 	if (SRpnt->sr_use_sg) {
 		if (!STp->buffer->do_dio)
@@ -438,17 +502,20 @@ st_do_scsi(struct scsi_request * SRpnt, 
 		bp = (STp->buffer)->b_data;
 	SRpnt->sr_data_direction = direction;
 	SRpnt->sr_cmd_len = 0;
-	SRpnt->sr_request->waiting = &(STp->wait);
+	SRpnt->sr_request->waiting = waiting;
 	SRpnt->sr_request->rq_status = RQ_SCSI_BUSY;
 	SRpnt->sr_request->rq_disk = STp->disk;
+	SRpnt->sr_request->end_io = blk_end_sync_rq;
 	STp->buffer->cmdstat.have_sense = 0;
 
 	scsi_do_req(SRpnt, (void *) cmd, bp, bytes,
 		    st_sleep_done, timeout, retries);
 
 	if (do_wait) {
-		wait_for_completion(SRpnt->sr_request->waiting);
+		wait_for_completion(waiting);
 		SRpnt->sr_request->waiting = NULL;
+		if (SRpnt->sr_request->rq_status != RQ_SCSI_DONE)
+			SRpnt->sr_result |= (DRIVER_ERROR << 24);
 		(STp->buffer)->syscall_result = st_chk_result(STp, SRpnt);
 	}
 	return SRpnt;
@@ -465,6 +532,7 @@ static int write_behind_check(struct scs
 	struct st_buffer *STbuffer;
 	struct st_partstat *STps;
 	struct st_cmdstatus *cmdstatp;
+	struct scsi_request *SRpnt;
 
 	STbuffer = STp->buffer;
 	if (!STbuffer->writing)
@@ -478,10 +546,14 @@ static int write_behind_check(struct scs
         ) /* end DEB */
 
 	wait_for_completion(&(STp->wait));
-	(STp->buffer)->last_SRpnt->sr_request->waiting = NULL;
+	SRpnt = STbuffer->last_SRpnt;
+	STbuffer->last_SRpnt = NULL;
+	SRpnt->sr_request->waiting = NULL;
+	if (SRpnt->sr_request->rq_status != RQ_SCSI_DONE)
+		SRpnt->sr_result |= (DRIVER_ERROR << 24);
 
-	(STp->buffer)->syscall_result = st_chk_result(STp, (STp->buffer)->last_SRpnt);
-	scsi_release_request((STp->buffer)->last_SRpnt);
+	(STp->buffer)->syscall_result = st_chk_result(STp, SRpnt);
+	scsi_release_request(SRpnt);
 
 	STbuffer->buffer_bytes -= STbuffer->writing;
 	STps = &(STp->ps[STp->partition]);
@@ -1055,25 +1127,20 @@ static int st_open(struct inode *inode, 
 	 */
 	filp->f_mode &= ~(FMODE_PREAD | FMODE_PWRITE);
 
+	if (!(STp = scsi_tape_get(dev)))
+		return -ENXIO;
+
 	write_lock(&st_dev_arr_lock);
-	if (dev >= st_dev_max || scsi_tapes == NULL ||
-	    ((STp = scsi_tapes[dev]) == NULL)) {
-		write_unlock(&st_dev_arr_lock);
-		return (-ENXIO);
-	}
 	filp->private_data = STp;
 	name = tape_name(STp);
 
 	if (STp->in_use) {
 		write_unlock(&st_dev_arr_lock);
+		scsi_tape_put(STp);
 		DEB( printk(ST_DEB_MSG "%s: Device already in use.\n", name); )
 		return (-EBUSY);
 	}
 
-	if(scsi_device_get(STp->device)) {
-		write_unlock(&st_dev_arr_lock);
-		return (-ENXIO);
-	}
 	STp->in_use = 1;
 	write_unlock(&st_dev_arr_lock);
 	STp->rew_at_close = STp->autorew_dev = (iminor(inode) & 0x80) == 0;
@@ -1118,7 +1185,7 @@ static int st_open(struct inode *inode, 
  err_out:
 	normalize_buffer(STp->buffer);
 	STp->in_use = 0;
-	scsi_device_put(STp->device);
+	scsi_tape_put(STp);
 	return retval;
 
 }
@@ -1250,7 +1317,7 @@ static int st_release(struct inode *inod
 	write_lock(&st_dev_arr_lock);
 	STp->in_use = 0;
 	write_unlock(&st_dev_arr_lock);
-	scsi_device_put(STp->device);
+	scsi_tape_put(STp);
 
 	return result;
 }
@@ -3887,6 +3954,7 @@ static int st_probe(struct device *dev)
 		goto out_put_disk;
 	}
 	memset(tpnt, 0, sizeof(struct scsi_tape));
+	kref_init(&tpnt->kref);
 	tpnt->disk = disk;
 	sprintf(disk->disk_name, "st%d", i);
 	disk->private_data = &tpnt->driver;
@@ -3902,6 +3970,7 @@ static int st_probe(struct device *dev)
 		tpnt->tape_type = MT_ISSCSI2;
 
 	tpnt->buffer = buffer;
+	tpnt->buffer->last_SRpnt = NULL;
 
 	tpnt->inited = 0;
 	tpnt->dirty = 0;
@@ -4076,15 +4145,10 @@ static int st_remove(struct device *dev)
 					tpnt->modes[mode].cdevs[j] = NULL;
 				}
 			}
-			tpnt->device = NULL;
 
-			if (tpnt->buffer) {
-				tpnt->buffer->orig_frp_segs = 0;
-				normalize_buffer(tpnt->buffer);
-				kfree(tpnt->buffer);
-			}
-			put_disk(tpnt->disk);
-			kfree(tpnt);
+			down(&st_ref_sem);
+			kref_put(&tpnt->kref, scsi_tape_release);
+			up(&st_ref_sem);
 			return 0;
 		}
 	}
@@ -4093,6 +4157,34 @@ static int st_remove(struct device *dev)
 	return 0;
 }
 
+/**
+ *      scsi_tape_release - Called to free the Scsi_Tape structure
+ *      @kref: pointer to embedded kref
+ *
+ *      st_ref_sem must be held entering this routine.  Because it is
+ *      called on last put, you should always use the scsi_tape_get()
+ *      scsi_tape_put() helpers which manipulate the semaphore directly
+ *      and never do a direct kref_put().
+ **/
+static void scsi_tape_release(struct kref *kref)
+{
+	struct scsi_tape *tpnt = to_scsi_tape(kref);
+	struct gendisk *disk = tpnt->disk;
+
+	tpnt->device = NULL;
+
+	if (tpnt->buffer) {
+		tpnt->buffer->orig_frp_segs = 0;
+		normalize_buffer(tpnt->buffer);
+		kfree(tpnt->buffer);
+	}
+
+	disk->private_data = NULL;
+	put_disk(disk);
+	kfree(tpnt);
+	return;
+}
+
 static void st_intr(struct scsi_cmnd *SCpnt)
 {
 	scsi_io_completion(SCpnt, (SCpnt->result ? 0: SCpnt->bufflen), 1);
--- linux-2.6.13-rc5-k1/drivers/scsi/st.h	2005-06-18 09:44:25.000000000 +0300
+++ linux-2.6.13-rc5-k2/drivers/scsi/st.h	2005-08-02 12:31:23.000000000 +0300
@@ -3,7 +3,7 @@
 #define _ST_H
 
 #include <linux/completion.h>
-
+#include <linux/kref.h>
 
 /* Descriptor for analyzed sense data */
 struct st_cmdstatus {
@@ -156,6 +156,7 @@ struct scsi_tape {
 	unsigned char last_sense[16];
 #endif
 	struct gendisk *disk;
+	struct kref     kref;
 };
 
 /* Bit masks for use_pf */

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2005-08-02 10:36 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-07-13 19:12 [PATCH] drivers/scsi/st.c: add reference count and related fixes Dailey, Nate
2005-07-25 19:54 ` Brian King
2005-08-02 10:40 ` Kai Makisara
  -- strict thread matches above, loose matches on Subject: below --
2005-07-12 15:10 Dailey, Nate
2005-07-12 20:07 ` Kai Makisara
2005-07-05 17:10 Dailey, Nate
2005-07-06 20:01 ` Brian King
2005-07-07 11:11 ` Kai Makisara
2005-07-07 13:24   ` Brian King
2005-07-07 17:06     ` Kai Makisara

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).