From: Brian King <brking@us.ibm.com>
To: dougg@torque.net
Cc: James.Bottomley@steeleye.com, linux-scsi@vger.kernel.org
Subject: Re: [PATCH] sg_cmd_done - another oops
Date: Tue, 31 Aug 2004 15:16:28 -0500 [thread overview]
Message-ID: <4134DC9C.5010100@us.ibm.com> (raw)
In-Reply-To: <40C3D988.3090709@torque.net>
James,
This patch appears to have been dropped.
-Brian
Douglas Gilbert wrote:
> Brian King wrote:
>
>> The following patch fixes a race condition in sg
>
> > of sg_cmd_done racing with sg_release. I've seen this
> > bug hit several times on test machines and the following
> > patch fixes it. The race is that if srp->done is set
> > and the waiting thread gets a spurious wakeup immediately
> > afterwards, then the waiting thread can end up executing
> > and completing, then getting closed, freeing sfp before
> > the wake_up_interruptible is called, which then will result
> > in an oops. The oops is fixed by locking around the
>
>> setting srp->done to 1 and the wake_up, and also locking
>
> > around the checking of srp->done, which guarantees that
> > the wake_up_interruptible will always occur before the
> > sleeping thread gets a chance to run.
>
> Brian,
> Thanks for this analysis; I'm still trying to get my head
> around it :-) These "spurious wakeups" are a worry. Does
> your testing produce them or do they occur randomly?
>
> The critical part of your patch seems to be the write
> lock in sg_cmd_done() making sure srp->done is set _and_
> wake_up_interruptible() gets executed before the
> __wait_event_interruptible macro in sg_read() or
> sg_remove(_sfp) can proceed.
>
> I thought about making srp->done atomic (of type atomic_t)
> but that does not seem to be sufficient. So I'm happy for
> James to apply this patch.
>
>
> An attempt to consolidate... Almost all of the changes to
> sg that were discussed in the "2.6.6-rc3 ia64
> smp_call_function() called with interrupts disabled" thread
> (about a month ago) are now in lk 2.6.7-rc2. The janitors
> have also been at work with "__user" type modifiers.
> Some changes that Patrick Mansfield proposed are not in
> rc2:
> - up max number of devices to 32K
> - revert vmalloc()s to kmalloc()s since the former are
> problematic in big configurations
>
> The attached patch merges Brian patches with those proposed
> by Patrick. The patch is against lk 2.6.7-rc2 (and sg hasn't
> changed between rc2 and rc2-bk5).
>
> Doug Gilbert
>
>
>
> ------------------------------------------------------------------------
>
> --- linux/drivers/scsi/sg.c 2004-06-03 12:45:12.000000000 +1000
> +++ linux/drivers/scsi/sg.c267rc2bk1pl 2004-06-07 12:29:52.879588688 +1000
> @@ -42,7 +42,6 @@
> #include <linux/fcntl.h>
> #include <linux/init.h>
> #include <linux/poll.h>
> -#include <linux/vmalloc.h>
> #include <linux/smp_lock.h>
> #include <linux/moduleparam.h>
> #include <linux/devfs_fs_kernel.h>
> @@ -60,7 +59,7 @@
>
> #ifdef CONFIG_SCSI_PROC_FS
> #include <linux/proc_fs.h>
> -static char *sg_version_date = "20040513";
> +static char *sg_version_date = "20040607";
>
> static int sg_proc_init(void);
> static void sg_proc_cleanup(void);
> @@ -73,7 +72,7 @@
> #define SG_ALLOW_DIO_DEF 0
> #define SG_ALLOW_DIO_CODE /* compile out by commenting this define */
>
> -#define SG_MAX_DEVS 8192
> +#define SG_MAX_DEVS 32768
>
> /*
> * Suppose you want to calculate the formula muldiv(x,m,d)=int(x * m / d)
> @@ -723,6 +722,18 @@
> }
>
> static int
> +sg_srp_done(Sg_request *srp, Sg_fd *sfp)
> +{
> + unsigned long iflags;
> + int done;
> +
> + read_lock_irqsave(&sfp->rq_list_lock, iflags);
> + done = srp->done;
> + read_unlock_irqrestore(&sfp->rq_list_lock, iflags);
> + return done;
> +}
> +
> +static int
> sg_ioctl(struct inode *inode, struct file *filp,
> unsigned int cmd_in, unsigned long arg)
> {
> @@ -761,7 +772,7 @@
> while (1) {
> result = 0; /* following macro to beat race condition */
> __wait_event_interruptible(sfp->read_wait,
> - (sdp->detached || sfp->closed || srp->done),
> + (sdp->detached || sfp->closed || sg_srp_done(srp, sfp)),
> result);
> if (sdp->detached)
> return -ENODEV;
> @@ -772,7 +783,9 @@
> srp->orphan = 1;
> return result; /* -ERESTARTSYS because signal hit process */
> }
> + write_lock_irqsave(&sfp->rq_list_lock, iflags);
> srp->done = 2;
> + write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
> result = sg_new_read(sfp, p, SZ_SG_IO_HDR, srp);
> return (result < 0) ? result : 0;
> }
> @@ -1215,6 +1228,7 @@
> Sg_device *sdp = NULL;
> Sg_fd *sfp;
> Sg_request *srp = NULL;
> + unsigned long iflags;
>
> if (SCpnt && (SRpnt = SCpnt->sc_request))
> srp = (Sg_request *) SRpnt->upper_private_data;
> @@ -1303,8 +1317,10 @@
> if (sfp && srp) {
> /* Now wake up any sg_read() that is waiting for this packet. */
> kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN);
> + write_lock_irqsave(&sfp->rq_list_lock, iflags);
> srp->done = 1;
> wake_up_interruptible(&sfp->read_wait);
> + write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
> }
> }
>
> @@ -1331,17 +1347,18 @@
> void *old_sg_dev_arr = NULL;
> int k, error;
>
> - sdp = vmalloc(sizeof(Sg_device));
> - if (!sdp)
> + sdp = kmalloc(sizeof(Sg_device), GFP_KERNEL);
> + if (!sdp) {
> + printk(KERN_WARNING "sg_alloc: kmalloc Sg_device failure\n");
> return -ENOMEM;
> -
> + }
> write_lock_irqsave(&sg_dev_arr_lock, iflags);
> if (unlikely(sg_nr_dev >= sg_dev_max)) { /* try to resize */
> Sg_device **tmp_da;
> int tmp_dev_max = sg_nr_dev + SG_DEV_ARR_LUMP;
> write_unlock_irqrestore(&sg_dev_arr_lock, iflags);
>
> - tmp_da = vmalloc(tmp_dev_max * sizeof(Sg_device *));
> + tmp_da = kmalloc(tmp_dev_max * sizeof(Sg_device *), GFP_KERNEL);
> if (unlikely(!tmp_da))
> goto expand_failed;
>
> @@ -1375,8 +1392,8 @@
>
> out:
> if (error < 0)
> - vfree(sdp);
> - vfree(old_sg_dev_arr);
> + kfree(sdp);
> + kfree(old_sg_dev_arr);
> return error;
>
> expand_failed:
> @@ -1404,14 +1421,18 @@
> int error, k;
>
> disk = alloc_disk(1);
> - if (!disk)
> + if (!disk) {
> + printk(KERN_WARNING "alloc_disk failed\n");
> return -ENOMEM;
> + }
> disk->major = SCSI_GENERIC_MAJOR;
>
> error = -ENOMEM;
> cdev = cdev_alloc();
> - if (!cdev)
> + if (!cdev) {
> + printk(KERN_WARNING "cdev_alloc failed\n");
> goto out;
> + }
> cdev->owner = THIS_MODULE;
> cdev->ops = &sg_fops;
>
> @@ -1490,7 +1511,7 @@
> tsfp = sfp->nextfp;
> for (srp = sfp->headrp; srp; srp = tsrp) {
> tsrp = srp->nextrp;
> - if (sfp->closed || (0 == srp->done))
> + if (sfp->closed || (0 == sg_srp_done(srp, sfp)))
> sg_finish_rem_req(srp);
> }
> if (sfp->closed) {
> @@ -1525,7 +1546,7 @@
> put_disk(sdp->disk);
> sdp->disk = NULL;
> if (NULL == sdp->headfp)
> - vfree((char *) sdp);
> + kfree((char *) sdp);
> }
>
> if (delay)
> @@ -1590,7 +1611,7 @@
> unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0),
> SG_MAX_DEVS);
> if (sg_dev_arr != NULL) {
> - vfree((char *) sg_dev_arr);
> + kfree((char *) sg_dev_arr);
> sg_dev_arr = NULL;
> }
> sg_dev_max = 0;
> @@ -2472,7 +2493,7 @@
>
> for (srp = sfp->headrp; srp; srp = tsrp) {
> tsrp = srp->nextrp;
> - if (srp->done)
> + if (sg_srp_done(srp, sfp))
> sg_finish_rem_req(srp);
> else
> ++dirty;
> @@ -2492,7 +2513,7 @@
> }
> if (k < maxd)
> sg_dev_arr[k] = NULL;
> - vfree((char *) sdp);
> + kfree((char *) sdp);
> res = 1;
> }
> write_unlock_irqrestore(&sg_dev_arr_lock, iflags);
--
Brian King
eServer Storage I/O
IBM Linux Technology Center
prev parent reply other threads:[~2004-08-31 20:18 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-06-04 15:34 [PATCH] sg_cmd_done - another oops Brian King
2004-06-05 12:57 ` Guennadi Liakhovetski
2004-06-06 8:49 ` Jens Axboe
2004-06-06 14:32 ` Guennadi Liakhovetski
2004-06-07 2:57 ` Douglas Gilbert
2004-06-09 14:44 ` Brian King
2004-08-31 20:16 ` Brian King [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4134DC9C.5010100@us.ibm.com \
--to=brking@us.ibm.com \
--cc=James.Bottomley@steeleye.com \
--cc=dougg@torque.net \
--cc=linux-scsi@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox