From: Douglas Gilbert <dougg@torque.net>
To: Brian King <brking@us.ibm.com>
Cc: James.Bottomley@steeleye.com, linux-scsi@vger.kernel.org
Subject: Re: [PATCH] sg_cmd_done - another oops
Date: Mon, 07 Jun 2004 12:57:12 +1000 [thread overview]
Message-ID: <40C3D988.3090709@torque.net> (raw)
In-Reply-To: <40C0968E.4090309@us.ibm.com>
[-- Attachment #1: Type: text/plain, Size: 1963 bytes --]
Brian King wrote:
> The following patch fixes a race condition in sg
> of sg_cmd_done racing with sg_release. I've seen this
> bug hit several times on test machines and the following
> patch fixes it. The race is that if srp->done is set
> and the waiting thread gets a spurious wakeup immediately
> afterwards, then the waiting thread can end up executing
> and completing, then getting closed, freeing sfp before
> the wake_up_interruptible is called, which then will result
> in an oops. The oops is fixed by locking around the
> setting srp->done to 1 and the wake_up, and also locking
> around the checking of srp->done, which guarantees that
> the wake_up_interruptible will always occur before the
> sleeping thread gets a chance to run.
Brian,
Thanks for this analysis; I'm still trying to get my head
around it :-) These "spurious wakeups" are a worry. Does
your testing produce them or do they occur randomly?
The critical part of your patch seems to be the write
lock in sg_cmd_done() making sure srp->done is set _and_
wake_up_interruptible() gets executed before the
__wait_event_interruptible macro in sg_read() or
sg_remove(_sfp) can proceed.
I thought about making srp->done atomic (of type atomic_t)
but that does not seem to be sufficient. So I'm happy for
James to apply this patch.
An attempt to consolidate... Almost all of the changes to
sg that were discussed in the "2.6.6-rc3 ia64
smp_call_function() called with interrupts disabled" thread
(about a month ago) are now in lk 2.6.7-rc2. The janitors
have also been at work with "__user" type modifiers.
Some changes that Patrick Mansfield proposed are not in
rc2:
- up max number of devices to 32K
- revert vmalloc()s to kmalloc()s since the former are
problematic in big configurations
The attached patch merges Brian patches with those proposed
by Patrick. The patch is against lk 2.6.7-rc2 (and sg hasn't
changed between rc2 and rc2-bk5).
Doug Gilbert
[-- Attachment #2: sg267rc2.diff --]
[-- Type: text/x-patch, Size: 4732 bytes --]
--- linux/drivers/scsi/sg.c 2004-06-03 12:45:12.000000000 +1000
+++ linux/drivers/scsi/sg.c267rc2bk1pl 2004-06-07 12:29:52.879588688 +1000
@@ -42,7 +42,6 @@
#include <linux/fcntl.h>
#include <linux/init.h>
#include <linux/poll.h>
-#include <linux/vmalloc.h>
#include <linux/smp_lock.h>
#include <linux/moduleparam.h>
#include <linux/devfs_fs_kernel.h>
@@ -60,7 +59,7 @@
#ifdef CONFIG_SCSI_PROC_FS
#include <linux/proc_fs.h>
-static char *sg_version_date = "20040513";
+static char *sg_version_date = "20040607";
static int sg_proc_init(void);
static void sg_proc_cleanup(void);
@@ -73,7 +72,7 @@
#define SG_ALLOW_DIO_DEF 0
#define SG_ALLOW_DIO_CODE /* compile out by commenting this define */
-#define SG_MAX_DEVS 8192
+#define SG_MAX_DEVS 32768
/*
* Suppose you want to calculate the formula muldiv(x,m,d)=int(x * m / d)
@@ -723,6 +722,18 @@
}
static int
+sg_srp_done(Sg_request *srp, Sg_fd *sfp)
+{
+ unsigned long iflags;
+ int done;
+
+ read_lock_irqsave(&sfp->rq_list_lock, iflags);
+ done = srp->done;
+ read_unlock_irqrestore(&sfp->rq_list_lock, iflags);
+ return done;
+}
+
+static int
sg_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd_in, unsigned long arg)
{
@@ -761,7 +772,7 @@
while (1) {
result = 0; /* following macro to beat race condition */
__wait_event_interruptible(sfp->read_wait,
- (sdp->detached || sfp->closed || srp->done),
+ (sdp->detached || sfp->closed || sg_srp_done(srp, sfp)),
result);
if (sdp->detached)
return -ENODEV;
@@ -772,7 +783,9 @@
srp->orphan = 1;
return result; /* -ERESTARTSYS because signal hit process */
}
+ write_lock_irqsave(&sfp->rq_list_lock, iflags);
srp->done = 2;
+ write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
result = sg_new_read(sfp, p, SZ_SG_IO_HDR, srp);
return (result < 0) ? result : 0;
}
@@ -1215,6 +1228,7 @@
Sg_device *sdp = NULL;
Sg_fd *sfp;
Sg_request *srp = NULL;
+ unsigned long iflags;
if (SCpnt && (SRpnt = SCpnt->sc_request))
srp = (Sg_request *) SRpnt->upper_private_data;
@@ -1303,8 +1317,10 @@
if (sfp && srp) {
/* Now wake up any sg_read() that is waiting for this packet. */
kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN);
+ write_lock_irqsave(&sfp->rq_list_lock, iflags);
srp->done = 1;
wake_up_interruptible(&sfp->read_wait);
+ write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
}
}
@@ -1331,17 +1347,18 @@
void *old_sg_dev_arr = NULL;
int k, error;
- sdp = vmalloc(sizeof(Sg_device));
- if (!sdp)
+ sdp = kmalloc(sizeof(Sg_device), GFP_KERNEL);
+ if (!sdp) {
+ printk(KERN_WARNING "sg_alloc: kmalloc Sg_device failure\n");
return -ENOMEM;
-
+ }
write_lock_irqsave(&sg_dev_arr_lock, iflags);
if (unlikely(sg_nr_dev >= sg_dev_max)) { /* try to resize */
Sg_device **tmp_da;
int tmp_dev_max = sg_nr_dev + SG_DEV_ARR_LUMP;
write_unlock_irqrestore(&sg_dev_arr_lock, iflags);
- tmp_da = vmalloc(tmp_dev_max * sizeof(Sg_device *));
+ tmp_da = kmalloc(tmp_dev_max * sizeof(Sg_device *), GFP_KERNEL);
if (unlikely(!tmp_da))
goto expand_failed;
@@ -1375,8 +1392,8 @@
out:
if (error < 0)
- vfree(sdp);
- vfree(old_sg_dev_arr);
+ kfree(sdp);
+ kfree(old_sg_dev_arr);
return error;
expand_failed:
@@ -1404,14 +1421,18 @@
int error, k;
disk = alloc_disk(1);
- if (!disk)
+ if (!disk) {
+ printk(KERN_WARNING "alloc_disk failed\n");
return -ENOMEM;
+ }
disk->major = SCSI_GENERIC_MAJOR;
error = -ENOMEM;
cdev = cdev_alloc();
- if (!cdev)
+ if (!cdev) {
+ printk(KERN_WARNING "cdev_alloc failed\n");
goto out;
+ }
cdev->owner = THIS_MODULE;
cdev->ops = &sg_fops;
@@ -1490,7 +1511,7 @@
tsfp = sfp->nextfp;
for (srp = sfp->headrp; srp; srp = tsrp) {
tsrp = srp->nextrp;
- if (sfp->closed || (0 == srp->done))
+ if (sfp->closed || (0 == sg_srp_done(srp, sfp)))
sg_finish_rem_req(srp);
}
if (sfp->closed) {
@@ -1525,7 +1546,7 @@
put_disk(sdp->disk);
sdp->disk = NULL;
if (NULL == sdp->headfp)
- vfree((char *) sdp);
+ kfree((char *) sdp);
}
if (delay)
@@ -1590,7 +1611,7 @@
unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0),
SG_MAX_DEVS);
if (sg_dev_arr != NULL) {
- vfree((char *) sg_dev_arr);
+ kfree((char *) sg_dev_arr);
sg_dev_arr = NULL;
}
sg_dev_max = 0;
@@ -2472,7 +2493,7 @@
for (srp = sfp->headrp; srp; srp = tsrp) {
tsrp = srp->nextrp;
- if (srp->done)
+ if (sg_srp_done(srp, sfp))
sg_finish_rem_req(srp);
else
++dirty;
@@ -2492,7 +2513,7 @@
}
if (k < maxd)
sg_dev_arr[k] = NULL;
- vfree((char *) sdp);
+ kfree((char *) sdp);
res = 1;
}
write_unlock_irqrestore(&sg_dev_arr_lock, iflags);
next prev parent reply other threads:[~2004-06-07 7:02 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-06-04 15:34 [PATCH] sg_cmd_done - another oops Brian King
2004-06-05 12:57 ` Guennadi Liakhovetski
2004-06-06 8:49 ` Jens Axboe
2004-06-06 14:32 ` Guennadi Liakhovetski
2004-06-07 2:57 ` Douglas Gilbert [this message]
2004-06-09 14:44 ` Brian King
2004-08-31 20:16 ` Brian King
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=40C3D988.3090709@torque.net \
--to=dougg@torque.net \
--cc=James.Bottomley@steeleye.com \
--cc=brking@us.ibm.com \
--cc=linux-scsi@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox