From mboxrd@z Thu Jan 1 00:00:00 1970 From: Douglas Gilbert Subject: Re: [PATCH] sg_cmd_done - another oops Date: Mon, 07 Jun 2004 12:57:12 +1000 Sender: linux-scsi-owner@vger.kernel.org Message-ID: <40C3D988.3090709@torque.net> References: <40C0968E.4090309@us.ibm.com> Reply-To: dougg@torque.net Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="------------070702020208000305030403" Return-path: Received: from bunyip.cc.uq.edu.au ([130.102.2.1]:48392 "EHLO bunyip.cc.uq.edu.au") by vger.kernel.org with ESMTP id S264329AbUFGHCj (ORCPT ); Mon, 7 Jun 2004 03:02:39 -0400 In-Reply-To: <40C0968E.4090309@us.ibm.com> List-Id: linux-scsi@vger.kernel.org To: Brian King Cc: James.Bottomley@steeleye.com, linux-scsi@vger.kernel.org This is a multi-part message in MIME format. --------------070702020208000305030403 Content-Type: text/plain; charset=us-ascii; format=flowed Content-Transfer-Encoding: 7bit Brian King wrote: > The following patch fixes a race condition in sg > of sg_cmd_done racing with sg_release. I've seen this > bug hit several times on test machines and the following > patch fixes it. The race is that if srp->done is set > and the waiting thread gets a spurious wakeup immediately > afterwards, then the waiting thread can end up executing > and completing, then getting closed, freeing sfp before > the wake_up_interruptible is called, which then will result > in an oops. The oops is fixed by locking around the > setting srp->done to 1 and the wake_up, and also locking > around the checking of srp->done, which guarantees that > the wake_up_interruptible will always occur before the > sleeping thread gets a chance to run. Brian, Thanks for this analysis; I'm still trying to get my head around it :-) These "spurious wakeups" are a worry. Does your testing produce them or do they occur randomly? The critical part of your patch seems to be the write lock in sg_cmd_done() making sure srp->done is set _and_ wake_up_interruptible() gets executed before the __wait_event_interruptible macro in sg_read() or sg_remove(_sfp) can proceed. I thought about making srp->done atomic (of type atomic_t) but that does not seem to be sufficient. So I'm happy for James to apply this patch. An attempt to consolidate... Almost all of the changes to sg that were discussed in the "2.6.6-rc3 ia64 smp_call_function() called with interrupts disabled" thread (about a month ago) are now in lk 2.6.7-rc2. The janitors have also been at work with "__user" type modifiers. Some changes that Patrick Mansfield proposed are not in rc2: - up max number of devices to 32K - revert vmalloc()s to kmalloc()s since the former are problematic in big configurations The attached patch merges Brian patches with those proposed by Patrick. The patch is against lk 2.6.7-rc2 (and sg hasn't changed between rc2 and rc2-bk5). Doug Gilbert --------------070702020208000305030403 Content-Type: text/x-patch; name="sg267rc2.diff" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="sg267rc2.diff" --- linux/drivers/scsi/sg.c 2004-06-03 12:45:12.000000000 +1000 +++ linux/drivers/scsi/sg.c267rc2bk1pl 2004-06-07 12:29:52.879588688 +1000 @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -60,7 +59,7 @@ #ifdef CONFIG_SCSI_PROC_FS #include -static char *sg_version_date = "20040513"; +static char *sg_version_date = "20040607"; static int sg_proc_init(void); static void sg_proc_cleanup(void); @@ -73,7 +72,7 @@ #define SG_ALLOW_DIO_DEF 0 #define SG_ALLOW_DIO_CODE /* compile out by commenting this define */ -#define SG_MAX_DEVS 8192 +#define SG_MAX_DEVS 32768 /* * Suppose you want to calculate the formula muldiv(x,m,d)=int(x * m / d) @@ -723,6 +722,18 @@ } static int +sg_srp_done(Sg_request *srp, Sg_fd *sfp) +{ + unsigned long iflags; + int done; + + read_lock_irqsave(&sfp->rq_list_lock, iflags); + done = srp->done; + read_unlock_irqrestore(&sfp->rq_list_lock, iflags); + return done; +} + +static int sg_ioctl(struct inode *inode, struct file *filp, unsigned int cmd_in, unsigned long arg) { @@ -761,7 +772,7 @@ while (1) { result = 0; /* following macro to beat race condition */ __wait_event_interruptible(sfp->read_wait, - (sdp->detached || sfp->closed || srp->done), + (sdp->detached || sfp->closed || sg_srp_done(srp, sfp)), result); if (sdp->detached) return -ENODEV; @@ -772,7 +783,9 @@ srp->orphan = 1; return result; /* -ERESTARTSYS because signal hit process */ } + write_lock_irqsave(&sfp->rq_list_lock, iflags); srp->done = 2; + write_unlock_irqrestore(&sfp->rq_list_lock, iflags); result = sg_new_read(sfp, p, SZ_SG_IO_HDR, srp); return (result < 0) ? result : 0; } @@ -1215,6 +1228,7 @@ Sg_device *sdp = NULL; Sg_fd *sfp; Sg_request *srp = NULL; + unsigned long iflags; if (SCpnt && (SRpnt = SCpnt->sc_request)) srp = (Sg_request *) SRpnt->upper_private_data; @@ -1303,8 +1317,10 @@ if (sfp && srp) { /* Now wake up any sg_read() that is waiting for this packet. */ kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN); + write_lock_irqsave(&sfp->rq_list_lock, iflags); srp->done = 1; wake_up_interruptible(&sfp->read_wait); + write_unlock_irqrestore(&sfp->rq_list_lock, iflags); } } @@ -1331,17 +1347,18 @@ void *old_sg_dev_arr = NULL; int k, error; - sdp = vmalloc(sizeof(Sg_device)); - if (!sdp) + sdp = kmalloc(sizeof(Sg_device), GFP_KERNEL); + if (!sdp) { + printk(KERN_WARNING "sg_alloc: kmalloc Sg_device failure\n"); return -ENOMEM; - + } write_lock_irqsave(&sg_dev_arr_lock, iflags); if (unlikely(sg_nr_dev >= sg_dev_max)) { /* try to resize */ Sg_device **tmp_da; int tmp_dev_max = sg_nr_dev + SG_DEV_ARR_LUMP; write_unlock_irqrestore(&sg_dev_arr_lock, iflags); - tmp_da = vmalloc(tmp_dev_max * sizeof(Sg_device *)); + tmp_da = kmalloc(tmp_dev_max * sizeof(Sg_device *), GFP_KERNEL); if (unlikely(!tmp_da)) goto expand_failed; @@ -1375,8 +1392,8 @@ out: if (error < 0) - vfree(sdp); - vfree(old_sg_dev_arr); + kfree(sdp); + kfree(old_sg_dev_arr); return error; expand_failed: @@ -1404,14 +1421,18 @@ int error, k; disk = alloc_disk(1); - if (!disk) + if (!disk) { + printk(KERN_WARNING "alloc_disk failed\n"); return -ENOMEM; + } disk->major = SCSI_GENERIC_MAJOR; error = -ENOMEM; cdev = cdev_alloc(); - if (!cdev) + if (!cdev) { + printk(KERN_WARNING "cdev_alloc failed\n"); goto out; + } cdev->owner = THIS_MODULE; cdev->ops = &sg_fops; @@ -1490,7 +1511,7 @@ tsfp = sfp->nextfp; for (srp = sfp->headrp; srp; srp = tsrp) { tsrp = srp->nextrp; - if (sfp->closed || (0 == srp->done)) + if (sfp->closed || (0 == sg_srp_done(srp, sfp))) sg_finish_rem_req(srp); } if (sfp->closed) { @@ -1525,7 +1546,7 @@ put_disk(sdp->disk); sdp->disk = NULL; if (NULL == sdp->headfp) - vfree((char *) sdp); + kfree((char *) sdp); } if (delay) @@ -1590,7 +1611,7 @@ unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), SG_MAX_DEVS); if (sg_dev_arr != NULL) { - vfree((char *) sg_dev_arr); + kfree((char *) sg_dev_arr); sg_dev_arr = NULL; } sg_dev_max = 0; @@ -2472,7 +2493,7 @@ for (srp = sfp->headrp; srp; srp = tsrp) { tsrp = srp->nextrp; - if (srp->done) + if (sg_srp_done(srp, sfp)) sg_finish_rem_req(srp); else ++dirty; @@ -2492,7 +2513,7 @@ } if (k < maxd) sg_dev_arr[k] = NULL; - vfree((char *) sdp); + kfree((char *) sdp); res = 1; } write_unlock_irqrestore(&sg_dev_arr_lock, iflags); --------------070702020208000305030403--