* [PATCH 1/2] [RFC] block: replace BKL with global mutex
@ 2010-04-14 20:36 Arnd Bergmann
2010-04-14 22:48 ` Douglas Gilbert
0 siblings, 1 reply; 10+ messages in thread
From: Arnd Bergmann @ 2010-04-14 20:36 UTC (permalink / raw)
Cc: Arnd Bergmann, Jens Axboe, Vivek Goyal, Tejun Heo,
Frederic Weisbecker, FUJITA Tomonori, Martin K. Petersen,
Steven Rostedt, Ingo Molnar, John Kacur, linux-scsi, linux-kernel
The block layer is one of the remaining users
of the Big Kernel Lock. In there, it is used
mainly for serializing the blkdev_get and
blkdev_put functions and some ioctl implementations
as well as some less common open functions of
related character devices following a previous
pushdown and parts of the blktrace code.
The only one that seems to be a bit nasty is the
blkdev_get function which is actually recursive
and may try to get the BKL twice.
All users except the one in blkdev_get seem to
be outermost locks, meaning we don't rely on
the release-on-sleep semantics to avoid deadlocks.
The ctl_mutex (pktcdvd.ko), raw_mutex (raw.ko),
state_mutex (dasd.ko), reconfig_mutex (md.ko),
and jfs_log_mutex (jfs.ko) may be held when
blkdev_get is called, but as far as I can tell,
these mutexes are never acquired from any of the
functions that get converted in this patch.
In order to get rid of the BKL, this introduces
a new global mutex called blkdev_mutex, which
replaces the BKL in all drivers that directly
interact with the block layer. In case of blkdev_get,
the mutex is moved outside of the function itself
in order to avoid the recursive taking of blkdev_mutex.
Testing so far has shown no problems whatsoever
from this patch, but the usage in blkdev_get
may introduce extra latencies, and I may have
missed corner cases where an block device ioctl
function sleeps for a significant amount of time,
which may be harmful to the performance of other
threads.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: linux-scsi@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
block/bsg.c | 2 --
block/compat_ioctl.c | 8 ++++----
block/ioctl.c | 24 ++++++++++++------------
drivers/block/DAC960.c | 4 ++--
drivers/block/cciss.c | 4 ++--
drivers/block/paride/pg.c | 4 ++--
drivers/block/paride/pt.c | 16 ++++++++--------
drivers/scsi/sg.c | 22 ++++++++++++++++------
fs/block_dev.c | 20 +++++++++++++-------
include/linux/blkdev.h | 6 ++++++
kernel/trace/blktrace.c | 14 ++++++++------
11 files changed, 73 insertions(+), 51 deletions(-)
diff --git a/block/bsg.c b/block/bsg.c
index 82d5882..7c50a26 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -843,9 +843,7 @@ static int bsg_open(struct inode *inode, struct file *file)
{
struct bsg_device *bd;
- lock_kernel();
bd = bsg_get_device(inode, file);
- unlock_kernel();
if (IS_ERR(bd))
return PTR_ERR(bd);
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index f26051f..11cfd3c 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -802,16 +802,16 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return compat_put_u64(arg, bdev->bd_inode->i_size);
case BLKTRACESETUP32:
- lock_kernel();
+ mutex_lock(&blkdev_mutex);
ret = compat_blk_trace_setup(bdev, compat_ptr(arg));
- unlock_kernel();
+ mutex_unlock(&blkdev_mutex);
return ret;
case BLKTRACESTART: /* compatible */
case BLKTRACESTOP: /* compatible */
case BLKTRACETEARDOWN: /* compatible */
- lock_kernel();
+ mutex_lock(&blkdev_mutex);
ret = blk_trace_ioctl(bdev, cmd, compat_ptr(arg));
- unlock_kernel();
+ mutex_unlock(&blkdev_mutex);
return ret;
default:
if (disk->fops->compat_ioctl)
diff --git a/block/ioctl.c b/block/ioctl.c
index 8905d2a..272cdce 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -169,9 +169,9 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode,
return disk->fops->ioctl(bdev, mode, cmd, arg);
if (disk->fops->locked_ioctl) {
- lock_kernel();
+ mutex_lock(&blkdev_mutex);
ret = disk->fops->locked_ioctl(bdev, mode, cmd, arg);
- unlock_kernel();
+ mutex_unlock(&blkdev_mutex);
return ret;
}
@@ -206,10 +206,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
if (ret != -EINVAL && ret != -ENOTTY)
return ret;
- lock_kernel();
+ mutex_lock(&blkdev_mutex);
fsync_bdev(bdev);
invalidate_bdev(bdev);
- unlock_kernel();
+ mutex_unlock(&blkdev_mutex);
return 0;
case BLKROSET:
@@ -221,9 +221,9 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
return -EACCES;
if (get_user(n, (int __user *)(arg)))
return -EFAULT;
- lock_kernel();
+ mutex_lock(&blkdev_mutex);
set_device_ro(bdev, n);
- unlock_kernel();
+ mutex_unlock(&blkdev_mutex);
return 0;
case BLKDISCARD: {
@@ -309,14 +309,14 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
bd_release(bdev);
return ret;
case BLKPG:
- lock_kernel();
+ mutex_lock(&blkdev_mutex);
ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg);
- unlock_kernel();
+ mutex_unlock(&blkdev_mutex);
break;
case BLKRRPART:
- lock_kernel();
+ mutex_lock(&blkdev_mutex);
ret = blkdev_reread_part(bdev);
- unlock_kernel();
+ mutex_unlock(&blkdev_mutex);
break;
case BLKGETSIZE:
size = bdev->bd_inode->i_size;
@@ -329,9 +329,9 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
case BLKTRACESTOP:
case BLKTRACESETUP:
case BLKTRACETEARDOWN:
- lock_kernel();
+ mutex_lock(&blkdev_mutex);
ret = blk_trace_ioctl(bdev, cmd, (char __user *) arg);
- unlock_kernel();
+ mutex_unlock(&blkdev_mutex);
break;
default:
ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index c5f22bb..8faaa12 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -6620,7 +6620,7 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
long ErrorCode = 0;
if (!capable(CAP_SYS_ADMIN)) return -EACCES;
- lock_kernel();
+ mutex_lock(&blkdev_mutex);
switch (Request)
{
case DAC960_IOCTL_GET_CONTROLLER_COUNT:
@@ -7051,7 +7051,7 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
default:
ErrorCode = -ENOTTY;
}
- unlock_kernel();
+ mutex_unlock(&blkdev_mutex);
return ErrorCode;
}
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index eb5ff05..0e21a9d 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1027,9 +1027,9 @@ static int do_ioctl(struct block_device *bdev, fmode_t mode,
unsigned cmd, unsigned long arg)
{
int ret;
- lock_kernel();
+ mutex_lock(&blkdev_mutex);
ret = cciss_ioctl(bdev, mode, cmd, arg);
- unlock_kernel();
+ mutex_unlock(&blkdev_mutex);
return ret;
}
diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c
index c397b3d..81660d0 100644
--- a/drivers/block/paride/pg.c
+++ b/drivers/block/paride/pg.c
@@ -518,7 +518,7 @@ static int pg_open(struct inode *inode, struct file *file)
struct pg *dev = &devices[unit];
int ret = 0;
- lock_kernel();
+ mutex_lock(&blkdev_mutex);
if ((unit >= PG_UNITS) || (!dev->present)) {
ret = -ENODEV;
goto out;
@@ -547,7 +547,7 @@ static int pg_open(struct inode *inode, struct file *file)
file->private_data = dev;
out:
- unlock_kernel();
+ mutex_unlock(&blkdev_mutex);
return ret;
}
diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c
index bc5825f..05e272e 100644
--- a/drivers/block/paride/pt.c
+++ b/drivers/block/paride/pt.c
@@ -650,9 +650,9 @@ static int pt_open(struct inode *inode, struct file *file)
struct pt_unit *tape = pt + unit;
int err;
- lock_kernel();
+ mutex_lock(&blkdev_mutex);
if (unit >= PT_UNITS || (!tape->present)) {
- unlock_kernel();
+ mutex_unlock(&blkdev_mutex);
return -ENODEV;
}
@@ -681,12 +681,12 @@ static int pt_open(struct inode *inode, struct file *file)
}
file->private_data = tape;
- unlock_kernel();
+ mutex_unlock(&blkdev_mutex);
return 0;
out:
atomic_inc(&tape->available);
- unlock_kernel();
+ mutex_unlock(&blkdev_mutex);
return err;
}
@@ -704,15 +704,15 @@ static long pt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
switch (mtop.mt_op) {
case MTREW:
- lock_kernel();
+ mutex_lock(&blkdev_mutex);
pt_rewind(tape);
- unlock_kernel();
+ mutex_unlock(&blkdev_mutex);
return 0;
case MTWEOF:
- lock_kernel();
+ mutex_lock(&blkdev_mutex);
pt_write_fm(tape);
- unlock_kernel();
+ mutex_unlock(&blkdev_mutex);
return 0;
default:
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index dee1c96..ec5b43e 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -229,7 +229,7 @@ sg_open(struct inode *inode, struct file *filp)
int res;
int retval;
- lock_kernel();
+ mutex_lock(&blkdev_mutex);
nonseekable_open(inode, filp);
SCSI_LOG_TIMEOUT(3, printk("sg_open: dev=%d, flags=0x%x\n", dev, flags));
sdp = sg_get_dev(dev);
@@ -307,7 +307,7 @@ error_out:
sg_put:
if (sdp)
sg_put_dev(sdp);
- unlock_kernel();
+ mutex_unlock(&blkdev_mutex);
return retval;
}
@@ -757,9 +757,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
return 0;
}
-static int
-sg_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd_in, unsigned long arg)
+static long sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
{
void __user *p = (void __user *)arg;
int __user *ip = p;
@@ -1078,6 +1076,17 @@ sg_ioctl(struct inode *inode, struct file *filp,
}
}
+static long sg_unlocked_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
+{
+ int ret;
+
+ mutex_lock(&blkdev_mutex);
+ ret = sg_ioctl(filp, cmd_in, arg);
+ mutex_unlock(&blkdev_mutex);
+
+ return ret;
+}
+
#ifdef CONFIG_COMPAT
static long sg_compat_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
{
@@ -1322,7 +1331,8 @@ static const struct file_operations sg_fops = {
.read = sg_read,
.write = sg_write,
.poll = sg_poll,
- .ioctl = sg_ioctl,
+ .llseek = generic_file_llseek,
+ .unlocked_ioctl = sg_unlocked_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = sg_compat_ioctl,
#endif
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 2a6d019..f43d24f 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -29,6 +29,9 @@
#include <asm/uaccess.h>
#include "internal.h"
+DEFINE_MUTEX(blkdev_mutex);
+EXPORT_SYMBOL_GPL(blkdev_mutex);
+
struct bdev_inode {
struct block_device bdev;
struct inode vfs_inode;
@@ -1191,7 +1194,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
return ret;
}
- lock_kernel();
restart:
ret = -ENXIO;
@@ -1277,7 +1279,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
if (for_part)
bdev->bd_part_count++;
mutex_unlock(&bdev->bd_mutex);
- unlock_kernel();
return 0;
out_clear:
@@ -1291,7 +1292,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
out_unlock_kernel:
- unlock_kernel();
if (disk)
module_put(disk->fops->owner);
@@ -1303,7 +1303,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
int blkdev_get(struct block_device *bdev, fmode_t mode)
{
- return __blkdev_get(bdev, mode, 0);
+ int ret;
+ mutex_lock(&blkdev_mutex);
+ ret = __blkdev_get(bdev, mode, 0);
+ mutex_unlock(&blkdev_mutex);
+ return ret;
}
EXPORT_SYMBOL(blkdev_get);
@@ -1357,7 +1361,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
struct block_device *victim = NULL;
mutex_lock_nested(&bdev->bd_mutex, for_part);
- lock_kernel();
if (for_part)
bdev->bd_part_count--;
@@ -1382,7 +1385,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
victim = bdev->bd_contains;
bdev->bd_contains = NULL;
}
- unlock_kernel();
mutex_unlock(&bdev->bd_mutex);
bdput(bdev);
if (victim)
@@ -1392,7 +1394,11 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
int blkdev_put(struct block_device *bdev, fmode_t mode)
{
- return __blkdev_put(bdev, mode, 0);
+ int ret;
+ mutex_lock(&blkdev_mutex);
+ ret = __blkdev_put(bdev, mode, 0);
+ mutex_unlock(&blkdev_mutex);
+ return ret;
}
EXPORT_SYMBOL(blkdev_put);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6690e8b..7810a2d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1292,6 +1292,12 @@ struct block_device_operations {
extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
unsigned long);
+
+/*
+ * replaces the big kernel lock in the block subsystem
+ */
+extern struct mutex blkdev_mutex;
+
#else /* CONFIG_BLOCK */
/*
* stubs for when the block layer is configured out
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index b3bc91a..5012fe1 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -305,7 +305,7 @@ static int blk_dropped_open(struct inode *inode, struct file *filp)
{
filp->private_data = inode->i_private;
- return 0;
+ return nonseekable_open(inode, filp);
}
static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
@@ -323,13 +323,14 @@ static const struct file_operations blk_dropped_fops = {
.owner = THIS_MODULE,
.open = blk_dropped_open,
.read = blk_dropped_read,
+ .llseek = no_llseek,
};
static int blk_msg_open(struct inode *inode, struct file *filp)
{
filp->private_data = inode->i_private;
- return 0;
+ return nonseekable_open(inode, filp);
}
static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
@@ -362,6 +363,7 @@ static const struct file_operations blk_msg_fops = {
.owner = THIS_MODULE,
.open = blk_msg_open,
.write = blk_msg_write,
+ .llseek = no_llseek,
};
/*
@@ -1581,7 +1583,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
struct block_device *bdev;
ssize_t ret = -ENXIO;
- lock_kernel();
+ mutex_lock(&blkdev_mutex);
bdev = bdget(part_devt(p));
if (bdev == NULL)
goto out_unlock_kernel;
@@ -1613,7 +1615,7 @@ out_unlock_bdev:
out_bdput:
bdput(bdev);
out_unlock_kernel:
- unlock_kernel();
+ mutex_unlock(&blkdev_mutex);
return ret;
}
@@ -1643,7 +1645,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
ret = -ENXIO;
- lock_kernel();
+ mutex_unlock(&blkdev_mutex);
p = dev_to_part(dev);
bdev = bdget(part_devt(p));
if (bdev == NULL)
@@ -1683,7 +1685,7 @@ out_unlock_bdev:
out_bdput:
bdput(bdev);
out_unlock_kernel:
- unlock_kernel();
+ mutex_unlock(&blkdev_mutex);
out:
return ret ? ret : count;
}
--
1.7.0.4
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] [RFC] block: replace BKL with global mutex
2010-04-14 20:36 [PATCH 1/2] [RFC] block: replace BKL with global mutex Arnd Bergmann
@ 2010-04-14 22:48 ` Douglas Gilbert
2010-04-15 7:11 ` Arnd Bergmann
0 siblings, 1 reply; 10+ messages in thread
From: Douglas Gilbert @ 2010-04-14 22:48 UTC (permalink / raw)
To: Arnd Bergmann
Cc: Jens Axboe, Vivek Goyal, Tejun Heo, Frederic Weisbecker,
FUJITA Tomonori, Martin K. Petersen, Steven Rostedt, Ingo Molnar,
John Kacur, linux-scsi, linux-kernel
Arnd Bergmann wrote:
> The block layer is one of the remaining users
> of the Big Kernel Lock. In there, it is used
> mainly for serializing the blkdev_get and
> blkdev_put functions and some ioctl implementations
> as well as some less common open functions of
> related character devices following a previous
> pushdown and parts of the blktrace code.
>
> The only one that seems to be a bit nasty is the
> blkdev_get function which is actually recursive
> and may try to get the BKL twice.
>
> All users except the one in blkdev_get seem to
> be outermost locks, meaning we don't rely on
> the release-on-sleep semantics to avoid deadlocks.
>
> The ctl_mutex (pktcdvd.ko), raw_mutex (raw.ko),
> state_mutex (dasd.ko), reconfig_mutex (md.ko),
> and jfs_log_mutex (jfs.ko) may be held when
> blkdev_get is called, but as far as I can tell,
> these mutexes are never acquired from any of the
> functions that get converted in this patch.
>
> In order to get rid of the BKL, this introduces
> a new global mutex called blkdev_mutex, which
> replaces the BKL in all drivers that directly
> interact with the block layer. In case of blkdev_get,
> the mutex is moved outside of the function itself
> in order to avoid the recursive taking of blkdev_mutex.
>
> Testing so far has shown no problems whatsoever
> from this patch, but the usage in blkdev_get
> may introduce extra latencies, and I may have
> missed corner cases where an block device ioctl
> function sleeps for a significant amount of time,
> which may be harmful to the performance of other
> threads.
<snip>
> diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
> index dee1c96..ec5b43e 100644
> --- a/drivers/scsi/sg.c
> +++ b/drivers/scsi/sg.c
> @@ -229,7 +229,7 @@ sg_open(struct inode *inode, struct file *filp)
> int res;
> int retval;
>
> - lock_kernel();
> + mutex_lock(&blkdev_mutex);
> nonseekable_open(inode, filp);
> SCSI_LOG_TIMEOUT(3, printk("sg_open: dev=%d, flags=0x%x\n", dev, flags));
> sdp = sg_get_dev(dev);
> @@ -307,7 +307,7 @@ error_out:
> sg_put:
> if (sdp)
> sg_put_dev(sdp);
> - unlock_kernel();
> + mutex_unlock(&blkdev_mutex);
> return retval;
> }
>
> @@ -757,9 +757,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
> return 0;
> }
>
> -static int
> -sg_ioctl(struct inode *inode, struct file *filp,
> - unsigned int cmd_in, unsigned long arg)
> +static long sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
> {
> void __user *p = (void __user *)arg;
> int __user *ip = p;
> @@ -1078,6 +1076,17 @@ sg_ioctl(struct inode *inode, struct file *filp,
> }
> }
>
> +static long sg_unlocked_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
> +{
> + int ret;
> +
> + mutex_lock(&blkdev_mutex);
> + ret = sg_ioctl(filp, cmd_in, arg);
> + mutex_unlock(&blkdev_mutex);
> +
> + return ret;
> +}
> +
> #ifdef CONFIG_COMPAT
> static long sg_compat_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
> {
> @@ -1322,7 +1331,8 @@ static const struct file_operations sg_fops = {
> .read = sg_read,
> .write = sg_write,
> .poll = sg_poll,
> - .ioctl = sg_ioctl,
> + .llseek = generic_file_llseek,
The sg driver has no seek semantics on its read() and
write() calls. And sg_open() calls nonseekable_open(). So
.llseek = no_llseek,
seems more appropriate.
> + .unlocked_ioctl = sg_unlocked_ioctl,
> #ifdef CONFIG_COMPAT
> .compat_ioctl = sg_compat_ioctl,
> #endif
And I just checked st.c (SCSI tape driver) and it calls
lock_kernel() .
Doug Gilbert
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] [RFC] block: replace BKL with global mutex
2010-04-14 22:48 ` Douglas Gilbert
@ 2010-04-15 7:11 ` Arnd Bergmann
2010-04-15 13:15 ` Douglas Gilbert
0 siblings, 1 reply; 10+ messages in thread
From: Arnd Bergmann @ 2010-04-15 7:11 UTC (permalink / raw)
To: dgilbert
Cc: Jens Axboe, Vivek Goyal, Tejun Heo, Frederic Weisbecker,
FUJITA Tomonori, Martin K. Petersen, Steven Rostedt, Ingo Molnar,
John Kacur, linux-scsi, linux-kernel
On Thursday 15 April 2010 00:48:19 Douglas Gilbert wrote:
> > @@ -1322,7 +1331,8 @@ static const struct file_operations sg_fops = {
> > .read = sg_read,
> > .write = sg_write,
> > .poll = sg_poll,
> > - .ioctl = sg_ioctl,
> > + .llseek = generic_file_llseek,
>
> The sg driver has no seek semantics on its read() and
> write() calls. And sg_open() calls nonseekable_open(). So
> .llseek = no_llseek,
> seems more appropriate.
Ok, I missed the nonseekable_open here and assumed someone
might be calling seek on it. I'll use no_llseek then, or
just leave it alone.
> > + .unlocked_ioctl = sg_unlocked_ioctl,
> > #ifdef CONFIG_COMPAT
> > .compat_ioctl = sg_compat_ioctl,
> > #endif
>
> And I just checked st.c (SCSI tape driver) and it calls
> lock_kernel() .
Ah, good point. So even if the st driver does not need
any locking against the block layer, it might need to
lock its ioctl against sg.
The most simple solution for this would be to let sg
take both blkdev_mutex and the BKL, which of course
feels like a step backwards.
A better way is to get rid of the BKL in sg, which requires
a better understanding of what it's actually protecting.
It only gets it in the open and ioctl functions, which is a
result of the pushdown from the respective file operations.
Chances are that it's not needed at all, but that's really
hard to tell. Can you shed some more light on this?
Arnd
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] [RFC] block: replace BKL with global mutex
2010-04-15 7:11 ` Arnd Bergmann
@ 2010-04-15 13:15 ` Douglas Gilbert
2010-04-15 14:29 ` Arnd Bergmann
0 siblings, 1 reply; 10+ messages in thread
From: Douglas Gilbert @ 2010-04-15 13:15 UTC (permalink / raw)
To: Arnd Bergmann
Cc: Jens Axboe, Vivek Goyal, Tejun Heo, Frederic Weisbecker,
FUJITA Tomonori, Martin K. Petersen, Steven Rostedt, Ingo Molnar,
John Kacur, linux-scsi, linux-kernel, Kai Makisara
Arnd Bergmann wrote:
> On Thursday 15 April 2010 00:48:19 Douglas Gilbert wrote:
>
>>> @@ -1322,7 +1331,8 @@ static const struct file_operations sg_fops = {
>>> .read = sg_read,
>>> .write = sg_write,
>>> .poll = sg_poll,
>>> - .ioctl = sg_ioctl,
>>> + .llseek = generic_file_llseek,
>> The sg driver has no seek semantics on its read() and
>> write() calls. And sg_open() calls nonseekable_open(). So
>> .llseek = no_llseek,
>> seems more appropriate.
>
> Ok, I missed the nonseekable_open here and assumed someone
> might be calling seek on it. I'll use no_llseek then, or
> just leave it alone.
>
>>> + .unlocked_ioctl = sg_unlocked_ioctl,
>>> #ifdef CONFIG_COMPAT
>>> .compat_ioctl = sg_compat_ioctl,
>>> #endif
>> And I just checked st.c (SCSI tape driver) and it calls
>> lock_kernel() .
>
> Ah, good point. So even if the st driver does not need
> any locking against the block layer, it might need to
> lock its ioctl against sg.
At the level of SCSI commands, tape device state assumptions
made by the st driver could be compromised by SCSI commands
sent by the sg driver. However the BKL was never meant
to address that concern.
From the comment in st_open() [st.c] it would be using
nonseekable_open() as well but there are apps out there
that do lseek()s on its file descriptors. Not sure
how long nonseekable_open() has been in the sg driver
but no-one has complained to me about it.
> The most simple solution for this would be to let sg
> take both blkdev_mutex and the BKL, which of course
> feels like a step backwards.
>
> A better way is to get rid of the BKL in sg, which requires
> a better understanding of what it's actually protecting.
> It only gets it in the open and ioctl functions, which is a
> result of the pushdown from the respective file operations.
> Chances are that it's not needed at all, but that's really
> hard to tell. Can you shed some more light on this?
The BKL is not used to protect any of the internal
objects within the sg driver. From memory it was added
in some large code sweep through, not unlike what you
are proposing now.
So I would not be concerned about any kernel locking
interactions between the sg and st drivers. I have
added Kai Makisara (st maintainer) to the cc list.
Doug Gilbert
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] [RFC] block: replace BKL with global mutex
2010-04-15 13:15 ` Douglas Gilbert
@ 2010-04-15 14:29 ` Arnd Bergmann
2010-04-15 20:03 ` Kai Makisara
0 siblings, 1 reply; 10+ messages in thread
From: Arnd Bergmann @ 2010-04-15 14:29 UTC (permalink / raw)
To: dgilbert
Cc: Jens Axboe, Vivek Goyal, Tejun Heo, Frederic Weisbecker,
FUJITA Tomonori, Martin K. Petersen, Steven Rostedt, Ingo Molnar,
John Kacur, linux-scsi, linux-kernel, Kai Makisara
On Thursday 15 April 2010, Douglas Gilbert wrote:
> At the level of SCSI commands, tape device state assumptions
> made by the st driver could be compromised by SCSI commands
> sent by the sg driver. However the BKL was never meant
> to address that concern.
>
> From the comment in st_open() [st.c] it would be using
> nonseekable_open() as well but there are apps out there
> that do lseek()s on its file descriptors. Not sure
> how long nonseekable_open() has been in the sg driver
> but no-one has complained to me about it.
It's been there for a long time, at least since the start of
the git history, and it's very likely correct this way.
> > The most simple solution for this would be to let sg
> > take both blkdev_mutex and the BKL, which of course
> > feels like a step backwards.
> >
> > A better way is to get rid of the BKL in sg, which requires
> > a better understanding of what it's actually protecting.
> > It only gets it in the open and ioctl functions, which is a
> > result of the pushdown from the respective file operations.
> > Chances are that it's not needed at all, but that's really
> > hard to tell. Can you shed some more light on this?
>
> The BKL is not used to protect any of the internal
> objects within the sg driver. From memory it was added
> in some large code sweep through, not unlike what you
> are proposing now.
The one in the open function was moved there when the
BKL was moved out from vfs_open(), while the use in ioctl is
implicit by never having been converted to unlocked_ioctl.
I don't see anything that really needs BKL protection in
sg_open, so that can probably just be killed. For sg_ioctl,
at least the blk_trace_* and scsi_ioctl stuff is currently
called with BKL held everywhere else (not in st_ioctl though)
and may still need that.
> So I would not be concerned about any kernel locking
> interactions between the sg and st drivers. I have
> added Kai Makisara (st maintainer) to the cc list.
Ok. I've also checked st.c again and noticed that it
doesn't use use the BKL in ioctl() but only in open(),
which is very unlikely to race against anything in sg.c
or the block subsystem.
Arnd
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] [RFC] block: replace BKL with global mutex
2010-04-15 14:29 ` Arnd Bergmann
@ 2010-04-15 20:03 ` Kai Makisara
2010-04-15 20:51 ` [PATCH] scsi/st: remove BKL from open Arnd Bergmann
0 siblings, 1 reply; 10+ messages in thread
From: Kai Makisara @ 2010-04-15 20:03 UTC (permalink / raw)
To: Arnd Bergmann
Cc: dgilbert, Jens Axboe, Vivek Goyal, Tejun Heo, Frederic Weisbecker,
FUJITA Tomonori, Martin K. Petersen, Steven Rostedt, Ingo Molnar,
John Kacur, linux-scsi, linux-kernel
On Thu, 15 Apr 2010, Arnd Bergmann wrote:
> On Thursday 15 April 2010, Douglas Gilbert wrote:
> > At the level of SCSI commands, tape device state assumptions
> > made by the st driver could be compromised by SCSI commands
> > sent by the sg driver. However the BKL was never meant
> > to address that concern.
> >
...
> > So I would not be concerned about any kernel locking
> > interactions between the sg and st drivers. I have
> > added Kai Makisara (st maintainer) to the cc list.
>
> Ok. I've also checked st.c again and noticed that it
> doesn't use use the BKL in ioctl() but only in open(),
> which is very unlikely to race against anything in sg.c
> or the block subsystem.
>
Using sg with a tape opened by st may lead to incorrect state within st.
However, to prevent this would be far too complicated and so the
coordination responsibility is left to the user. (There are some
cases where use of both sg and st can be justified but then the user
should know what he/she is doing and properly serialize access in user
space.)
BKL does not have any "hidden duties" in open() in st.c. I don't know any
reason why it would be needed, but, because I have not been absolutely
sure, I have not removed it. (The tape devices are not opened often and so
the overhead has been negligible. That is, while BKL has been available.)
If you don't see any reason for BKL in open(), go ahead and remove it.
Kai
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH] scsi/st: remove BKL from open
2010-04-15 20:03 ` Kai Makisara
@ 2010-04-15 20:51 ` Arnd Bergmann
2010-04-30 2:18 ` Frederic Weisbecker
0 siblings, 1 reply; 10+ messages in thread
From: Arnd Bergmann @ 2010-04-15 20:51 UTC (permalink / raw)
To: Kai Makisara
Cc: dgilbert, Jens Axboe, Vivek Goyal, Tejun Heo, Frederic Weisbecker,
FUJITA Tomonori, Martin K. Petersen, Steven Rostedt, Ingo Molnar,
John Kacur, linux-scsi, linux-kernel
The st_open function is serialized through the st_dev_arr_lock
and the STp->in_use flag, so there is no race that the BKL
can protect against in the driver itself, and the function
does not access any global state outside of the driver that
might be protected with the BKL.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
drivers/scsi/st.c | 9 +--------
1 files changed, 1 insertions(+), 8 deletions(-)
On Thursday 15 April 2010 22:03:24 Kai Makisara wrote:
> BKL does not have any "hidden duties" in open() in st.c. I don't know any
> reason why it would be needed, but, because I have not been absolutely
> sure, I have not removed it. (The tape devices are not opened often and so
> the overhead has been negligible. That is, while BKL has been available.)
> If you don't see any reason for BKL in open(), go ahead and remove it.
Ok, that certainly simplifies the sg.c conversion. Thanks!
Arnd
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 3ea1a71..b1462e0 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -39,7 +39,6 @@ static const char *verstr = "20081215";
#include <linux/cdev.h>
#include <linux/delay.h>
#include <linux/mutex.h>
-#include <linux/smp_lock.h>
#include <asm/uaccess.h>
#include <asm/dma.h>
@@ -1180,7 +1179,6 @@ static int st_open(struct inode *inode, struct file *filp)
int dev = TAPE_NR(inode);
char *name;
- lock_kernel();
/*
* We really want to do nonseekable_open(inode, filp); here, but some
* versions of tar incorrectly call lseek on tapes and bail out if that
@@ -1188,10 +1186,8 @@ static int st_open(struct inode *inode, struct file *filp)
*/
filp->f_mode &= ~(FMODE_PREAD | FMODE_PWRITE);
- if (!(STp = scsi_tape_get(dev))) {
- unlock_kernel();
+ if (!(STp = scsi_tape_get(dev)))
return -ENXIO;
- }
write_lock(&st_dev_arr_lock);
filp->private_data = STp;
@@ -1200,7 +1196,6 @@ static int st_open(struct inode *inode, struct file *filp)
if (STp->in_use) {
write_unlock(&st_dev_arr_lock);
scsi_tape_put(STp);
- unlock_kernel();
DEB( printk(ST_DEB_MSG "%s: Device already in use.\n", name); )
return (-EBUSY);
}
@@ -1249,14 +1244,12 @@ static int st_open(struct inode *inode, struct file *filp)
retval = (-EIO);
goto err_out;
}
- unlock_kernel();
return 0;
err_out:
normalize_buffer(STp->buffer);
STp->in_use = 0;
scsi_tape_put(STp);
- unlock_kernel();
return retval;
}
--
1.7.0.4
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH] scsi/st: remove BKL from open
2010-04-15 20:51 ` [PATCH] scsi/st: remove BKL from open Arnd Bergmann
@ 2010-04-30 2:18 ` Frederic Weisbecker
2010-04-30 19:03 ` Kai Makisara
0 siblings, 1 reply; 10+ messages in thread
From: Frederic Weisbecker @ 2010-04-30 2:18 UTC (permalink / raw)
To: Arnd Bergmann, Kai Makisara
Cc: dgilbert, Jens Axboe, Vivek Goyal, Tejun Heo, FUJITA Tomonori,
Martin K. Petersen, Steven Rostedt, Ingo Molnar, John Kacur,
linux-scsi, linux-kernel, James Bottomley
On Thu, Apr 15, 2010 at 10:51:38PM +0200, Arnd Bergmann wrote:
> The st_open function is serialized through the st_dev_arr_lock
> and the STp->in_use flag, so there is no race that the BKL
> can protect against in the driver itself, and the function
> does not access any global state outside of the driver that
> might be protected with the BKL.
>
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> ---
Kai, can we get your ack on this?
Thanks.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] scsi/st: remove BKL from open
2010-04-30 2:18 ` Frederic Weisbecker
@ 2010-04-30 19:03 ` Kai Makisara
0 siblings, 0 replies; 10+ messages in thread
From: Kai Makisara @ 2010-04-30 19:03 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Arnd Bergmann, dgilbert, Jens Axboe, Vivek Goyal, Tejun Heo,
FUJITA Tomonori, Martin K. Petersen, Steven Rostedt, Ingo Molnar,
John Kacur, linux-scsi, linux-kernel, James Bottomley
On Fri, 30 Apr 2010, Frederic Weisbecker wrote:
> On Thu, Apr 15, 2010 at 10:51:38PM +0200, Arnd Bergmann wrote:
> > The st_open function is serialized through the st_dev_arr_lock
> > and the STp->in_use flag, so there is no race that the BKL
> > can protect against in the driver itself, and the function
> > does not access any global state outside of the driver that
> > might be protected with the BKL.
> >
> > Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> > ---
>
>
>
> Kai, can we get your ack on this?
>
I am sorry, I have forgotten to send it.
Acked-by: Kai Makisara <kai.makisara@kolumbus.fi>
Thanks,
Kai
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH] scsi/st: remove BKL from open
@ 2010-06-01 21:16 Arnd Bergmann
0 siblings, 0 replies; 10+ messages in thread
From: Arnd Bergmann @ 2010-06-01 21:16 UTC (permalink / raw)
Cc: Arnd Bergmann, Willem Riede, Kai Mäkisara,
James E.J. Bottomley, osst-users, linux-scsi, linux-kernel
The st_open function is serialized through the st_dev_arr_lock
and the STp->in_use flag, so there is no race that the BKL
can protect against in the driver itself, and the function
does not access any global state outside of the driver that
might be protected with the BKL.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Willem Riede <osst@riede.org>
Cc: "Kai Mäkisara" <Kai.Makisara@kolumbus.fi>
Cc: "James E.J. Bottomley" <James.Bottomley@suse.de>
Cc: osst-users@lists.sourceforge.net
Cc: linux-scsi@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
drivers/scsi/st.c | 9 +--------
1 files changed, 1 insertions(+), 8 deletions(-)
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 24211d0..8d4c4bb 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -39,7 +39,6 @@ static const char *verstr = "20081215";
#include <linux/cdev.h>
#include <linux/delay.h>
#include <linux/mutex.h>
-#include <linux/smp_lock.h>
#include <asm/uaccess.h>
#include <asm/dma.h>
@@ -1180,7 +1179,6 @@ static int st_open(struct inode *inode, struct file *filp)
int dev = TAPE_NR(inode);
char *name;
- lock_kernel();
/*
* We really want to do nonseekable_open(inode, filp); here, but some
* versions of tar incorrectly call lseek on tapes and bail out if that
@@ -1188,10 +1186,8 @@ static int st_open(struct inode *inode, struct file *filp)
*/
filp->f_mode &= ~(FMODE_PREAD | FMODE_PWRITE);
- if (!(STp = scsi_tape_get(dev))) {
- unlock_kernel();
+ if (!(STp = scsi_tape_get(dev)))
return -ENXIO;
- }
write_lock(&st_dev_arr_lock);
filp->private_data = STp;
@@ -1200,7 +1196,6 @@ static int st_open(struct inode *inode, struct file *filp)
if (STp->in_use) {
write_unlock(&st_dev_arr_lock);
scsi_tape_put(STp);
- unlock_kernel();
DEB( printk(ST_DEB_MSG "%s: Device already in use.\n", name); )
return (-EBUSY);
}
@@ -1249,14 +1244,12 @@ static int st_open(struct inode *inode, struct file *filp)
retval = (-EIO);
goto err_out;
}
- unlock_kernel();
return 0;
err_out:
normalize_buffer(STp->buffer);
STp->in_use = 0;
scsi_tape_put(STp);
- unlock_kernel();
return retval;
}
--
1.7.0.4
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 10+ messages in thread
end of thread, other threads:[~2010-06-01 21:17 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-04-14 20:36 [PATCH 1/2] [RFC] block: replace BKL with global mutex Arnd Bergmann
2010-04-14 22:48 ` Douglas Gilbert
2010-04-15 7:11 ` Arnd Bergmann
2010-04-15 13:15 ` Douglas Gilbert
2010-04-15 14:29 ` Arnd Bergmann
2010-04-15 20:03 ` Kai Makisara
2010-04-15 20:51 ` [PATCH] scsi/st: remove BKL from open Arnd Bergmann
2010-04-30 2:18 ` Frederic Weisbecker
2010-04-30 19:03 ` Kai Makisara
-- strict thread matches above, loose matches on Subject: below --
2010-06-01 21:16 Arnd Bergmann
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox