public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] new bd_mutex lockdep annotation
@ 2006-09-13 17:43 Peter Zijlstra
  2006-09-13 17:43 ` [PATCH 1/2] remove the old " Peter Zijlstra
  2006-09-13 17:43 ` [PATCH 2/2] new " Peter Zijlstra
  0 siblings, 2 replies; 7+ messages in thread
From: Peter Zijlstra @ 2006-09-13 17:43 UTC (permalink / raw)
  To: linux-kernel
  Cc: Neil Brown, Ingo Molnar, Arjan van de Ven, Andrew Morton,
	Jason Baron, Peter Zijlstra

--

Andrew thought the old bd_mutex lockdep annotations were too complex, 
Arjan too said something like this several weeks ago and suggested I do
something with lockdep_set_class() instead of mutex_lock_nested().


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/2] remove the old bd_mutex lockdep annotation
  2006-09-13 17:43 [PATCH 0/2] new bd_mutex lockdep annotation Peter Zijlstra
@ 2006-09-13 17:43 ` Peter Zijlstra
  2006-09-13 17:43 ` [PATCH 2/2] new " Peter Zijlstra
  1 sibling, 0 replies; 7+ messages in thread
From: Peter Zijlstra @ 2006-09-13 17:43 UTC (permalink / raw)
  To: linux-kernel
  Cc: Neil Brown, Ingo Molnar, Arjan van de Ven, Andrew Morton,
	Jason Baron, Peter Zijlstra

[-- Attachment #1: remove_old_block_annotation.patch --]
[-- Type: text/plain, Size: 11228 bytes --]

Remove the old complex and crufty bd_mutex annotation.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Andrew Morton <akpm@osdl.org>
Cc: Jason Baron <jbaron@redhat.com>
---
 block/ioctl.c      |    4 -
 drivers/md/md.c    |    6 -
 fs/block_dev.c     |  180 ++++++++++++++++-------------------------------------
 include/linux/fs.h |   17 -----
 4 files changed, 60 insertions(+), 147 deletions(-)

Index: linux-2.6-mm/drivers/md/md.c
===================================================================
--- linux-2.6-mm.orig/drivers/md/md.c
+++ linux-2.6-mm/drivers/md/md.c
@@ -1408,7 +1408,7 @@ static int lock_rdev(mdk_rdev_t *rdev, d
 	struct block_device *bdev;
 	char b[BDEVNAME_SIZE];
 
-	bdev = open_partition_by_devnum(dev, FMODE_READ|FMODE_WRITE);
+	bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
 	if (IS_ERR(bdev)) {
 		printk(KERN_ERR "md: could not open %s.\n",
 			__bdevname(dev, b));
@@ -1418,7 +1418,7 @@ static int lock_rdev(mdk_rdev_t *rdev, d
 	if (err) {
 		printk(KERN_ERR "md: could not bd_claim %s.\n",
 			bdevname(bdev, b));
-		blkdev_put_partition(bdev);
+		blkdev_put(bdev);
 		return err;
 	}
 	rdev->bdev = bdev;
@@ -1432,7 +1432,7 @@ static void unlock_rdev(mdk_rdev_t *rdev
 	if (!bdev)
 		MD_BUG();
 	bd_release(bdev);
-	blkdev_put_partition(bdev);
+	blkdev_put(bdev);
 }
 
 void md_autodetect_dev(dev_t dev);
Index: linux-2.6-mm/fs/block_dev.c
===================================================================
--- linux-2.6-mm.orig/fs/block_dev.c
+++ linux-2.6-mm/fs/block_dev.c
@@ -751,7 +751,7 @@ static int bd_claim_by_kobject(struct bl
 	if (!bo)
 		return -ENOMEM;
 
-	mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
+	mutex_lock(&bdev->bd_mutex);
 	res = bd_claim(bdev, holder);
 	if (res == 0)
 		res = add_bd_holder(bdev, bo);
@@ -778,7 +778,7 @@ static void bd_release_from_kobject(stru
 	if (!kobj)
 		return;
 
-	mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
+	mutex_lock(&bdev->bd_mutex);
 	bd_release(bdev);
 	if ((bo = del_bd_holder(bdev, kobj)))
 		free_bd_holder(bo);
@@ -836,22 +836,6 @@ struct block_device *open_by_devnum(dev_
 
 EXPORT_SYMBOL(open_by_devnum);
 
-static int
-blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags);
-
-struct block_device *open_partition_by_devnum(dev_t dev, unsigned mode)
-{
-	struct block_device *bdev = bdget(dev);
-	int err = -ENOMEM;
-	int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY;
-	if (bdev)
-		err = blkdev_get_partition(bdev, mode, flags);
-	return err ? ERR_PTR(err) : bdev;
-}
-
-EXPORT_SYMBOL(open_partition_by_devnum);
-
-
 /*
  * This routine checks whether a removable media has been changed,
  * and invalidates all buffer-cache-entries in that case. This
@@ -902,66 +886,7 @@ void bd_set_size(struct block_device *bd
 }
 EXPORT_SYMBOL(bd_set_size);
 
-static int __blkdev_put(struct block_device *bdev, unsigned int subclass)
-{
-	int ret = 0;
-	struct inode *bd_inode = bdev->bd_inode;
-	struct gendisk *disk = bdev->bd_disk;
-
-	mutex_lock_nested(&bdev->bd_mutex, subclass);
-	lock_kernel();
-	if (!--bdev->bd_openers) {
-		sync_blockdev(bdev);
-		kill_bdev(bdev);
-	}
-	if (bdev->bd_contains == bdev) {
-		if (disk->fops->release)
-			ret = disk->fops->release(bd_inode, NULL);
-	} else {
-		mutex_lock_nested(&bdev->bd_contains->bd_mutex,
-				  subclass + 1);
-		bdev->bd_contains->bd_part_count--;
-		mutex_unlock(&bdev->bd_contains->bd_mutex);
-	}
-	if (!bdev->bd_openers) {
-		struct module *owner = disk->fops->owner;
-
-		put_disk(disk);
-		module_put(owner);
-
-		if (bdev->bd_contains != bdev) {
-			kobject_put(&bdev->bd_part->kobj);
-			bdev->bd_part = NULL;
-		}
-		bdev->bd_disk = NULL;
-		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
-		if (bdev != bdev->bd_contains)
-			__blkdev_put(bdev->bd_contains, subclass + 1);
-		bdev->bd_contains = NULL;
-	}
-	unlock_kernel();
-	mutex_unlock(&bdev->bd_mutex);
-	bdput(bdev);
-	return ret;
-}
-
-int blkdev_put(struct block_device *bdev)
-{
-	return __blkdev_put(bdev, BD_MUTEX_NORMAL);
-}
-EXPORT_SYMBOL(blkdev_put);
-
-int blkdev_put_partition(struct block_device *bdev)
-{
-	return __blkdev_put(bdev, BD_MUTEX_PARTITION);
-}
-EXPORT_SYMBOL(blkdev_put_partition);
-
-static int
-blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags);
-
-static int
-do_open(struct block_device *bdev, struct file *file, unsigned int subclass)
+static int do_open(struct block_device *bdev, struct file *file)
 {
 	struct module *owner = NULL;
 	struct gendisk *disk;
@@ -978,8 +903,7 @@ do_open(struct block_device *bdev, struc
 	}
 	owner = disk->fops->owner;
 
-	mutex_lock_nested(&bdev->bd_mutex, subclass);
-
+	mutex_lock(&bdev->bd_mutex);
 	if (!bdev->bd_openers) {
 		bdev->bd_disk = disk;
 		bdev->bd_contains = bdev;
@@ -1006,11 +930,11 @@ do_open(struct block_device *bdev, struc
 			ret = -ENOMEM;
 			if (!whole)
 				goto out_first;
-			ret = blkdev_get_whole(whole, file->f_mode, file->f_flags);
+			ret = blkdev_get(whole, file->f_mode, file->f_flags);
 			if (ret)
 				goto out_first;
 			bdev->bd_contains = whole;
-			mutex_lock_nested(&whole->bd_mutex, BD_MUTEX_WHOLE);
+			mutex_lock(&whole->bd_mutex);
 			whole->bd_part_count++;
 			p = disk->part[part - 1];
 			bdev->bd_inode->i_data.backing_dev_info =
@@ -1038,8 +962,7 @@ do_open(struct block_device *bdev, struc
 			if (bdev->bd_invalidated)
 				rescan_partitions(bdev->bd_disk, bdev);
 		} else {
-			mutex_lock_nested(&bdev->bd_contains->bd_mutex,
-					  BD_MUTEX_WHOLE);
+			mutex_lock(&bdev->bd_contains->bd_mutex);
 			bdev->bd_contains->bd_part_count++;
 			mutex_unlock(&bdev->bd_contains->bd_mutex);
 		}
@@ -1053,7 +976,7 @@ out_first:
 	bdev->bd_disk = NULL;
 	bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
 	if (bdev != bdev->bd_contains)
-		__blkdev_put(bdev->bd_contains, BD_MUTEX_WHOLE);
+		blkdev_put(bdev->bd_contains);
 	bdev->bd_contains = NULL;
 	put_disk(disk);
 	module_put(owner);
@@ -1080,49 +1003,11 @@ int blkdev_get(struct block_device *bdev
 	fake_file.f_dentry = &fake_dentry;
 	fake_dentry.d_inode = bdev->bd_inode;
 
-	return do_open(bdev, &fake_file, BD_MUTEX_NORMAL);
+	return do_open(bdev, &fake_file);
 }
 
 EXPORT_SYMBOL(blkdev_get);
 
-static int
-blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags)
-{
-	/*
-	 * This crockload is due to bad choice of ->open() type.
-	 * It will go away.
-	 * For now, block device ->open() routine must _not_
-	 * examine anything in 'inode' argument except ->i_rdev.
-	 */
-	struct file fake_file = {};
-	struct dentry fake_dentry = {};
-	fake_file.f_mode = mode;
-	fake_file.f_flags = flags;
-	fake_file.f_dentry = &fake_dentry;
-	fake_dentry.d_inode = bdev->bd_inode;
-
-	return do_open(bdev, &fake_file, BD_MUTEX_WHOLE);
-}
-
-static int
-blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags)
-{
-	/*
-	 * This crockload is due to bad choice of ->open() type.
-	 * It will go away.
-	 * For now, block device ->open() routine must _not_
-	 * examine anything in 'inode' argument except ->i_rdev.
-	 */
-	struct file fake_file = {};
-	struct dentry fake_dentry = {};
-	fake_file.f_mode = mode;
-	fake_file.f_flags = flags;
-	fake_file.f_dentry = &fake_dentry;
-	fake_dentry.d_inode = bdev->bd_inode;
-
-	return do_open(bdev, &fake_file, BD_MUTEX_PARTITION);
-}
-
 static int blkdev_open(struct inode * inode, struct file * filp)
 {
 	struct block_device *bdev;
@@ -1138,7 +1023,7 @@ static int blkdev_open(struct inode * in
 
 	bdev = bd_acquire(inode);
 
-	res = do_open(bdev, filp, BD_MUTEX_NORMAL);
+	res = do_open(bdev, filp);
 	if (res)
 		return res;
 
@@ -1152,6 +1037,51 @@ static int blkdev_open(struct inode * in
 	return res;
 }
 
+int blkdev_put(struct block_device *bdev)
+{
+	int ret = 0;
+	struct inode *bd_inode = bdev->bd_inode;
+	struct gendisk *disk = bdev->bd_disk;
+
+	mutex_lock(&bdev->bd_mutex);
+	lock_kernel();
+	if (!--bdev->bd_openers) {
+		sync_blockdev(bdev);
+		kill_bdev(bdev);
+	}
+	if (bdev->bd_contains == bdev) {
+		if (disk->fops->release)
+			ret = disk->fops->release(bd_inode, NULL);
+	} else {
+		mutex_lock(&bdev->bd_contains->bd_mutex);
+		bdev->bd_contains->bd_part_count--;
+		mutex_unlock(&bdev->bd_contains->bd_mutex);
+	}
+	if (!bdev->bd_openers) {
+		struct module *owner = disk->fops->owner;
+
+		put_disk(disk);
+		module_put(owner);
+
+		if (bdev->bd_contains != bdev) {
+			kobject_put(&bdev->bd_part->kobj);
+			bdev->bd_part = NULL;
+		}
+		bdev->bd_disk = NULL;
+		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
+		if (bdev != bdev->bd_contains) {
+			blkdev_put(bdev->bd_contains);
+		}
+		bdev->bd_contains = NULL;
+	}
+	unlock_kernel();
+	mutex_unlock(&bdev->bd_mutex);
+	bdput(bdev);
+	return ret;
+}
+
+EXPORT_SYMBOL(blkdev_put);
+
 static int blkdev_close(struct inode * inode, struct file * filp)
 {
 	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
Index: linux-2.6-mm/include/linux/fs.h
===================================================================
--- linux-2.6-mm.orig/include/linux/fs.h
+++ linux-2.6-mm/include/linux/fs.h
@@ -504,21 +504,6 @@ struct block_device {
 };
 
 /*
- * bdev->bd_mutex nesting subclasses for the lock validator:
- *
- * 0: normal
- * 1: 'whole'
- * 2: 'partition'
- */
-enum bdev_bd_mutex_lock_class
-{
-	BD_MUTEX_NORMAL,
-	BD_MUTEX_WHOLE,
-	BD_MUTEX_PARTITION
-};
-
-
-/*
  * Radix-tree tags, for tagging dirty and writeback pages within the pagecache
  * radix trees
  */
@@ -1596,7 +1581,6 @@ extern void bd_set_size(struct block_dev
 extern void bd_forget(struct inode *inode);
 extern void bdput(struct block_device *);
 extern struct block_device *open_by_devnum(dev_t, unsigned);
-extern struct block_device *open_partition_by_devnum(dev_t, unsigned);
 extern const struct address_space_operations def_blk_aops;
 #else
 static inline void bd_forget(struct inode *inode) {}
@@ -1614,7 +1598,6 @@ extern int blkdev_driver_ioctl(struct in
 extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
 extern int blkdev_get(struct block_device *, mode_t, unsigned);
 extern int blkdev_put(struct block_device *);
-extern int blkdev_put_partition(struct block_device *);
 extern int bd_claim(struct block_device *, void *);
 extern void bd_release(struct block_device *);
 #ifdef CONFIG_SYSFS
Index: linux-2.6-mm/block/ioctl.c
===================================================================
--- linux-2.6-mm.orig/block/ioctl.c
+++ linux-2.6-mm/block/ioctl.c
@@ -72,7 +72,7 @@ static int blkpg_ioctl(struct block_devi
 			bdevp = bdget_disk(disk, part);
 			if (!bdevp)
 				return -ENOMEM;
-			mutex_lock_nested(&bdevp->bd_mutex, BD_MUTEX_PARTITION);
+			mutex_lock(&bdevp->bd_mutex);
 			if (bdevp->bd_openers) {
 				mutex_unlock(&bdevp->bd_mutex);
 				bdput(bdevp);
@@ -82,7 +82,7 @@ static int blkpg_ioctl(struct block_devi
 			fsync_bdev(bdevp);
 			invalidate_bdev(bdevp, 0);
 
-			mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_WHOLE);
+			mutex_lock(&bdev->bd_mutex);
 			delete_partition(disk, part);
 			mutex_unlock(&bdev->bd_mutex);
 			mutex_unlock(&bdevp->bd_mutex);

--


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/2] new bd_mutex lockdep annotation
  2006-09-13 17:43 [PATCH 0/2] new bd_mutex lockdep annotation Peter Zijlstra
  2006-09-13 17:43 ` [PATCH 1/2] remove the old " Peter Zijlstra
@ 2006-09-13 17:43 ` Peter Zijlstra
  2006-09-13 18:15   ` Arjan van de Ven
  2006-09-14  7:11   ` Neil Brown
  1 sibling, 2 replies; 7+ messages in thread
From: Peter Zijlstra @ 2006-09-13 17:43 UTC (permalink / raw)
  To: linux-kernel
  Cc: Neil Brown, Ingo Molnar, Arjan van de Ven, Andrew Morton,
	Jason Baron, Peter Zijlstra

[-- Attachment #1: new_block_annotation.patch --]
[-- Type: text/plain, Size: 1249 bytes --]

Use the gendisk partition number to set a lock class.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Andrew Morton <akpm@osdl.org>
Cc: Jason Baron <jbaron@redhat.com>
---
 fs/block_dev.c |    9 +++++++++
 1 file changed, 9 insertions(+)

Index: linux-2.6-mm/fs/block_dev.c
===================================================================
--- linux-2.6-mm.orig/fs/block_dev.c
+++ linux-2.6-mm/fs/block_dev.c
@@ -357,10 +357,14 @@ static int bdev_set(struct inode *inode,
 
 static LIST_HEAD(all_bdevs);
 
+static struct lock_class_key bdev_part_lock_key;
+
 struct block_device *bdget(dev_t dev)
 {
 	struct block_device *bdev;
 	struct inode *inode;
+	struct gendisk *disk;
+	int part = 0;
 
 	inode = iget5_locked(bd_mnt->mnt_sb, hash(dev),
 			bdev_test, bdev_set, &dev);
@@ -386,6 +390,11 @@ struct block_device *bdget(dev_t dev)
 		list_add(&bdev->bd_list, &all_bdevs);
 		spin_unlock(&bdev_lock);
 		unlock_new_inode(inode);
+		mutex_init(&bdev->bd_mutex);
+		disk = get_gendisk(dev, &part);
+		if (part)
+			lockdep_set_class(&bdev->bd_mutex, &bdev_part_lock_key);
+		put_disk(disk);
 	}
 	return bdev;
 }

--


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] new bd_mutex lockdep annotation
  2006-09-13 17:43 ` [PATCH 2/2] new " Peter Zijlstra
@ 2006-09-13 18:15   ` Arjan van de Ven
  2006-09-14  7:11   ` Neil Brown
  1 sibling, 0 replies; 7+ messages in thread
From: Arjan van de Ven @ 2006-09-13 18:15 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Neil Brown, Ingo Molnar, Andrew Morton, Jason Baron

Peter Zijlstra wrote:
> Use the gendisk partition number to set a lock class.
> 
I like this approach a whole better than what is there today (but we talked about that before ;)
It's a lot more obviously the right approach and kills a whole lot of ugly duplication

so

Acked-by: Arjan van de Ven <arjan@linux.intel.com>

> 
> --

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] new bd_mutex lockdep annotation
  2006-09-13 17:43 ` [PATCH 2/2] new " Peter Zijlstra
  2006-09-13 18:15   ` Arjan van de Ven
@ 2006-09-14  7:11   ` Neil Brown
  2006-09-14  8:50     ` Peter Zijlstra
  1 sibling, 1 reply; 7+ messages in thread
From: Neil Brown @ 2006-09-14  7:11 UTC (permalink / raw)
  To: Peter Zijlstra, Al Viro
  Cc: linux-kernel, Ingo Molnar, Arjan van de Ven, Andrew Morton,
	Jason Baron

On Wednesday September 13, a.p.zijlstra@chello.nl wrote:
> Use the gendisk partition number to set a lock class.

Yes, this does look a lot nicer, thanks.

Two observations.
1/ I was confused that you added a call to mutex_init.   One would
 normally expect to only have one of these for any given mutex, so
 adding one was a surprise.
 I now realise that the purpose of this call is not exactly to init
 the mutex, but to init the lockdep class in case this inode was
 previously used for a partition but is now being used for a whole
 device.  This makes sense, but renders the mutex_init in 
 init_once pointless.  Maybe that should be removed?

2/ You are introducing a new call to get_gendisk.
   This bothers me for two reasons.  Both relate to a comparison
   with the call to get_gendisk in block_dev.c:do_open.
   a/ That call is protected by lock_kernel.  Your call is not.
   b/ That call is followed by a test for '!disk' implying that it
       can return NULL.  Yours is not - at least not obviously
       (put_disk does have the check).

   I'm not sure if these are actually problems, but the do bother me.

   Thinking through the possibly reasons for the lock_kernel, I wonder
   it the current device number mapping scheme actually allows you
   to determine if something is partitioned or not in a static sense.
   Maybe that is only guaranteed to be stable while the device is
   open...
   I wonder if Al Viro could put my mind at rest .... Al - do you have
   a moment to look at this?  Thanks.

NeilBrown
    
> 
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Cc: Neil Brown <neilb@cse.unsw.edu.au>
> Cc: Ingo Molnar <mingo@elte.hu>
> Cc: Arjan van de Ven <arjan@linux.intel.com>
> Cc: Andrew Morton <akpm@osdl.org>
> Cc: Jason Baron <jbaron@redhat.com>
> ---
>  fs/block_dev.c |    9 +++++++++
>  1 file changed, 9 insertions(+)
> 
> Index: linux-2.6-mm/fs/block_dev.c
> ===================================================================
> --- linux-2.6-mm.orig/fs/block_dev.c
> +++ linux-2.6-mm/fs/block_dev.c
> @@ -357,10 +357,14 @@ static int bdev_set(struct inode *inode,
>  
>  static LIST_HEAD(all_bdevs);
>  
> +static struct lock_class_key bdev_part_lock_key;
> +
>  struct block_device *bdget(dev_t dev)
>  {
>  	struct block_device *bdev;
>  	struct inode *inode;
> +	struct gendisk *disk;
> +	int part = 0;
>  
>  	inode = iget5_locked(bd_mnt->mnt_sb, hash(dev),
>  			bdev_test, bdev_set, &dev);
> @@ -386,6 +390,11 @@ struct block_device *bdget(dev_t dev)
>  		list_add(&bdev->bd_list, &all_bdevs);
>  		spin_unlock(&bdev_lock);
>  		unlock_new_inode(inode);
> +		mutex_init(&bdev->bd_mutex);
> +		disk = get_gendisk(dev, &part);
> +		if (part)
> +			lockdep_set_class(&bdev->bd_mutex, &bdev_part_lock_key);
> +		put_disk(disk);
>  	}
>  	return bdev;
>  }
> 
> --

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] new bd_mutex lockdep annotation
  2006-09-14  7:11   ` Neil Brown
@ 2006-09-14  8:50     ` Peter Zijlstra
  2006-09-29 18:30       ` Peter Zijlstra
  0 siblings, 1 reply; 7+ messages in thread
From: Peter Zijlstra @ 2006-09-14  8:50 UTC (permalink / raw)
  To: Neil Brown
  Cc: Al Viro, linux-kernel, Ingo Molnar, Arjan van de Ven,
	Andrew Morton, Jason Baron

On Thu, 2006-09-14 at 17:11 +1000, Neil Brown wrote:
> On Wednesday September 13, a.p.zijlstra@chello.nl wrote:
> > Use the gendisk partition number to set a lock class.
> 
> Yes, this does look a lot nicer, thanks.
> 
> Two observations.
> 1/ I was confused that you added a call to mutex_init.   One would
>  normally expect to only have one of these for any given mutex, so
>  adding one was a surprise.
>  I now realise that the purpose of this call is not exactly to init
>  the mutex, but to init the lockdep class in case this inode was
>  previously used for a partition but is now being used for a whole
>  device.  This makes sense, but renders the mutex_init in 
>  init_once pointless.  Maybe that should be removed?

Yes, that would be quite redundant now, new patch attached.

> 2/ You are introducing a new call to get_gendisk.
>    This bothers me for two reasons.  Both relate to a comparison
>    with the call to get_gendisk in block_dev.c:do_open.
>    a/ That call is protected by lock_kernel.  Your call is not.
>    b/ That call is followed by a test for '!disk' implying that it
>        can return NULL.  Yours is not - at least not obviously
>        (put_disk does have the check).

a) kobj_lookup() vs kobj_(un)map() use the domain lock.

Not all calls to blk_register_region() were under lock_kernel() afaicf.

So I don't think this is needed, but I'll gladly take advise otherwise,
I'm not well versed with the kobj stuff.

b) from quick inspection yesterday I reached two (false) conclusions
 - &part would not be changed when !disk
 - disk would have to exists at the time we call bdget()
Now I can't seem to validate either of them. Added disk to the if
statement just to be safe.

>    I'm not sure if these are actually problems, but the do bother me.
> 
>    Thinking through the possibly reasons for the lock_kernel, I wonder
>    it the current device number mapping scheme actually allows you
>    to determine if something is partitioned or not in a static sense.
>    Maybe that is only guaranteed to be stable while the device is
>    open...

Hmm, yes I think I see what you mean...

>    I wonder if Al Viro could put my mind at rest .... Al - do you have
>    a moment to look at this?  Thanks.

+1

---

Use the gendisk partition number to set a lock class.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@osdl.org>
Cc: Jason Baron <jbaron@redhat.com>
---
 fs/block_dev.c |   10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

Index: linux-2.6-mm/fs/block_dev.c
===================================================================
--- linux-2.6-mm.orig/fs/block_dev.c
+++ linux-2.6-mm/fs/block_dev.c
@@ -264,7 +264,6 @@ static void init_once(void * foo, kmem_c
 	    SLAB_CTOR_CONSTRUCTOR)
 	{
 		memset(bdev, 0, sizeof(*bdev));
-		mutex_init(&bdev->bd_mutex);
 		mutex_init(&bdev->bd_mount_mutex);
 		INIT_LIST_HEAD(&bdev->bd_inodes);
 		INIT_LIST_HEAD(&bdev->bd_list);
@@ -357,10 +356,14 @@ static int bdev_set(struct inode *inode,
 
 static LIST_HEAD(all_bdevs);
 
+static struct lock_class_key bdev_part_lock_key;
+
 struct block_device *bdget(dev_t dev)
 {
 	struct block_device *bdev;
 	struct inode *inode;
+	struct gendisk *disk;
+	int part = 0;
 
 	inode = iget5_locked(bd_mnt->mnt_sb, hash(dev),
 			bdev_test, bdev_set, &dev);
@@ -386,6 +389,11 @@ struct block_device *bdget(dev_t dev)
 		list_add(&bdev->bd_list, &all_bdevs);
 		spin_unlock(&bdev_lock);
 		unlock_new_inode(inode);
+		mutex_init(&bdev->bd_mutex);
+		disk = get_gendisk(dev, &part);
+		if (disk && part)
+			lockdep_set_class(&bdev->bd_mutex, &bdev_part_lock_key);
+		put_disk(disk);
 	}
 	return bdev;
 }



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] new bd_mutex lockdep annotation
  2006-09-14  8:50     ` Peter Zijlstra
@ 2006-09-29 18:30       ` Peter Zijlstra
  0 siblings, 0 replies; 7+ messages in thread
From: Peter Zijlstra @ 2006-09-29 18:30 UTC (permalink / raw)
  To: Neil Brown
  Cc: Al Viro, linux-kernel, Ingo Molnar, Arjan van de Ven,
	Andrew Morton, Jason Baron

Andrew, please hold on this, Al Viro doesn't agree with it.

Calling get_gendisk() from bdget() changes the bdget() semantics too
much. For one it enables bdget() to load modules.

Al proposed the following approach. Neil can you agree with this too?

---
Avoid the nesting of bd_mutex by serializing the locks. This is made
easier by changing the ->bd_part_count rules, its now only changed for
the first openers/closers.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 92de28d..0ffc4f0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -898,12 +889,15 @@ void bd_set_size(struct block_device *bd
 }
 EXPORT_SYMBOL(bd_set_size);
 
+static int __blkdev_put(struct block_device *bdev, int part);
+
 static int do_open(struct block_device *bdev, struct file *file)
 {
 	struct module *owner = NULL;
 	struct gendisk *disk;
 	int ret = -ENXIO;
 	int part;
+	struct block_device *whole = NULL;
 
 	file->f_mapping = bdev->bd_inode->i_mapping;
 	lock_kernel();
@@ -937,30 +931,42 @@ static int do_open(struct block_device *
 				rescan_partitions(disk, bdev);
 		} else {
 			struct hd_struct *p;
-			struct block_device *whole;
+
+			mutex_unlock(&bdev->bd_mutex);
+
 			whole = bdget_disk(disk, 0);
 			ret = -ENOMEM;
 			if (!whole)
-				goto out_first;
+				goto out_first_lock;
 			ret = blkdev_get(whole, file->f_mode, file->f_flags);
 			if (ret)
-				goto out_first;
-			bdev->bd_contains = whole;
+				goto out_first_lock;
+
 			mutex_lock(&whole->bd_mutex);
 			whole->bd_part_count++;
 			p = disk->part[part - 1];
-			bdev->bd_inode->i_data.backing_dev_info =
-			   whole->bd_inode->i_data.backing_dev_info;
 			if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) {
-				whole->bd_part_count--;
 				mutex_unlock(&whole->bd_mutex);
 				ret = -ENXIO;
-				goto out_first;
+				goto out_first_lock;
 			}
 			kobject_get(&p->kobj);
-			bdev->bd_part = p;
-			bd_set_size(bdev, (loff_t) p->nr_sects << 9);
 			mutex_unlock(&whole->bd_mutex);
+
+			mutex_lock(&bdev->bd_mutex);
+			if (bdev->bd_contains != whole) {
+				bdev->bd_contains = whole;
+				bdev->bd_inode->i_data.backing_dev_info =
+				  whole->bd_inode->i_data.backing_dev_info;
+				bdev->bd_part = p;
+				bd_set_size(bdev, (loff_t) p->nr_sects << 9);
+				whole = NULL;
+			} else {
+				mutex_unlock(&bdev->bd_mutex);
+				kobject_put(&p->kobj);
+				__blkdev_put(whole, 1);
+				mutex_lock(&bdev->bd_mutex);
+			}
 		}
 	} else {
 		put_disk(disk);
@@ -973,10 +979,6 @@ static int do_open(struct block_device *
 			}
 			if (bdev->bd_invalidated)
 				rescan_partitions(bdev->bd_disk, bdev);
-		} else {
-			mutex_lock(&bdev->bd_contains->bd_mutex);
-			bdev->bd_contains->bd_part_count++;
-			mutex_unlock(&bdev->bd_contains->bd_mutex);
 		}
 	}
 	bdev->bd_openers++;
@@ -984,11 +986,12 @@ static int do_open(struct block_device *
 	unlock_kernel();
 	return 0;
 
+out_first_lock:
+	if (whole)
+		__blkdev_put(whole, 1);
+	mutex_lock(&bdev->bd_mutex);
 out_first:
 	bdev->bd_disk = NULL;
-	bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
-	if (bdev != bdev->bd_contains)
-		blkdev_put(bdev->bd_contains);
 	bdev->bd_contains = NULL;
 	put_disk(disk);
 	module_put(owner);
@@ -1049,14 +1052,17 @@ static int blkdev_open(struct inode * in
 	return res;
 }
 
-int blkdev_put(struct block_device *bdev)
+static int __blkdev_put(struct block_device *bdev, int part)
 {
 	int ret = 0;
 	struct inode *bd_inode = bdev->bd_inode;
 	struct gendisk *disk = bdev->bd_disk;
+	struct block_device *victim = NULL;
 
 	mutex_lock(&bdev->bd_mutex);
 	lock_kernel();
+	if (part)
+		bdev->bd_part_count--;
 	if (!--bdev->bd_openers) {
 		sync_blockdev(bdev);
 		kill_bdev(bdev);
@@ -1064,10 +1070,6 @@ int blkdev_put(struct block_device *bdev
 	if (bdev->bd_contains == bdev) {
 		if (disk->fops->release)
 			ret = disk->fops->release(bd_inode, NULL);
-	} else {
-		mutex_lock(&bdev->bd_contains->bd_mutex);
-		bdev->bd_contains->bd_part_count--;
-		mutex_unlock(&bdev->bd_contains->bd_mutex);
 	}
 	if (!bdev->bd_openers) {
 		struct module *owner = disk->fops->owner;
@@ -1081,17 +1083,23 @@ int blkdev_put(struct block_device *bdev
 		}
 		bdev->bd_disk = NULL;
 		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
-		if (bdev != bdev->bd_contains) {
-			blkdev_put(bdev->bd_contains);
-		}
+		if (bdev != bdev->bd_contains)
+			victim = bdev->bd_contains;
 		bdev->bd_contains = NULL;
 	}
 	unlock_kernel();
 	mutex_unlock(&bdev->bd_mutex);
+	if (victim)
+		__blkdev_put(victim, 1);
 	bdput(bdev);
 	return ret;
 }
 
+int blkdev_put(struct block_device *bdev)
+{
+	return __blkdev_put(bdev, 0);
+}
+
 EXPORT_SYMBOL(blkdev_put);
 
 static int blkdev_close(struct inode * inode, struct file * filp)



^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2006-09-29 18:31 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-09-13 17:43 [PATCH 0/2] new bd_mutex lockdep annotation Peter Zijlstra
2006-09-13 17:43 ` [PATCH 1/2] remove the old " Peter Zijlstra
2006-09-13 17:43 ` [PATCH 2/2] new " Peter Zijlstra
2006-09-13 18:15   ` Arjan van de Ven
2006-09-14  7:11   ` Neil Brown
2006-09-14  8:50     ` Peter Zijlstra
2006-09-29 18:30       ` Peter Zijlstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox