* [PATCH 0/2] new bd_mutex lockdep annotation @ 2006-09-13 17:43 Peter Zijlstra 2006-09-13 17:43 ` [PATCH 1/2] remove the old " Peter Zijlstra 2006-09-13 17:43 ` [PATCH 2/2] new " Peter Zijlstra 0 siblings, 2 replies; 7+ messages in thread From: Peter Zijlstra @ 2006-09-13 17:43 UTC (permalink / raw) To: linux-kernel Cc: Neil Brown, Ingo Molnar, Arjan van de Ven, Andrew Morton, Jason Baron, Peter Zijlstra -- Andrew thought the old bd_mutex lockdep annotations were too complex, Arjan too said something like this several weeks ago and suggested I do something with lockdep_set_class() instead of mutex_lock_nested(). ^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 1/2] remove the old bd_mutex lockdep annotation 2006-09-13 17:43 [PATCH 0/2] new bd_mutex lockdep annotation Peter Zijlstra @ 2006-09-13 17:43 ` Peter Zijlstra 2006-09-13 17:43 ` [PATCH 2/2] new " Peter Zijlstra 1 sibling, 0 replies; 7+ messages in thread From: Peter Zijlstra @ 2006-09-13 17:43 UTC (permalink / raw) To: linux-kernel Cc: Neil Brown, Ingo Molnar, Arjan van de Ven, Andrew Morton, Jason Baron, Peter Zijlstra [-- Attachment #1: remove_old_block_annotation.patch --] [-- Type: text/plain, Size: 11228 bytes --] Remove the old complex and crufty bd_mutex annotation. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Neil Brown <neilb@cse.unsw.edu.au> Cc: Ingo Molnar <mingo@elte.hu> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: Andrew Morton <akpm@osdl.org> Cc: Jason Baron <jbaron@redhat.com> --- block/ioctl.c | 4 - drivers/md/md.c | 6 - fs/block_dev.c | 180 ++++++++++++++++------------------------------------- include/linux/fs.h | 17 ----- 4 files changed, 60 insertions(+), 147 deletions(-) Index: linux-2.6-mm/drivers/md/md.c =================================================================== --- linux-2.6-mm.orig/drivers/md/md.c +++ linux-2.6-mm/drivers/md/md.c @@ -1408,7 +1408,7 @@ static int lock_rdev(mdk_rdev_t *rdev, d struct block_device *bdev; char b[BDEVNAME_SIZE]; - bdev = open_partition_by_devnum(dev, FMODE_READ|FMODE_WRITE); + bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); if (IS_ERR(bdev)) { printk(KERN_ERR "md: could not open %s.\n", __bdevname(dev, b)); @@ -1418,7 +1418,7 @@ static int lock_rdev(mdk_rdev_t *rdev, d if (err) { printk(KERN_ERR "md: could not bd_claim %s.\n", bdevname(bdev, b)); - blkdev_put_partition(bdev); + blkdev_put(bdev); return err; } rdev->bdev = bdev; @@ -1432,7 +1432,7 @@ static void unlock_rdev(mdk_rdev_t *rdev if (!bdev) MD_BUG(); bd_release(bdev); - blkdev_put_partition(bdev); + blkdev_put(bdev); } void md_autodetect_dev(dev_t dev); Index: linux-2.6-mm/fs/block_dev.c =================================================================== --- linux-2.6-mm.orig/fs/block_dev.c +++ linux-2.6-mm/fs/block_dev.c @@ -751,7 +751,7 @@ static int bd_claim_by_kobject(struct bl if (!bo) return -ENOMEM; - mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION); + mutex_lock(&bdev->bd_mutex); res = bd_claim(bdev, holder); if (res == 0) res = add_bd_holder(bdev, bo); @@ -778,7 +778,7 @@ static void bd_release_from_kobject(stru if (!kobj) return; - mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION); + mutex_lock(&bdev->bd_mutex); bd_release(bdev); if ((bo = del_bd_holder(bdev, kobj))) free_bd_holder(bo); @@ -836,22 +836,6 @@ struct block_device *open_by_devnum(dev_ EXPORT_SYMBOL(open_by_devnum); -static int -blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags); - -struct block_device *open_partition_by_devnum(dev_t dev, unsigned mode) -{ - struct block_device *bdev = bdget(dev); - int err = -ENOMEM; - int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY; - if (bdev) - err = blkdev_get_partition(bdev, mode, flags); - return err ? ERR_PTR(err) : bdev; -} - -EXPORT_SYMBOL(open_partition_by_devnum); - - /* * This routine checks whether a removable media has been changed, * and invalidates all buffer-cache-entries in that case. This @@ -902,66 +886,7 @@ void bd_set_size(struct block_device *bd } EXPORT_SYMBOL(bd_set_size); -static int __blkdev_put(struct block_device *bdev, unsigned int subclass) -{ - int ret = 0; - struct inode *bd_inode = bdev->bd_inode; - struct gendisk *disk = bdev->bd_disk; - - mutex_lock_nested(&bdev->bd_mutex, subclass); - lock_kernel(); - if (!--bdev->bd_openers) { - sync_blockdev(bdev); - kill_bdev(bdev); - } - if (bdev->bd_contains == bdev) { - if (disk->fops->release) - ret = disk->fops->release(bd_inode, NULL); - } else { - mutex_lock_nested(&bdev->bd_contains->bd_mutex, - subclass + 1); - bdev->bd_contains->bd_part_count--; - mutex_unlock(&bdev->bd_contains->bd_mutex); - } - if (!bdev->bd_openers) { - struct module *owner = disk->fops->owner; - - put_disk(disk); - module_put(owner); - - if (bdev->bd_contains != bdev) { - kobject_put(&bdev->bd_part->kobj); - bdev->bd_part = NULL; - } - bdev->bd_disk = NULL; - bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; - if (bdev != bdev->bd_contains) - __blkdev_put(bdev->bd_contains, subclass + 1); - bdev->bd_contains = NULL; - } - unlock_kernel(); - mutex_unlock(&bdev->bd_mutex); - bdput(bdev); - return ret; -} - -int blkdev_put(struct block_device *bdev) -{ - return __blkdev_put(bdev, BD_MUTEX_NORMAL); -} -EXPORT_SYMBOL(blkdev_put); - -int blkdev_put_partition(struct block_device *bdev) -{ - return __blkdev_put(bdev, BD_MUTEX_PARTITION); -} -EXPORT_SYMBOL(blkdev_put_partition); - -static int -blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags); - -static int -do_open(struct block_device *bdev, struct file *file, unsigned int subclass) +static int do_open(struct block_device *bdev, struct file *file) { struct module *owner = NULL; struct gendisk *disk; @@ -978,8 +903,7 @@ do_open(struct block_device *bdev, struc } owner = disk->fops->owner; - mutex_lock_nested(&bdev->bd_mutex, subclass); - + mutex_lock(&bdev->bd_mutex); if (!bdev->bd_openers) { bdev->bd_disk = disk; bdev->bd_contains = bdev; @@ -1006,11 +930,11 @@ do_open(struct block_device *bdev, struc ret = -ENOMEM; if (!whole) goto out_first; - ret = blkdev_get_whole(whole, file->f_mode, file->f_flags); + ret = blkdev_get(whole, file->f_mode, file->f_flags); if (ret) goto out_first; bdev->bd_contains = whole; - mutex_lock_nested(&whole->bd_mutex, BD_MUTEX_WHOLE); + mutex_lock(&whole->bd_mutex); whole->bd_part_count++; p = disk->part[part - 1]; bdev->bd_inode->i_data.backing_dev_info = @@ -1038,8 +962,7 @@ do_open(struct block_device *bdev, struc if (bdev->bd_invalidated) rescan_partitions(bdev->bd_disk, bdev); } else { - mutex_lock_nested(&bdev->bd_contains->bd_mutex, - BD_MUTEX_WHOLE); + mutex_lock(&bdev->bd_contains->bd_mutex); bdev->bd_contains->bd_part_count++; mutex_unlock(&bdev->bd_contains->bd_mutex); } @@ -1053,7 +976,7 @@ out_first: bdev->bd_disk = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) - __blkdev_put(bdev->bd_contains, BD_MUTEX_WHOLE); + blkdev_put(bdev->bd_contains); bdev->bd_contains = NULL; put_disk(disk); module_put(owner); @@ -1080,49 +1003,11 @@ int blkdev_get(struct block_device *bdev fake_file.f_dentry = &fake_dentry; fake_dentry.d_inode = bdev->bd_inode; - return do_open(bdev, &fake_file, BD_MUTEX_NORMAL); + return do_open(bdev, &fake_file); } EXPORT_SYMBOL(blkdev_get); -static int -blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags) -{ - /* - * This crockload is due to bad choice of ->open() type. - * It will go away. - * For now, block device ->open() routine must _not_ - * examine anything in 'inode' argument except ->i_rdev. - */ - struct file fake_file = {}; - struct dentry fake_dentry = {}; - fake_file.f_mode = mode; - fake_file.f_flags = flags; - fake_file.f_dentry = &fake_dentry; - fake_dentry.d_inode = bdev->bd_inode; - - return do_open(bdev, &fake_file, BD_MUTEX_WHOLE); -} - -static int -blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags) -{ - /* - * This crockload is due to bad choice of ->open() type. - * It will go away. - * For now, block device ->open() routine must _not_ - * examine anything in 'inode' argument except ->i_rdev. - */ - struct file fake_file = {}; - struct dentry fake_dentry = {}; - fake_file.f_mode = mode; - fake_file.f_flags = flags; - fake_file.f_dentry = &fake_dentry; - fake_dentry.d_inode = bdev->bd_inode; - - return do_open(bdev, &fake_file, BD_MUTEX_PARTITION); -} - static int blkdev_open(struct inode * inode, struct file * filp) { struct block_device *bdev; @@ -1138,7 +1023,7 @@ static int blkdev_open(struct inode * in bdev = bd_acquire(inode); - res = do_open(bdev, filp, BD_MUTEX_NORMAL); + res = do_open(bdev, filp); if (res) return res; @@ -1152,6 +1037,51 @@ static int blkdev_open(struct inode * in return res; } +int blkdev_put(struct block_device *bdev) +{ + int ret = 0; + struct inode *bd_inode = bdev->bd_inode; + struct gendisk *disk = bdev->bd_disk; + + mutex_lock(&bdev->bd_mutex); + lock_kernel(); + if (!--bdev->bd_openers) { + sync_blockdev(bdev); + kill_bdev(bdev); + } + if (bdev->bd_contains == bdev) { + if (disk->fops->release) + ret = disk->fops->release(bd_inode, NULL); + } else { + mutex_lock(&bdev->bd_contains->bd_mutex); + bdev->bd_contains->bd_part_count--; + mutex_unlock(&bdev->bd_contains->bd_mutex); + } + if (!bdev->bd_openers) { + struct module *owner = disk->fops->owner; + + put_disk(disk); + module_put(owner); + + if (bdev->bd_contains != bdev) { + kobject_put(&bdev->bd_part->kobj); + bdev->bd_part = NULL; + } + bdev->bd_disk = NULL; + bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; + if (bdev != bdev->bd_contains) { + blkdev_put(bdev->bd_contains); + } + bdev->bd_contains = NULL; + } + unlock_kernel(); + mutex_unlock(&bdev->bd_mutex); + bdput(bdev); + return ret; +} + +EXPORT_SYMBOL(blkdev_put); + static int blkdev_close(struct inode * inode, struct file * filp) { struct block_device *bdev = I_BDEV(filp->f_mapping->host); Index: linux-2.6-mm/include/linux/fs.h =================================================================== --- linux-2.6-mm.orig/include/linux/fs.h +++ linux-2.6-mm/include/linux/fs.h @@ -504,21 +504,6 @@ struct block_device { }; /* - * bdev->bd_mutex nesting subclasses for the lock validator: - * - * 0: normal - * 1: 'whole' - * 2: 'partition' - */ -enum bdev_bd_mutex_lock_class -{ - BD_MUTEX_NORMAL, - BD_MUTEX_WHOLE, - BD_MUTEX_PARTITION -}; - - -/* * Radix-tree tags, for tagging dirty and writeback pages within the pagecache * radix trees */ @@ -1596,7 +1581,6 @@ extern void bd_set_size(struct block_dev extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); extern struct block_device *open_by_devnum(dev_t, unsigned); -extern struct block_device *open_partition_by_devnum(dev_t, unsigned); extern const struct address_space_operations def_blk_aops; #else static inline void bd_forget(struct inode *inode) {} @@ -1614,7 +1598,6 @@ extern int blkdev_driver_ioctl(struct in extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); extern int blkdev_get(struct block_device *, mode_t, unsigned); extern int blkdev_put(struct block_device *); -extern int blkdev_put_partition(struct block_device *); extern int bd_claim(struct block_device *, void *); extern void bd_release(struct block_device *); #ifdef CONFIG_SYSFS Index: linux-2.6-mm/block/ioctl.c =================================================================== --- linux-2.6-mm.orig/block/ioctl.c +++ linux-2.6-mm/block/ioctl.c @@ -72,7 +72,7 @@ static int blkpg_ioctl(struct block_devi bdevp = bdget_disk(disk, part); if (!bdevp) return -ENOMEM; - mutex_lock_nested(&bdevp->bd_mutex, BD_MUTEX_PARTITION); + mutex_lock(&bdevp->bd_mutex); if (bdevp->bd_openers) { mutex_unlock(&bdevp->bd_mutex); bdput(bdevp); @@ -82,7 +82,7 @@ static int blkpg_ioctl(struct block_devi fsync_bdev(bdevp); invalidate_bdev(bdevp, 0); - mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_WHOLE); + mutex_lock(&bdev->bd_mutex); delete_partition(disk, part); mutex_unlock(&bdev->bd_mutex); mutex_unlock(&bdevp->bd_mutex); -- ^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 2/2] new bd_mutex lockdep annotation 2006-09-13 17:43 [PATCH 0/2] new bd_mutex lockdep annotation Peter Zijlstra 2006-09-13 17:43 ` [PATCH 1/2] remove the old " Peter Zijlstra @ 2006-09-13 17:43 ` Peter Zijlstra 2006-09-13 18:15 ` Arjan van de Ven 2006-09-14 7:11 ` Neil Brown 1 sibling, 2 replies; 7+ messages in thread From: Peter Zijlstra @ 2006-09-13 17:43 UTC (permalink / raw) To: linux-kernel Cc: Neil Brown, Ingo Molnar, Arjan van de Ven, Andrew Morton, Jason Baron, Peter Zijlstra [-- Attachment #1: new_block_annotation.patch --] [-- Type: text/plain, Size: 1249 bytes --] Use the gendisk partition number to set a lock class. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Neil Brown <neilb@cse.unsw.edu.au> Cc: Ingo Molnar <mingo@elte.hu> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: Andrew Morton <akpm@osdl.org> Cc: Jason Baron <jbaron@redhat.com> --- fs/block_dev.c | 9 +++++++++ 1 file changed, 9 insertions(+) Index: linux-2.6-mm/fs/block_dev.c =================================================================== --- linux-2.6-mm.orig/fs/block_dev.c +++ linux-2.6-mm/fs/block_dev.c @@ -357,10 +357,14 @@ static int bdev_set(struct inode *inode, static LIST_HEAD(all_bdevs); +static struct lock_class_key bdev_part_lock_key; + struct block_device *bdget(dev_t dev) { struct block_device *bdev; struct inode *inode; + struct gendisk *disk; + int part = 0; inode = iget5_locked(bd_mnt->mnt_sb, hash(dev), bdev_test, bdev_set, &dev); @@ -386,6 +390,11 @@ struct block_device *bdget(dev_t dev) list_add(&bdev->bd_list, &all_bdevs); spin_unlock(&bdev_lock); unlock_new_inode(inode); + mutex_init(&bdev->bd_mutex); + disk = get_gendisk(dev, &part); + if (part) + lockdep_set_class(&bdev->bd_mutex, &bdev_part_lock_key); + put_disk(disk); } return bdev; } -- ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/2] new bd_mutex lockdep annotation 2006-09-13 17:43 ` [PATCH 2/2] new " Peter Zijlstra @ 2006-09-13 18:15 ` Arjan van de Ven 2006-09-14 7:11 ` Neil Brown 1 sibling, 0 replies; 7+ messages in thread From: Arjan van de Ven @ 2006-09-13 18:15 UTC (permalink / raw) To: Peter Zijlstra Cc: linux-kernel, Neil Brown, Ingo Molnar, Andrew Morton, Jason Baron Peter Zijlstra wrote: > Use the gendisk partition number to set a lock class. > I like this approach a whole better than what is there today (but we talked about that before ;) It's a lot more obviously the right approach and kills a whole lot of ugly duplication so Acked-by: Arjan van de Ven <arjan@linux.intel.com> > > -- ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/2] new bd_mutex lockdep annotation 2006-09-13 17:43 ` [PATCH 2/2] new " Peter Zijlstra 2006-09-13 18:15 ` Arjan van de Ven @ 2006-09-14 7:11 ` Neil Brown 2006-09-14 8:50 ` Peter Zijlstra 1 sibling, 1 reply; 7+ messages in thread From: Neil Brown @ 2006-09-14 7:11 UTC (permalink / raw) To: Peter Zijlstra, Al Viro Cc: linux-kernel, Ingo Molnar, Arjan van de Ven, Andrew Morton, Jason Baron On Wednesday September 13, a.p.zijlstra@chello.nl wrote: > Use the gendisk partition number to set a lock class. Yes, this does look a lot nicer, thanks. Two observations. 1/ I was confused that you added a call to mutex_init. One would normally expect to only have one of these for any given mutex, so adding one was a surprise. I now realise that the purpose of this call is not exactly to init the mutex, but to init the lockdep class in case this inode was previously used for a partition but is now being used for a whole device. This makes sense, but renders the mutex_init in init_once pointless. Maybe that should be removed? 2/ You are introducing a new call to get_gendisk. This bothers me for two reasons. Both relate to a comparison with the call to get_gendisk in block_dev.c:do_open. a/ That call is protected by lock_kernel. Your call is not. b/ That call is followed by a test for '!disk' implying that it can return NULL. Yours is not - at least not obviously (put_disk does have the check). I'm not sure if these are actually problems, but the do bother me. Thinking through the possibly reasons for the lock_kernel, I wonder it the current device number mapping scheme actually allows you to determine if something is partitioned or not in a static sense. Maybe that is only guaranteed to be stable while the device is open... I wonder if Al Viro could put my mind at rest .... Al - do you have a moment to look at this? Thanks. NeilBrown > > Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> > Cc: Neil Brown <neilb@cse.unsw.edu.au> > Cc: Ingo Molnar <mingo@elte.hu> > Cc: Arjan van de Ven <arjan@linux.intel.com> > Cc: Andrew Morton <akpm@osdl.org> > Cc: Jason Baron <jbaron@redhat.com> > --- > fs/block_dev.c | 9 +++++++++ > 1 file changed, 9 insertions(+) > > Index: linux-2.6-mm/fs/block_dev.c > =================================================================== > --- linux-2.6-mm.orig/fs/block_dev.c > +++ linux-2.6-mm/fs/block_dev.c > @@ -357,10 +357,14 @@ static int bdev_set(struct inode *inode, > > static LIST_HEAD(all_bdevs); > > +static struct lock_class_key bdev_part_lock_key; > + > struct block_device *bdget(dev_t dev) > { > struct block_device *bdev; > struct inode *inode; > + struct gendisk *disk; > + int part = 0; > > inode = iget5_locked(bd_mnt->mnt_sb, hash(dev), > bdev_test, bdev_set, &dev); > @@ -386,6 +390,11 @@ struct block_device *bdget(dev_t dev) > list_add(&bdev->bd_list, &all_bdevs); > spin_unlock(&bdev_lock); > unlock_new_inode(inode); > + mutex_init(&bdev->bd_mutex); > + disk = get_gendisk(dev, &part); > + if (part) > + lockdep_set_class(&bdev->bd_mutex, &bdev_part_lock_key); > + put_disk(disk); > } > return bdev; > } > > -- ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/2] new bd_mutex lockdep annotation 2006-09-14 7:11 ` Neil Brown @ 2006-09-14 8:50 ` Peter Zijlstra 2006-09-29 18:30 ` Peter Zijlstra 0 siblings, 1 reply; 7+ messages in thread From: Peter Zijlstra @ 2006-09-14 8:50 UTC (permalink / raw) To: Neil Brown Cc: Al Viro, linux-kernel, Ingo Molnar, Arjan van de Ven, Andrew Morton, Jason Baron On Thu, 2006-09-14 at 17:11 +1000, Neil Brown wrote: > On Wednesday September 13, a.p.zijlstra@chello.nl wrote: > > Use the gendisk partition number to set a lock class. > > Yes, this does look a lot nicer, thanks. > > Two observations. > 1/ I was confused that you added a call to mutex_init. One would > normally expect to only have one of these for any given mutex, so > adding one was a surprise. > I now realise that the purpose of this call is not exactly to init > the mutex, but to init the lockdep class in case this inode was > previously used for a partition but is now being used for a whole > device. This makes sense, but renders the mutex_init in > init_once pointless. Maybe that should be removed? Yes, that would be quite redundant now, new patch attached. > 2/ You are introducing a new call to get_gendisk. > This bothers me for two reasons. Both relate to a comparison > with the call to get_gendisk in block_dev.c:do_open. > a/ That call is protected by lock_kernel. Your call is not. > b/ That call is followed by a test for '!disk' implying that it > can return NULL. Yours is not - at least not obviously > (put_disk does have the check). a) kobj_lookup() vs kobj_(un)map() use the domain lock. Not all calls to blk_register_region() were under lock_kernel() afaicf. So I don't think this is needed, but I'll gladly take advise otherwise, I'm not well versed with the kobj stuff. b) from quick inspection yesterday I reached two (false) conclusions - &part would not be changed when !disk - disk would have to exists at the time we call bdget() Now I can't seem to validate either of them. Added disk to the if statement just to be safe. > I'm not sure if these are actually problems, but the do bother me. > > Thinking through the possibly reasons for the lock_kernel, I wonder > it the current device number mapping scheme actually allows you > to determine if something is partitioned or not in a static sense. > Maybe that is only guaranteed to be stable while the device is > open... Hmm, yes I think I see what you mean... > I wonder if Al Viro could put my mind at rest .... Al - do you have > a moment to look at this? Thanks. +1 --- Use the gendisk partition number to set a lock class. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Acked-by: Arjan van de Ven <arjan@linux.intel.com> Cc: Neil Brown <neilb@cse.unsw.edu.au> Cc: Ingo Molnar <mingo@elte.hu> Cc: Andrew Morton <akpm@osdl.org> Cc: Jason Baron <jbaron@redhat.com> --- fs/block_dev.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) Index: linux-2.6-mm/fs/block_dev.c =================================================================== --- linux-2.6-mm.orig/fs/block_dev.c +++ linux-2.6-mm/fs/block_dev.c @@ -264,7 +264,6 @@ static void init_once(void * foo, kmem_c SLAB_CTOR_CONSTRUCTOR) { memset(bdev, 0, sizeof(*bdev)); - mutex_init(&bdev->bd_mutex); mutex_init(&bdev->bd_mount_mutex); INIT_LIST_HEAD(&bdev->bd_inodes); INIT_LIST_HEAD(&bdev->bd_list); @@ -357,10 +356,14 @@ static int bdev_set(struct inode *inode, static LIST_HEAD(all_bdevs); +static struct lock_class_key bdev_part_lock_key; + struct block_device *bdget(dev_t dev) { struct block_device *bdev; struct inode *inode; + struct gendisk *disk; + int part = 0; inode = iget5_locked(bd_mnt->mnt_sb, hash(dev), bdev_test, bdev_set, &dev); @@ -386,6 +389,11 @@ struct block_device *bdget(dev_t dev) list_add(&bdev->bd_list, &all_bdevs); spin_unlock(&bdev_lock); unlock_new_inode(inode); + mutex_init(&bdev->bd_mutex); + disk = get_gendisk(dev, &part); + if (disk && part) + lockdep_set_class(&bdev->bd_mutex, &bdev_part_lock_key); + put_disk(disk); } return bdev; } ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/2] new bd_mutex lockdep annotation 2006-09-14 8:50 ` Peter Zijlstra @ 2006-09-29 18:30 ` Peter Zijlstra 0 siblings, 0 replies; 7+ messages in thread From: Peter Zijlstra @ 2006-09-29 18:30 UTC (permalink / raw) To: Neil Brown Cc: Al Viro, linux-kernel, Ingo Molnar, Arjan van de Ven, Andrew Morton, Jason Baron Andrew, please hold on this, Al Viro doesn't agree with it. Calling get_gendisk() from bdget() changes the bdget() semantics too much. For one it enables bdget() to load modules. Al proposed the following approach. Neil can you agree with this too? --- Avoid the nesting of bd_mutex by serializing the locks. This is made easier by changing the ->bd_part_count rules, its now only changed for the first openers/closers. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> --- diff --git a/fs/block_dev.c b/fs/block_dev.c index 92de28d..0ffc4f0 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -898,12 +889,15 @@ void bd_set_size(struct block_device *bd } EXPORT_SYMBOL(bd_set_size); +static int __blkdev_put(struct block_device *bdev, int part); + static int do_open(struct block_device *bdev, struct file *file) { struct module *owner = NULL; struct gendisk *disk; int ret = -ENXIO; int part; + struct block_device *whole = NULL; file->f_mapping = bdev->bd_inode->i_mapping; lock_kernel(); @@ -937,30 +931,42 @@ static int do_open(struct block_device * rescan_partitions(disk, bdev); } else { struct hd_struct *p; - struct block_device *whole; + + mutex_unlock(&bdev->bd_mutex); + whole = bdget_disk(disk, 0); ret = -ENOMEM; if (!whole) - goto out_first; + goto out_first_lock; ret = blkdev_get(whole, file->f_mode, file->f_flags); if (ret) - goto out_first; - bdev->bd_contains = whole; + goto out_first_lock; + mutex_lock(&whole->bd_mutex); whole->bd_part_count++; p = disk->part[part - 1]; - bdev->bd_inode->i_data.backing_dev_info = - whole->bd_inode->i_data.backing_dev_info; if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { - whole->bd_part_count--; mutex_unlock(&whole->bd_mutex); ret = -ENXIO; - goto out_first; + goto out_first_lock; } kobject_get(&p->kobj); - bdev->bd_part = p; - bd_set_size(bdev, (loff_t) p->nr_sects << 9); mutex_unlock(&whole->bd_mutex); + + mutex_lock(&bdev->bd_mutex); + if (bdev->bd_contains != whole) { + bdev->bd_contains = whole; + bdev->bd_inode->i_data.backing_dev_info = + whole->bd_inode->i_data.backing_dev_info; + bdev->bd_part = p; + bd_set_size(bdev, (loff_t) p->nr_sects << 9); + whole = NULL; + } else { + mutex_unlock(&bdev->bd_mutex); + kobject_put(&p->kobj); + __blkdev_put(whole, 1); + mutex_lock(&bdev->bd_mutex); + } } } else { put_disk(disk); @@ -973,10 +979,6 @@ static int do_open(struct block_device * } if (bdev->bd_invalidated) rescan_partitions(bdev->bd_disk, bdev); - } else { - mutex_lock(&bdev->bd_contains->bd_mutex); - bdev->bd_contains->bd_part_count++; - mutex_unlock(&bdev->bd_contains->bd_mutex); } } bdev->bd_openers++; @@ -984,11 +986,12 @@ static int do_open(struct block_device * unlock_kernel(); return 0; +out_first_lock: + if (whole) + __blkdev_put(whole, 1); + mutex_lock(&bdev->bd_mutex); out_first: bdev->bd_disk = NULL; - bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; - if (bdev != bdev->bd_contains) - blkdev_put(bdev->bd_contains); bdev->bd_contains = NULL; put_disk(disk); module_put(owner); @@ -1049,14 +1052,17 @@ static int blkdev_open(struct inode * in return res; } -int blkdev_put(struct block_device *bdev) +static int __blkdev_put(struct block_device *bdev, int part) { int ret = 0; struct inode *bd_inode = bdev->bd_inode; struct gendisk *disk = bdev->bd_disk; + struct block_device *victim = NULL; mutex_lock(&bdev->bd_mutex); lock_kernel(); + if (part) + bdev->bd_part_count--; if (!--bdev->bd_openers) { sync_blockdev(bdev); kill_bdev(bdev); @@ -1064,10 +1070,6 @@ int blkdev_put(struct block_device *bdev if (bdev->bd_contains == bdev) { if (disk->fops->release) ret = disk->fops->release(bd_inode, NULL); - } else { - mutex_lock(&bdev->bd_contains->bd_mutex); - bdev->bd_contains->bd_part_count--; - mutex_unlock(&bdev->bd_contains->bd_mutex); } if (!bdev->bd_openers) { struct module *owner = disk->fops->owner; @@ -1081,17 +1083,23 @@ int blkdev_put(struct block_device *bdev } bdev->bd_disk = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; - if (bdev != bdev->bd_contains) { - blkdev_put(bdev->bd_contains); - } + if (bdev != bdev->bd_contains) + victim = bdev->bd_contains; bdev->bd_contains = NULL; } unlock_kernel(); mutex_unlock(&bdev->bd_mutex); + if (victim) + __blkdev_put(victim, 1); bdput(bdev); return ret; } +int blkdev_put(struct block_device *bdev) +{ + return __blkdev_put(bdev, 0); +} + EXPORT_SYMBOL(blkdev_put); static int blkdev_close(struct inode * inode, struct file * filp) ^ permalink raw reply related [flat|nested] 7+ messages in thread
end of thread, other threads:[~2006-09-29 18:31 UTC | newest] Thread overview: 7+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2006-09-13 17:43 [PATCH 0/2] new bd_mutex lockdep annotation Peter Zijlstra 2006-09-13 17:43 ` [PATCH 1/2] remove the old " Peter Zijlstra 2006-09-13 17:43 ` [PATCH 2/2] new " Peter Zijlstra 2006-09-13 18:15 ` Arjan van de Ven 2006-09-14 7:11 ` Neil Brown 2006-09-14 8:50 ` Peter Zijlstra 2006-09-29 18:30 ` Peter Zijlstra
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox