All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: Jens Axboe <axboe@kernel.dk>, NeilBrown <neilb@suse.de>,
	Tejun Heo <htejun@gmail.com>
Cc: linux-kernel <linux-kernel@vger.kernel.org>,
	linux-raid <linux-raid@vger.kernel.org>,
	"Neubauer, Wojciech" <Wojciech.Neubauer@intel.com>,
	Ed Ciechanowski <ed.ciechanowski@intel.com>
Subject: md versus partition scanning (bd_invalidated)
Date: Wed, 21 Jul 2010 12:52:46 -0700	[thread overview]
Message-ID: <1279741966.25713.31.camel@dwillia2-linux> (raw)

Hi,

We (Intel software raid devs) are seeing a problem with the detection of
partitions on md devices.  The trace below shows that the block device
inode is dropped in-between when the array comes up and when it is first
opened (timestamp 655.498875).  When this happens bdev->bd_invalidated
is cleared by bdget() and we never detect partitions.  (Seen on a 2.6.32
based kernel, but I presume it is still present)

# tracer: nop
#
#           TASK-PID    CPU#    TIMESTAMP  FUNCTION
#              | |       |          |         |
           <...>-1114  [000]   655.488730: check_disk_size_change: ffff8800374d3780: (md1) check_disk_size_change:1103 open: 1 invalidated: 0
           <...>-1114  [000]   655.497906: check_disk_size_change: ffff8800374d3780: (md1) check_disk_size_change:1112 open: 1 invalidated: 0
           <...>-1114  [000]   655.497908: flush_disk: ffff8800374d3780: (md1) flush_disk:1084 open: 1 invalidated: 1
           <...>-1114  [000]   655.497909: flush_disk: ffff8800374d3780: (md1) flush_disk:1086 open: 1 invalidated: 1
           <...>-1117  [003]   655.498875: bdget: ffff8800375aec40: () bdget:595 open: 0 invalidated: 0
           <...>-1117  [003]   655.498878: __blkdev_get: ffff8800375aec40: (md1) __blkdev_get:1229 open: 0 invalidated: 0
           <...>-1117  [003]   655.498879: __blkdev_get: ffff8800375aec40: (md1) __blkdev_get:1233 open: 0 invalidated: 0
           <...>-1117  [003]   655.498880: __blkdev_get: ffff8800375aec40: (md1) __blkdev_get:1239 open: 0 invalidated: 0
           <...>-1117  [003]   655.498882: __blkdev_get: ffff8800375aec40: (md1) __blkdev_get:1307 open: 1 invalidated: 0

These traces generated by:

#define dbg(bdev) ({ if (debug_partitions) {\
        char name[BDEVNAME_SIZE] = "";\
        if (bdev->bd_disk)\
                disk_name(bdev->bd_disk, 0, name);\
        trace_printk("%p: (%s) %s:%d open: %d invalidated: %d\n", bdev, name, __func__, __LINE__, bdev->bd_openers, bdev->bd_invalidated); \
        } 0; })

The patch below (2.6.32 based) moves the block_device bd_invalidated
field to a gendisk flag, as it seems this info wants a longer lifetime.

Thoughts on this fix?  Maybe it wants to be a standalone integer flag so
we don't need to add locking to nbd.

Thanks,
Dan

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index cc923a5..de8a4a4 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -607,8 +607,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
 			if (S_ISSOCK(inode->i_mode)) {
 				lo->file = file;
 				lo->sock = SOCKET_I(inode);
+				mutex_lock(&bdev->bd_mutex);
 				if (max_part > 0)
-					bdev->bd_invalidated = 1;
+					bdev->bd_disk->flags |= GENHD_FL_INVALIDATED;
+				mutex_unlock(&bdev->bd_mutex);
 				return 0;
 			} else {
 				fput(file);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 6dcee88..147f449 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -571,7 +571,6 @@ struct block_device *bdget(dev_t dev)
 		bdev->bd_inode = inode;
 		bdev->bd_block_size = (1 << inode->i_blkbits);
 		bdev->bd_part_count = 0;
-		bdev->bd_invalidated = 0;
 		inode->i_mode = S_IFBLK;
 		inode->i_rdev = dev;
 		inode->i_bdev = bdev;
@@ -1069,7 +1068,7 @@ static void flush_disk(struct block_device *bdev)
 	if (!bdev->bd_disk)
 		return;
 	if (disk_partitionable(bdev->bd_disk))
-		bdev->bd_invalidated = 1;
+		bdev->bd_disk->flags |= GENHD_FL_INVALIDATED;
 }
 
 /**
@@ -1243,7 +1242,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 					bdi = &default_backing_dev_info;
 				bdev->bd_inode->i_data.backing_dev_info = bdi;
 			}
-			if (bdev->bd_invalidated)
+			if (bdev->bd_disk->flags & GENHD_FL_INVALIDATED)
 				rescan_partitions(disk, bdev);
 		} else {
 			struct block_device *whole;
@@ -1276,7 +1275,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 				if (ret)
 					goto out_unlock_bdev;
 			}
-			if (bdev->bd_invalidated)
+			if (bdev->bd_disk->flags & GENHD_FL_INVALIDATED)
 				rescan_partitions(bdev->bd_disk, bdev);
 		}
 	}
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index e8865c1..7872269 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -519,7 +519,7 @@ void register_disk(struct gendisk *disk)
 	if (!bdev)
 		goto exit;
 
-	bdev->bd_invalidated = 1;
+	disk->flags |= GENHD_FL_INVALIDATED;
 	err = blkdev_get(bdev, FMODE_READ);
 	if (err < 0)
 		goto exit;
@@ -558,7 +558,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 	if (disk->fops->revalidate_disk)
 		disk->fops->revalidate_disk(disk);
 	check_disk_size_change(disk, bdev);
-	bdev->bd_invalidated = 0;
+	bdev->bd_disk->flags &= ~GENHD_FL_INVALIDATED;
 	if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
 		return 0;
 	if (IS_ERR(state))	/* I/O error reading the partition table */
@@ -609,7 +609,7 @@ try_scan:
 				if (capacity > get_capacity(disk)) {
 					set_capacity(disk, capacity);
 					check_disk_size_change(disk, bdev);
-					bdev->bd_invalidated = 0;
+					bdev->bd_disk->flags &= ~GENHD_FL_INVALIDATED;
 				}
 				goto try_scan;
 			} else {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1e58fc8..367875a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -661,7 +661,6 @@ struct block_device {
 	struct hd_struct *	bd_part;
 	/* number of times partitions within this device have been opened. */
 	unsigned		bd_part_count;
-	int			bd_invalidated;
 	struct gendisk *	bd_disk;
 	struct list_head	bd_list;
 	/*
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index c6c0c41..d97cdec 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -116,6 +116,7 @@ struct hd_struct {
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	32
 #define GENHD_FL_EXT_DEVT			64 /* allow extended devt */
 #define GENHD_FL_NATIVE_CAPACITY		128
+#define GENHD_FL_INVALIDATED			256
 
 #define BLK_SCSI_MAX_CMDS	(256)
 #define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))



             reply	other threads:[~2010-07-21 19:52 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-07-21 19:52 Dan Williams [this message]
2010-07-28 19:23 ` md versus partition scanning (bd_invalidated) Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1279741966.25713.31.camel@dwillia2-linux \
    --to=dan.j.williams@intel.com \
    --cc=Wojciech.Neubauer@intel.com \
    --cc=axboe@kernel.dk \
    --cc=ed.ciechanowski@intel.com \
    --cc=htejun@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.