linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v1] md: add sync-bitmap to only resync WRITTEN data when adding new disk in raid array.
@ 2013-06-25  8:51 Robin Dong
  2013-06-25  8:51 ` [PATCH v1] mdadm: " Robin Dong
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Robin Dong @ 2013-06-25  8:51 UTC (permalink / raw)
  To: linux-raid; +Cc: Robin Dong, NeilBrown

From: Robin Dong <sanbai@taobao.com>

Add a new bitmap type named "sync-bitmap" to md. Every chunk that has been
WRITTEN is marked in it, so when a new disk is added md only resyncs the
WRITTEN data to the new disk. This saves a lot of time and reduces disk wear.

The "sync-bitmap" is stored after the "write-intent-bitmap", not immediately
behind it but starting at the next PAGE_SIZE-aligned boundary:

|          page0                       |        page1                   |
+--------------------------------------+--------------------------------+
|bitmap_super and write-intent-bitmap  |         sync-bitmap            |

Every write operation sets the corresponding bit in the sync-bitmap.


TEST CASE:

	mdadm --create /dev/md1 --bitmap=internal --chunk=64 --level=1 --raid-devices=2 /dev/sdf missing --assume-clean
	mkfs.ext4 /dev/md1
	mount -t ext4 /dev/md1 /mnt/
	cp kernel.tgz /mnt/
	reboot
	mdadm --assemble /dev/md1 /dev/sdf
	mdadm --add /dev/md1 /dev/sdg
	echo offline > /sys/block/sdf/device/state
	mount -t ext4 /dev/md1 /mnt/ (mount success)
	cksum /mnt/kernel.tgz        (cksum ok)

TODO:

	* Allow "discard" to clear bit in sync-bitmap
	* More complicated test case on raid5

Signed-off-by: Robin Dong <sanbai@taobao.com>
Cc: NeilBrown <neilb@suse.de>
---
 drivers/md/bitmap.c            |  195 ++++++++++++++++++++++++++++++++++++++--
 drivers/md/bitmap.h            |    5 +
 drivers/md/md.c                |    7 ++-
 drivers/md/md.h                |    1 +
 drivers/md/raid1.c             |    7 ++
 drivers/md/raid5.c             |    7 ++
 include/uapi/linux/raid/md_p.h |    2 +
 7 files changed, 217 insertions(+), 7 deletions(-)

diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 5a2c754..86279e1 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -30,6 +30,13 @@
 #include "md.h"
 #include "bitmap.h"
 
+static inline sector_t syncbitmap_offset(struct bitmap *bitmap, sector_t block)
+{
+	return block +
+		(bitmap->syncbitmap_num_pages << bitmap->counts.chunkshift
+		 << PAGE_SHIFT << 3);
+}
+
 static inline char *bmname(struct bitmap *bitmap)
 {
 	return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
@@ -682,18 +689,40 @@ static inline struct page *filemap_get_page(struct bitmap_storage *store,
 			      - file_page_index(store, 0)];
 }
 
-static int bitmap_storage_alloc(struct bitmap_storage *store,
-				unsigned long chunks, int with_super)
+static void chunks_to_pages(unsigned long chunks, unsigned long *res_bytes,
+			unsigned long *res_pages, int with_super)
 {
-	int pnum;
-	unsigned long num_pages;
 	unsigned long bytes;
 
 	bytes = DIV_ROUND_UP(chunks, 8);
 	if (with_super)
 		bytes += sizeof(bitmap_super_t);
 
-	num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
+	if (res_bytes)
+		*res_bytes = bytes;
+	if (res_pages)
+		*res_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
+}
+
+static int bitmap_storage_alloc(struct bitmap_storage *store,
+				unsigned long chunks, int with_super,
+				int with_sync_bitmap)
+{
+	int pnum;
+	unsigned long num_pages;
+	unsigned long bytes;
+	unsigned long syncbitmap_num_pages;
+
+	chunks_to_pages(chunks, &bytes, &num_pages, with_super);
+	/* we need two bitmaps: write-intent-bitmap and sync-bitmap, sync-bitmap
+	 * locates behind write-intent-bitmap closely. write-intent-bit maps
+	 * "this was written recently, a resync might be needed after a crash"
+	 * and the sync-bit maps "This has been written since array create,
+	 * so the chunk needs to be recovered to any spare".
+	 */
+	chunks_to_pages(chunks, NULL, &syncbitmap_num_pages, 0);
+	if (with_sync_bitmap)
+		num_pages += syncbitmap_num_pages;
 
 	store->filemap = kmalloc(sizeof(struct page *)
 				 * num_pages, GFP_KERNEL);
@@ -853,6 +882,41 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
 	set_page_attr(bitmap, page->index, BITMAP_PAGE_DIRTY);
 }
 
+static int syncbitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
+{
+	unsigned long bit;
+	struct page *page;
+	void *kaddr;
+	unsigned long chunk;
+	int res;
+
+	chunk = syncbitmap_offset(bitmap, block) >> bitmap->counts.chunkshift;
+
+	page = filemap_get_page(&bitmap->storage, chunk);
+	if (!page)
+		return 1;
+	bit = file_page_offset(&bitmap->storage, chunk);
+
+	/* set the bit */
+	kaddr = kmap_atomic(page);
+	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
+		res = test_bit(bit, kaddr);
+	else
+		res = test_bit_le(bit, kaddr);
+	kunmap_atomic(kaddr);
+	pr_debug("test syncbitmap bit %lu page %lu\n", bit, page->index);
+	return res;
+}
+
+/*
+ * syncbitmap_file_set_bit -- set the bit in sync-bitmap, just jump out
+ * the offset of write-intent-bitmap.
+ */
+static void syncbitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
+{
+	bitmap_file_set_bit(bitmap, syncbitmap_offset(bitmap, block));
+}
+
 static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
 {
 	unsigned long bit;
@@ -1038,6 +1102,61 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
 		offset = 0;
 	}
 
+	if (bitmap->mddev->bitmap_info.sync_bitmap) {
+		for (i = 0; i < chunks; i++) {
+			int b;
+			index = file_page_index(&bitmap->storage, i) +
+				bitmap->syncbitmap_num_pages;
+			bit = file_page_offset(&bitmap->storage, i);
+			if (index != oldindex) {
+				/* this is a new page, read it in */
+				page = store->filemap[index];
+				if (file)
+					ret = read_page(file, index, bitmap,
+							PAGE_SIZE, page);
+				else
+					ret = read_sb_page(
+						bitmap->mddev,
+						bitmap->mddev->bitmap_info.offset,
+						page,
+						index, PAGE_SIZE);
+				if (ret)
+					goto err;
+
+				oldindex = index;
+
+				if (outofdate) {
+					/*
+					 * if bitmap is out of date, dirty the
+					 * whole page and write it out
+					 */
+					paddr = kmap_atomic(page);
+					memset(paddr + offset, 0xff,
+						   PAGE_SIZE - offset);
+					kunmap_atomic(paddr);
+					write_page(bitmap, page, 1);
+
+					ret = -EIO;
+					if (test_bit(BITMAP_WRITE_ERROR,
+							 &bitmap->flags))
+						goto err;
+				}
+			}
+			paddr = kmap_atomic(page);
+			if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
+				b = test_bit(bit, paddr);
+			else
+				b = test_bit_le(bit, paddr);
+			kunmap_atomic(paddr);
+			if (b) {
+				/* if the disk bit is set, set the memory bit */
+				syncbitmap_file_set_bit(bitmap, (sector_t)i <<
+						bitmap->counts.chunkshift);
+				bit_cnt++;
+			}
+			offset = 0;
+		}
+	}
 	printk(KERN_INFO "%s: bitmap initialized from disk: "
 	       "read %lu pages, set %lu of %lu bits\n",
 	       bmname(bitmap), store->file_pages,
@@ -1303,6 +1422,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
 			continue;
 		}
 
+		syncbitmap_file_set_bit(bitmap, offset);
 		switch (*bmc) {
 		case 0:
 			bitmap_file_set_bit(bitmap, offset);
@@ -1431,6 +1551,42 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
 }
 EXPORT_SYMBOL(bitmap_start_sync);
 
+int __syncbitmap_start_sync(struct bitmap *bitmap, sector_t offset,
+						sector_t *blocks)
+{
+	int res;
+	unsigned long csize;
+	if (bitmap == NULL) {
+		*blocks = 1024;
+		return 1;
+	}
+
+	spin_lock_irq(&bitmap->counts.lock);
+	res = syncbitmap_file_test_bit(bitmap, offset);
+	if (res) {
+		csize = ((sector_t)1) << bitmap->counts.chunkshift;
+		*blocks = csize - (offset & (csize - 1));
+	}
+	spin_unlock_irq(&bitmap->counts.lock);
+	return res;
+}
+
+int syncbitmap_start_sync(struct bitmap *bitmap, sector_t offset,
+						sector_t *blocks)
+{
+	int rv = 0;
+	sector_t blocks1;
+
+	*blocks = 0;
+	while (*blocks < (PAGE_SIZE>>9)) {
+		rv |= __syncbitmap_start_sync(bitmap, offset, &blocks1);
+		offset += blocks1;
+		*blocks += blocks1;
+	}
+	return rv;
+}
+EXPORT_SYMBOL(syncbitmap_start_sync);
+
 void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted)
 {
 	bitmap_counter_t *bmc;
@@ -1805,6 +1961,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
 	sector_t old_blocks, new_blocks;
 	int chunkshift;
 	int ret = 0;
+	unsigned long pnum, old_pnum, num_pages, old_num_pages;
 	long pages;
 	struct bitmap_page *new_bp;
 
@@ -1842,7 +1999,8 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
 	memset(&store, 0, sizeof(store));
 	if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
 		ret = bitmap_storage_alloc(&store, chunks,
-					   !bitmap->mddev->bitmap_info.external);
+				!bitmap->mddev->bitmap_info.external,
+				bitmap->mddev->bitmap_info.sync_bitmap);
 	if (ret)
 		goto err;
 
@@ -1865,6 +2023,31 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
 		memcpy(page_address(store.sb_page),
 		       page_address(bitmap->storage.sb_page),
 		       sizeof(bitmap_super_t));
+	if (bitmap->mddev->bitmap_info.sync_bitmap) {
+		/* copy old sync-bitmap to new one */
+		chunks_to_pages(chunks, NULL, &pnum,
+					!bitmap->mddev->bitmap_info.external);
+		bitmap->syncbitmap_num_pages = pnum;
+		if (bitmap->storage.filemap) {
+			chunks_to_pages(bitmap->counts.chunks, NULL, &old_pnum,
+				!bitmap->mddev->bitmap_info.external);
+			num_pages = pnum * 2;
+			old_num_pages = old_pnum * 2;
+			pnum++;
+			old_pnum++;
+			for (; pnum <= num_pages && old_pnum <= old_num_pages;
+					pnum++, old_pnum++) {
+				memcpy(store.filemap[pnum],
+					bitmap->storage.filemap[old_pnum],
+					PAGE_SIZE);
+				/* All new sync-bitmap data
+				 * shoule be write out */
+				set_bit((pnum << 2) + BITMAP_PAGE_DIRTY,
+					store.filemap_attr);
+			}
+		}
+	}
+
 	bitmap_file_unmap(&bitmap->storage);
 	bitmap->storage = store;
 
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index df4aeb6..87c4686 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -226,6 +226,7 @@ struct bitmap {
 	wait_queue_head_t behind_wait;
 
 	struct sysfs_dirent *sysfs_can_clear;
+	unsigned long syncbitmap_num_pages;
 };
 
 /* the bitmap API */
@@ -252,6 +253,10 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
 			unsigned long sectors, int success, int behind);
 int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded);
 void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
+
+int syncbitmap_start_sync(struct bitmap *bitmap, sector_t offset,
+						sector_t *blocks);
+
 void bitmap_close_sync(struct bitmap *bitmap);
 void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector);
 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 681d109..fb81a01 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1621,6 +1621,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
 		mddev->events = ev1;
 		mddev->bitmap_info.offset = 0;
 		mddev->bitmap_info.space = 0;
+		mddev->bitmap_info.sync_bitmap = 0;
 		/* Default location for bitmap is 1K after superblock
 		 * using 3K - total of 4K
 		 */
@@ -1652,6 +1653,9 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
 					-mddev->bitmap_info.offset;
 		}
 
+		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_SYNCBITMAP)
+			mddev->bitmap_info.sync_bitmap = 1;
+
 		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
 			mddev->reshape_position = le64_to_cpu(sb->reshape_position);
 			mddev->delta_disks = le32_to_cpu(sb->delta_disks);
@@ -1762,7 +1766,8 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
 
 	if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
 		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
-		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
+		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET |
+					MD_FEATURE_SYNCBITMAP);
 	}
 
 	if (rdev->raid_disk >= 0 &&
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 653f992..1cef001 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -404,6 +404,7 @@ struct mddev {
 		unsigned long		daemon_sleep; /* how many jiffies between updates? */
 		unsigned long		max_write_behind; /* write-behind mode */
 		int			external;
+		int			sync_bitmap;
 	} bitmap_info;
 
 	atomic_t 			max_corr_read_errors; /* max read retries */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 5595118..ba47ee7 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2396,6 +2396,13 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
 		*skipped = 1;
 		return sync_blocks;
 	}
+
+	if (conf->fullsync && !syncbitmap_start_sync(mddev->bitmap,
+				sector_nr, &sync_blocks)) {
+		*skipped = 1;
+		return sync_blocks;
+	}
+
 	/*
 	 * If there is non-resync activity waiting for a turn,
 	 * and resync is going fast enough,
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 9359828..7528aa8 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4688,6 +4688,13 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
 		return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */
 	}
 
+	if (conf->fullsync && sync_blocks >= STRIPE_SECTORS &&
+	    !syncbitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks)) {
+		sync_blocks /= STRIPE_SECTORS;
+		*skipped = 1;
+		return sync_blocks * STRIPE_SECTORS;
+	}
+
 	bitmap_cond_end_sync(mddev->bitmap, sector_nr);
 
 	sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index fe1a540..7949f61 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -291,6 +291,7 @@ struct mdp_superblock_1 {
 					    * backwards anyway.
 					    */
 #define	MD_FEATURE_NEW_OFFSET		64 /* new_offset must be honoured */
+#define	MD_FEATURE_SYNCBITMAP		128
 #define	MD_FEATURE_ALL			(MD_FEATURE_BITMAP_OFFSET	\
 					|MD_FEATURE_RECOVERY_OFFSET	\
 					|MD_FEATURE_RESHAPE_ACTIVE	\
@@ -298,6 +299,7 @@ struct mdp_superblock_1 {
 					|MD_FEATURE_REPLACEMENT		\
 					|MD_FEATURE_RESHAPE_BACKWARDS	\
 					|MD_FEATURE_NEW_OFFSET		\
+					|MD_FEATURE_SYNCBITMAP		\
 					)
 
 #endif 
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v1] mdadm: add sync-bitmap to only resync WRITTEN data when adding new disk in raid array.
  2013-06-25  8:51 [PATCH v1] md: add sync-bitmap to only resync WRITTEN data when adding new disk in raid array Robin Dong
@ 2013-06-25  8:51 ` Robin Dong
  2013-06-26  2:26 ` [PATCH v1] md: " NeilBrown
       [not found] ` <201306260851362261286@gmail.com>
  2 siblings, 0 replies; 4+ messages in thread
From: Robin Dong @ 2013-06-25  8:51 UTC (permalink / raw)
  To: linux-raid; +Cc: Robin Dong, Robin Dong, NeilBrown

Add a new feature flag named "MD_FEATURE_SYNCBITMAP" and initialize the sync-bitmap with all bits set to zero.

Signed-off-by: Robin Dong<sanbai@taobao.com>
Cc: NeilBrown <neilb@suse.de>
---
 Grow.c   |    1 -
 super1.c |   24 +++++++++++++++++++++---
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/Grow.c b/Grow.c
index 948fc8d..44772bf 100644
--- a/Grow.c
+++ b/Grow.c
@@ -1541,7 +1541,6 @@ int Grow_reshape(char *devname, int fd,
 	struct mdinfo info;
 	struct mdinfo *sra;
 
-
 	if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
 		fprintf(stderr, Name ": %s is not an active md array - aborting\n",
 			devname);
diff --git a/super1.c b/super1.c
index d0f1d5f..6240aac 100644
--- a/super1.c
+++ b/super1.c
@@ -124,6 +124,7 @@ struct misc_dev_info {
 					    * backwards anyway.
 					    */
 #define	MD_FEATURE_NEW_OFFSET		64 /* new_offset must be honoured */
+#define	MD_FEATURE_SYNCBITMAP		128
 #define	MD_FEATURE_ALL			(MD_FEATURE_BITMAP_OFFSET	\
 					|MD_FEATURE_RECOVERY_OFFSET	\
 					|MD_FEATURE_RESHAPE_ACTIVE	\
@@ -131,6 +132,7 @@ struct misc_dev_info {
 					|MD_FEATURE_REPLACEMENT		\
 					|MD_FEATURE_RESHAPE_BACKWARDS	\
 					|MD_FEATURE_NEW_OFFSET		\
+					|MD_FEATURE_SYNCBITMAP		\
 					)
 
 #ifndef offsetof
@@ -1961,7 +1963,7 @@ add_internal_bitmap1(struct supertype *st,
 	sb->bitmap_offset = (int32_t)__cpu_to_le32(offset);
 
 	sb->feature_map = __cpu_to_le32(__le32_to_cpu(sb->feature_map)
-					| MD_FEATURE_BITMAP_OFFSET);
+					| MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_SYNCBITMAP);
 	memset(bms, 0, sizeof(*bms));
 	bms->magic = __cpu_to_le32(BITMAP_MAGIC);
 	bms->version = __cpu_to_le32(major);
@@ -2002,7 +2004,7 @@ static int write_bitmap1(struct supertype *st, int fd)
 	bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb)+MAX_SB_SIZE);
 	int rv = 0;
 	void *buf;
-	int towrite, n;
+	int towrite, syncbitmap_towrite, n;
 	struct align_fd afd;
 
 	init_afd(&afd, fd);
@@ -2018,7 +2020,8 @@ static int write_bitmap1(struct supertype *st, int fd)
 	towrite = __le64_to_cpu(bms->sync_size) / (__le32_to_cpu(bms->chunksize)>>9);
 	towrite = (towrite+7) >> 3; /* bits to bytes */
 	towrite += sizeof(bitmap_super_t);
-	towrite = ROUND_UP(towrite, 512);
+	towrite = ROUND_UP(towrite, 4096);
+	syncbitmap_towrite = towrite;
 	while (towrite > 0) {
 		n = towrite;
 		if (n > 4096)
@@ -2030,6 +2033,21 @@ static int write_bitmap1(struct supertype *st, int fd)
 			break;
 		memset(buf, 0xff, 4096);
 	}
+
+	/* write init sync-bitmap */
+	memset(buf, 0x00, 4096);
+	while (syncbitmap_towrite > 0) {
+		n = syncbitmap_towrite;
+		if (n > 4096)
+			n = 4096;
+		n = awrite(&afd, buf, n);
+		if (n > 0)
+			syncbitmap_towrite -= n;
+		else
+			break;
+		memset(buf, 0x00, 4096);
+	}
+
 	fsync(fd);
 	if (towrite)
 		rv = -2;
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v1] md: add sync-bitmap to only resync WRITTEN data when adding new disk in raid array.
  2013-06-25  8:51 [PATCH v1] md: add sync-bitmap to only resync WRITTEN data when adding new disk in raid array Robin Dong
  2013-06-25  8:51 ` [PATCH v1] mdadm: " Robin Dong
@ 2013-06-26  2:26 ` NeilBrown
       [not found] ` <201306260851362261286@gmail.com>
  2 siblings, 0 replies; 4+ messages in thread
From: NeilBrown @ 2013-06-26  2:26 UTC (permalink / raw)
  To: Robin Dong; +Cc: linux-raid, Robin Dong

[-- Attachment #1: Type: text/plain, Size: 17167 bytes --]

On Tue, 25 Jun 2013 16:51:36 +0800 Robin Dong <robin.k.dong@gmail.com> wrote:

> From: Robin Dong <sanbai@taobao.com>
> 
> Add a new bitmap type named "sync-bitmap" to md. Every chunk that has been
> WRITTEN is marked in it, so when a new disk is added md only resyncs the
> WRITTEN data to the new disk. This saves a lot of time and reduces disk wear.
> 
> The "sync-bitmap" is stored after the "write-intent-bitmap", not immediately
> behind it but starting at the next PAGE_SIZE-aligned boundary:
> 
> |          page0                       |        page1                   |
> +--------------------------------------+--------------------------------+
> |bitmap_super and write-intent-bitmap  |         sync-bitmap            |
> 
> Every write operation sets the corresponding bit in the sync-bitmap.
> 
> 

Hi Robin,
 thanks for the patch.

I think the idea of simply appending the sync bitmap to the end of the
write-intent bitmap is a good one.
I would probably put the flag indicating its presence in the bitmap
superblock rather than the md superblock (maybe as a new bitmap version
number), but that is a fairly small point.  If you encoded the presence of
the sync-bitmap in the version number, it would be clear that there is no
need for a host-endian version (which really is the case. No new hostendian
bitmaps please!).

There is a bigger issue though.  For RAID5, it is important that the array
actually be in-sync, otherwise corruption can occur.
If/when we implement read-modify-write cycles for RAID6, this will be true
for RAID6 too.

If part of the array is not in sync, then a read-modify-write cycle will
update incorrect parity to new incorrect parity.  If you subsequently get a
drive failure, any data on that drive in the not-in-sync region will be lost.

So when you set a bit in the sync-bitmap, you need to be sure that the whole
region actually is in-sync.  I don't think your patch does that (or did I
miss it?).

I think the correct sequence would be:
 - on first write to a not-in-sync region, set the write-intent bit and
   schedule a resync.  Don't allow the write-intent bit to be cleared.
 - When the resync finishes, set the sync bit and allow the write-intent bit
   to be cleared.

Then on reboot we would need to resync all regions with the write-intent bit
set (which we do anyway), and then make sure the sync bit is set.

This extra work is not needed for RAID1 and RAID10.  If you particularly want
the functionality only for those levels, I could accept a patch which works
much like yours, but restricts the sync bitmap to only be active on
RAID1 and RAID10.

Thanks,
NeilBrown


> TEST CASE:
> 
> 	mdadm --create /dev/md1 --bitmap=internal --chunk=64 --level=1 --raid-devices=2 /dev/sdf missing --assume-clean
> 	mkfs.ext4 /dev/md1
> 	mount -t ext4 /dev/md1 /mnt/
> 	cp kernel.tgz /mnt/
> 	reboot
> 	mdadm --assemble /dev/md1 /dev/sdf
> 	mdadm --add /dev/md1 /dev/sdg
> 	echo offline > /sys/block/sdf/device/state
> 	mount -t ext4 /dev/md1 /mnt/ (mount success)
> 	cksum /mnt/kernel.tgz        (cksum ok)
> 
> TODO:
> 
> 	* Allow "discard" to clear bit in sync-bitmap
> 	* More complicated test case on raid5
> 
> Signed-off-by: Robin Dong <sanbai@taobao.com>
> Cc: NeilBrown <neilb@suse.de>
> ---
>  drivers/md/bitmap.c            |  195 ++++++++++++++++++++++++++++++++++++++--
>  drivers/md/bitmap.h            |    5 +
>  drivers/md/md.c                |    7 ++-
>  drivers/md/md.h                |    1 +
>  drivers/md/raid1.c             |    7 ++
>  drivers/md/raid5.c             |    7 ++
>  include/uapi/linux/raid/md_p.h |    2 +
>  7 files changed, 217 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
> index 5a2c754..86279e1 100644
> --- a/drivers/md/bitmap.c
> +++ b/drivers/md/bitmap.c
> @@ -30,6 +30,13 @@
>  #include "md.h"
>  #include "bitmap.h"
>  
> +static inline sector_t syncbitmap_offset(struct bitmap *bitmap, sector_t block)
> +{
> +	return block +
> +		(bitmap->syncbitmap_num_pages << bitmap->counts.chunkshift
> +		 << PAGE_SHIFT << 3);
> +}
> +
>  static inline char *bmname(struct bitmap *bitmap)
>  {
>  	return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
> @@ -682,18 +689,40 @@ static inline struct page *filemap_get_page(struct bitmap_storage *store,
>  			      - file_page_index(store, 0)];
>  }
>  
> -static int bitmap_storage_alloc(struct bitmap_storage *store,
> -				unsigned long chunks, int with_super)
> +static void chunks_to_pages(unsigned long chunks, unsigned long *res_bytes,
> +			unsigned long *res_pages, int with_super)
>  {
> -	int pnum;
> -	unsigned long num_pages;
>  	unsigned long bytes;
>  
>  	bytes = DIV_ROUND_UP(chunks, 8);
>  	if (with_super)
>  		bytes += sizeof(bitmap_super_t);
>  
> -	num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
> +	if (res_bytes)
> +		*res_bytes = bytes;
> +	if (res_pages)
> +		*res_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
> +}
> +
> +static int bitmap_storage_alloc(struct bitmap_storage *store,
> +				unsigned long chunks, int with_super,
> +				int with_sync_bitmap)
> +{
> +	int pnum;
> +	unsigned long num_pages;
> +	unsigned long bytes;
> +	unsigned long syncbitmap_num_pages;
> +
> +	chunks_to_pages(chunks, &bytes, &num_pages, with_super);
> +	/* we need two bitmaps: write-intent-bitmap and sync-bitmap, sync-bitmap
> +	 * locates behind write-intent-bitmap closely. write-intent-bit maps
> +	 * "this was written recently, a resync might be needed after a crash"
> +	 * and the sync-bit maps "This has been written since array create,
> +	 * so the chunk needs to be recovered to any spare".
> +	 */
> +	chunks_to_pages(chunks, NULL, &syncbitmap_num_pages, 0);
> +	if (with_sync_bitmap)
> +		num_pages += syncbitmap_num_pages;
>  
>  	store->filemap = kmalloc(sizeof(struct page *)
>  				 * num_pages, GFP_KERNEL);
> @@ -853,6 +882,41 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
>  	set_page_attr(bitmap, page->index, BITMAP_PAGE_DIRTY);
>  }
>  
> +static int syncbitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
> +{
> +	unsigned long bit;
> +	struct page *page;
> +	void *kaddr;
> +	unsigned long chunk;
> +	int res;
> +
> +	chunk = syncbitmap_offset(bitmap, block) >> bitmap->counts.chunkshift;
> +
> +	page = filemap_get_page(&bitmap->storage, chunk);
> +	if (!page)
> +		return 1;
> +	bit = file_page_offset(&bitmap->storage, chunk);
> +
> +	/* set the bit */
> +	kaddr = kmap_atomic(page);
> +	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
> +		res = test_bit(bit, kaddr);
> +	else
> +		res = test_bit_le(bit, kaddr);
> +	kunmap_atomic(kaddr);
> +	pr_debug("test syncbitmap bit %lu page %lu\n", bit, page->index);
> +	return res;
> +}
> +
> +/*
> + * syncbitmap_file_set_bit -- set the bit in sync-bitmap, just jump out
> + * the offset of write-intent-bitmap.
> + */
> +static void syncbitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
> +{
> +	bitmap_file_set_bit(bitmap, syncbitmap_offset(bitmap, block));
> +}
> +
>  static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
>  {
>  	unsigned long bit;
> @@ -1038,6 +1102,61 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
>  		offset = 0;
>  	}
>  
> +	if (bitmap->mddev->bitmap_info.sync_bitmap) {
> +		for (i = 0; i < chunks; i++) {
> +			int b;
> +			index = file_page_index(&bitmap->storage, i) +
> +				bitmap->syncbitmap_num_pages;
> +			bit = file_page_offset(&bitmap->storage, i);
> +			if (index != oldindex) {
> +				/* this is a new page, read it in */
> +				page = store->filemap[index];
> +				if (file)
> +					ret = read_page(file, index, bitmap,
> +							PAGE_SIZE, page);
> +				else
> +					ret = read_sb_page(
> +						bitmap->mddev,
> +						bitmap->mddev->bitmap_info.offset,
> +						page,
> +						index, PAGE_SIZE);
> +				if (ret)
> +					goto err;
> +
> +				oldindex = index;
> +
> +				if (outofdate) {
> +					/*
> +					 * if bitmap is out of date, dirty the
> +					 * whole page and write it out
> +					 */
> +					paddr = kmap_atomic(page);
> +					memset(paddr + offset, 0xff,
> +						   PAGE_SIZE - offset);
> +					kunmap_atomic(paddr);
> +					write_page(bitmap, page, 1);
> +
> +					ret = -EIO;
> +					if (test_bit(BITMAP_WRITE_ERROR,
> +							 &bitmap->flags))
> +						goto err;
> +				}
> +			}
> +			paddr = kmap_atomic(page);
> +			if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
> +				b = test_bit(bit, paddr);
> +			else
> +				b = test_bit_le(bit, paddr);
> +			kunmap_atomic(paddr);
> +			if (b) {
> +				/* if the disk bit is set, set the memory bit */
> +				syncbitmap_file_set_bit(bitmap, (sector_t)i <<
> +						bitmap->counts.chunkshift);
> +				bit_cnt++;
> +			}
> +			offset = 0;
> +		}
> +	}
>  	printk(KERN_INFO "%s: bitmap initialized from disk: "
>  	       "read %lu pages, set %lu of %lu bits\n",
>  	       bmname(bitmap), store->file_pages,
> @@ -1303,6 +1422,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
>  			continue;
>  		}
>  
> +		syncbitmap_file_set_bit(bitmap, offset);
>  		switch (*bmc) {
>  		case 0:
>  			bitmap_file_set_bit(bitmap, offset);
> @@ -1431,6 +1551,42 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
>  }
>  EXPORT_SYMBOL(bitmap_start_sync);
>  
> +int __syncbitmap_start_sync(struct bitmap *bitmap, sector_t offset,
> +						sector_t *blocks)
> +{
> +	int res;
> +	unsigned long csize;
> +	if (bitmap == NULL) {
> +		*blocks = 1024;
> +		return 1;
> +	}
> +
> +	spin_lock_irq(&bitmap->counts.lock);
> +	res = syncbitmap_file_test_bit(bitmap, offset);
> +	if (res) {
> +		csize = ((sector_t)1) << bitmap->counts.chunkshift;
> +		*blocks = csize - (offset & (csize - 1));
> +	}
> +	spin_unlock_irq(&bitmap->counts.lock);
> +	return res;
> +}
> +
> +int syncbitmap_start_sync(struct bitmap *bitmap, sector_t offset,
> +						sector_t *blocks)
> +{
> +	int rv = 0;
> +	sector_t blocks1;
> +
> +	*blocks = 0;
> +	while (*blocks < (PAGE_SIZE>>9)) {
> +		rv |= __syncbitmap_start_sync(bitmap, offset, &blocks1);
> +		offset += blocks1;
> +		*blocks += blocks1;
> +	}
> +	return rv;
> +}
> +EXPORT_SYMBOL(syncbitmap_start_sync);
> +
>  void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted)
>  {
>  	bitmap_counter_t *bmc;
> @@ -1805,6 +1961,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
>  	sector_t old_blocks, new_blocks;
>  	int chunkshift;
>  	int ret = 0;
> +	unsigned long pnum, old_pnum, num_pages, old_num_pages;
>  	long pages;
>  	struct bitmap_page *new_bp;
>  
> @@ -1842,7 +1999,8 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
>  	memset(&store, 0, sizeof(store));
>  	if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
>  		ret = bitmap_storage_alloc(&store, chunks,
> -					   !bitmap->mddev->bitmap_info.external);
> +				!bitmap->mddev->bitmap_info.external,
> +				bitmap->mddev->bitmap_info.sync_bitmap);
>  	if (ret)
>  		goto err;
>  
> @@ -1865,6 +2023,31 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
>  		memcpy(page_address(store.sb_page),
>  		       page_address(bitmap->storage.sb_page),
>  		       sizeof(bitmap_super_t));
> +	if (bitmap->mddev->bitmap_info.sync_bitmap) {
> +		/* copy old sync-bitmap to new one */
> +		chunks_to_pages(chunks, NULL, &pnum,
> +					!bitmap->mddev->bitmap_info.external);
> +		bitmap->syncbitmap_num_pages = pnum;
> +		if (bitmap->storage.filemap) {
> +			chunks_to_pages(bitmap->counts.chunks, NULL, &old_pnum,
> +				!bitmap->mddev->bitmap_info.external);
> +			num_pages = pnum * 2;
> +			old_num_pages = old_pnum * 2;
> +			pnum++;
> +			old_pnum++;
> +			for (; pnum <= num_pages && old_pnum <= old_num_pages;
> +					pnum++, old_pnum++) {
> +				memcpy(store.filemap[pnum],
> +					bitmap->storage.filemap[old_pnum],
> +					PAGE_SIZE);
> +				/* All new sync-bitmap data
> +				 * shoule be write out */
> +				set_bit((pnum << 2) + BITMAP_PAGE_DIRTY,
> +					store.filemap_attr);
> +			}
> +		}
> +	}
> +
>  	bitmap_file_unmap(&bitmap->storage);
>  	bitmap->storage = store;
>  
> diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
> index df4aeb6..87c4686 100644
> --- a/drivers/md/bitmap.h
> +++ b/drivers/md/bitmap.h
> @@ -226,6 +226,7 @@ struct bitmap {
>  	wait_queue_head_t behind_wait;
>  
>  	struct sysfs_dirent *sysfs_can_clear;
> +	unsigned long syncbitmap_num_pages;
>  };
>  
>  /* the bitmap API */
> @@ -252,6 +253,10 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
>  			unsigned long sectors, int success, int behind);
>  int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded);
>  void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
> +
> +int syncbitmap_start_sync(struct bitmap *bitmap, sector_t offset,
> +						sector_t *blocks);
> +
>  void bitmap_close_sync(struct bitmap *bitmap);
>  void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector);
>  
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 681d109..fb81a01 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -1621,6 +1621,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
>  		mddev->events = ev1;
>  		mddev->bitmap_info.offset = 0;
>  		mddev->bitmap_info.space = 0;
> +		mddev->bitmap_info.sync_bitmap = 0;
>  		/* Default location for bitmap is 1K after superblock
>  		 * using 3K - total of 4K
>  		 */
> @@ -1652,6 +1653,9 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
>  					-mddev->bitmap_info.offset;
>  		}
>  
> +		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_SYNCBITMAP)
> +			mddev->bitmap_info.sync_bitmap = 1;
> +
>  		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
>  			mddev->reshape_position = le64_to_cpu(sb->reshape_position);
>  			mddev->delta_disks = le32_to_cpu(sb->delta_disks);
> @@ -1762,7 +1766,8 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
>  
>  	if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
>  		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
> -		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
> +		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET |
> +					MD_FEATURE_SYNCBITMAP);
>  	}
>  
>  	if (rdev->raid_disk >= 0 &&
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 653f992..1cef001 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -404,6 +404,7 @@ struct mddev {
>  		unsigned long		daemon_sleep; /* how many jiffies between updates? */
>  		unsigned long		max_write_behind; /* write-behind mode */
>  		int			external;
> +		int			sync_bitmap;
>  	} bitmap_info;
>  
>  	atomic_t 			max_corr_read_errors; /* max read retries */
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index 5595118..ba47ee7 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -2396,6 +2396,13 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
>  		*skipped = 1;
>  		return sync_blocks;
>  	}
> +
> +	if (conf->fullsync && !syncbitmap_start_sync(mddev->bitmap,
> +				sector_nr, &sync_blocks)) {
> +		*skipped = 1;
> +		return sync_blocks;
> +	}
> +
>  	/*
>  	 * If there is non-resync activity waiting for a turn,
>  	 * and resync is going fast enough,
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index 9359828..7528aa8 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -4688,6 +4688,13 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
>  		return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */
>  	}
>  
> +	if (conf->fullsync && sync_blocks >= STRIPE_SECTORS &&
> +	    !syncbitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks)) {
> +		sync_blocks /= STRIPE_SECTORS;
> +		*skipped = 1;
> +		return sync_blocks * STRIPE_SECTORS;
> +	}
> +
>  	bitmap_cond_end_sync(mddev->bitmap, sector_nr);
>  
>  	sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
> diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
> index fe1a540..7949f61 100644
> --- a/include/uapi/linux/raid/md_p.h
> +++ b/include/uapi/linux/raid/md_p.h
> @@ -291,6 +291,7 @@ struct mdp_superblock_1 {
>  					    * backwards anyway.
>  					    */
>  #define	MD_FEATURE_NEW_OFFSET		64 /* new_offset must be honoured */
> +#define	MD_FEATURE_SYNCBITMAP		128
>  #define	MD_FEATURE_ALL			(MD_FEATURE_BITMAP_OFFSET	\
>  					|MD_FEATURE_RECOVERY_OFFSET	\
>  					|MD_FEATURE_RESHAPE_ACTIVE	\
> @@ -298,6 +299,7 @@ struct mdp_superblock_1 {
>  					|MD_FEATURE_REPLACEMENT		\
>  					|MD_FEATURE_RESHAPE_BACKWARDS	\
>  					|MD_FEATURE_NEW_OFFSET		\
> +					|MD_FEATURE_SYNCBITMAP		\
>  					)
>  
>  #endif 


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 828 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: Re: [PATCH v1] md: add sync-bitmap to only resync WRITTEN data when adding new disk in raid array.
       [not found]   ` <CANsebLG00tpjOEuYzUo=PByT+wu-w8Z77XnFG2W1dHMVgLbUfQ@mail.gmail.com>
@ 2013-07-01 11:40     ` majianpeng
  0 siblings, 0 replies; 4+ messages in thread
From: majianpeng @ 2013-07-01 11:40 UTC (permalink / raw)
  To: Robin Dong; +Cc: linux-raid, Robin Dong, NeilBrown

>Hi Jianpeng,
>
>The granularity of the file-system and md is very different. For example, if
>one bit in the bitmap represents a 16MB chunk, but the file-system only writes
>the first block (4K) in this chunk, the bit must now be set.
>But when someone reads the end of this chunk, we can't know which part of
>this 16MB chunk is the no-sync area and which part is the sync area.
>
Yes. If the area being read looks like:
|--sync-area---|---no-sync-area--|
then the original bio should be split into two or more bios:
one for the sync-area, which should be read from the lower-level driver,
and another for the no-sync-area, which should just return zeros, like an SSD.

Thanks!
Jianpeng
>
>2013/6/26 kernelmail <kedacomkernel@gmail.com>
>
>> >From: Robin Dong <sanbai@taobao.com>
>> >
>> >Add a new bitmap type named "sync-bitmap" for md, all the WRITTEN data
>> will be
>> >marked and when adding a new disk, the md will only resync WRITTEN data to
>> >new disk therefore it will save a lot of time and reduce disk-durability.
>> >
>> >We add the "sync-bitmap" behind the "write-intent-bitmap", not closely but
>> >aligned to PAGE_SIZE:
>> >
>> >|          page0                       |        page1                   |
>> >+--------------------------------------+--------------------------------+
>> >|bitmap_super and write-intent-bitmap  |         sync-bitmap            |
>> >
>> >all the write-operation will set the bit in sync-bitmap.
>> >
>> >
>> I like this feature very much. But your patch doesn't handle reads/writes
>> to the no-sync area.
>> I think a read from the no-sync area should return 0, the way an SSD or
>> thin-provisioned device does.
>>
>> Thanks
>> Jianpeng Ma
>> >TEST CASE:
>> >
>> >       mdadm --create /dev/md1 --bitmap=internal --chunk=64 --level=1
>> --raid-devices=2 /dev/sdf missing --assume-clean
>> >       mkfs.ext4 /dev/md1
>> >       mount -t ext4 /dev/md1 /mnt/
>> >       cp kernel.tgz /mnt/
>> >       reboot
>> >       mdadm --assemble /dev/md1 /dev/sdf
>> >       mdadm --add /dev/md1 /dev/sdg
>> >       echo offline > /sys/block/sdf/device/state
>> >       mount -t ext4 /dev/md1 /mnt/ (mount success)
>> >       cksum /mnt/kernel.tgz        (cksum ok)
>> >
>> >TODO:
>> >
>> >       * Allow "discard" to clear bit in sync-bitmap
>> >       * More complicated test case on raid5
>> >
>> >Signed-off-by: Robin Dong <sanbai@taobao.com>
>> >Cc: NeilBrown <neilb@suse.de>
>> >---
>> > drivers/md/bitmap.c            |  195
>> ++++++++++++++++++++++++++++++++++++++--
>> > drivers/md/bitmap.h            |    5 +
>> > drivers/md/md.c                |    7 ++-
>> > drivers/md/md.h                |    1 +
>> > drivers/md/raid1.c             |    7 ++
>> > drivers/md/raid5.c             |    7 ++
>> > include/uapi/linux/raid/md_p.h |    2 +
>> > 7 files changed, 217 insertions(+), 7 deletions(-)
>> >
>> >diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
>> >index 5a2c754..86279e1 100644
>> >--- a/drivers/md/bitmap.c
>> >+++ b/drivers/md/bitmap.c
>> >@@ -30,6 +30,13 @@
>> > #include "md.h"
>> > #include "bitmap.h"
>> >
>> >+static inline sector_t syncbitmap_offset(struct bitmap *bitmap, sector_t
>> block)
>> >+{
>> >+      return block +
>> >+              (bitmap->syncbitmap_num_pages << bitmap->counts.chunkshift
>> >+               << PAGE_SHIFT << 3);
>> >+}
>> >+
>> > static inline char *bmname(struct bitmap *bitmap)
>> > {
>> >       return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
>> >@@ -682,18 +689,40 @@ static inline struct page *filemap_get_page(struct
>> bitmap_storage *store,
>> >                             - file_page_index(store, 0)];
>> > }
>> >
>> >-static int bitmap_storage_alloc(struct bitmap_storage *store,
>> >-                              unsigned long chunks, int with_super)
>> >+static void chunks_to_pages(unsigned long chunks, unsigned long
>> *res_bytes,
>> >+                      unsigned long *res_pages, int with_super)
>> > {
>> >-      int pnum;
>> >-      unsigned long num_pages;
>> >       unsigned long bytes;
>> >
>> >       bytes = DIV_ROUND_UP(chunks, 8);
>> >       if (with_super)
>> >               bytes += sizeof(bitmap_super_t);
>> >
>> >-      num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
>> >+      if (res_bytes)
>> >+              *res_bytes = bytes;
>> >+      if (res_pages)
>> >+              *res_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
>> >+}
>> >+
>> >+static int bitmap_storage_alloc(struct bitmap_storage *store,
>> >+                              unsigned long chunks, int with_super,
>> >+                              int with_sync_bitmap)
>> >+{
>> >+      int pnum;
>> >+      unsigned long num_pages;
>> >+      unsigned long bytes;
>> >+      unsigned long syncbitmap_num_pages;
>> >+
>> >+      chunks_to_pages(chunks, &bytes, &num_pages, with_super);
>> >+      /* we need two bitmaps: write-intent-bitmap and sync-bitmap,
>> sync-bitmap
>> >+       * locates behind write-intent-bitmap closely. write-intent-bit
>> maps
>> >+       * "this was written recently, a resync might be needed after a
>> crash"
>> >+       * and the sync-bit maps "This has been written since array create,
>> >+       * so the chunk needs to be recovered to any spare".
>> >+       */
>> >+      chunks_to_pages(chunks, NULL, &syncbitmap_num_pages, 0);
>> >+      if (with_sync_bitmap)
>> >+              num_pages += syncbitmap_num_pages;
>> >
>> >       store->filemap = kmalloc(sizeof(struct page *)
>> >                                * num_pages, GFP_KERNEL);
>> >@@ -853,6 +882,41 @@ static void bitmap_file_set_bit(struct bitmap
>> *bitmap, sector_t block)
>> >       set_page_attr(bitmap, page->index, BITMAP_PAGE_DIRTY);
>> > }
>> >
>> >+static int syncbitmap_file_test_bit(struct bitmap *bitmap, sector_t
>> block)
>> >+{
>> >+      unsigned long bit;
>> >+      struct page *page;
>> >+      void *kaddr;
>> >+      unsigned long chunk;
>> >+      int res;
>> >+
>> >+      chunk = syncbitmap_offset(bitmap, block) >>
>> bitmap->counts.chunkshift;
>> >+
>> >+      page = filemap_get_page(&bitmap->storage, chunk);
>> >+      if (!page)
>> >+              return 1;
>> >+      bit = file_page_offset(&bitmap->storage, chunk);
>> >+
>> >+      /* set the bit */
>> >+      kaddr = kmap_atomic(page);
>> >+      if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
>> >+              res = test_bit(bit, kaddr);
>> >+      else
>> >+              res = test_bit_le(bit, kaddr);
>> >+      kunmap_atomic(kaddr);
>> >+      pr_debug("test syncbitmap bit %lu page %lu\n", bit, page->index);
>> >+      return res;
>> >+}
>> >+
>> >+/*
>> >+ * syncbitmap_file_set_bit -- set the bit in sync-bitmap, just jump out
>> >+ * the offset of write-intent-bitmap.
>> >+ */
>> >+static void syncbitmap_file_set_bit(struct bitmap *bitmap, sector_t
>> block)
>> >+{
>> >+      bitmap_file_set_bit(bitmap, syncbitmap_offset(bitmap, block));
>> >+}
>> >+
>> > static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
>> > {
>> >       unsigned long bit;
>> >@@ -1038,6 +1102,61 @@ static int bitmap_init_from_disk(struct bitmap
>> *bitmap, sector_t start)
>> >               offset = 0;
>> >       }
>> >
>> >+      if (bitmap->mddev->bitmap_info.sync_bitmap) {
>> >+              for (i = 0; i < chunks; i++) {
>> >+                      int b;
>> >+                      index = file_page_index(&bitmap->storage, i) +
>> >+                              bitmap->syncbitmap_num_pages;
>> >+                      bit = file_page_offset(&bitmap->storage, i);
>> >+                      if (index != oldindex) {
>> >+                              /* this is a new page, read it in */
>> >+                              page = store->filemap[index];
>> >+                              if (file)
>> >+                                      ret = read_page(file, index,
>> bitmap,
>> >+                                                      PAGE_SIZE, page);
>> >+                              else
>> >+                                      ret = read_sb_page(
>> >+                                              bitmap->mddev,
>> >+
>>  bitmap->mddev->bitmap_info.offset,
>> >+                                              page,
>> >+                                              index, PAGE_SIZE);
>> >+                              if (ret)
>> >+                                      goto err;
>> >+
>> >+                              oldindex = index;
>> >+
>> >+                              if (outofdate) {
>> >+                                      /*
>> >+                                       * if bitmap is out of date, dirty
>> the
>> >+                                       * whole page and write it out
>> >+                                       */
>> >+                                      paddr = kmap_atomic(page);
>> >+                                      memset(paddr + offset, 0xff,
>> >+                                                 PAGE_SIZE - offset);
>> >+                                      kunmap_atomic(paddr);
>> >+                                      write_page(bitmap, page, 1);
>> >+
>> >+                                      ret = -EIO;
>> >+                                      if (test_bit(BITMAP_WRITE_ERROR,
>> >+                                                       &bitmap->flags))
>> >+                                              goto err;
>> >+                              }
>> >+                      }
>> >+                      paddr = kmap_atomic(page);
>> >+                      if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
>> >+                              b = test_bit(bit, paddr);
>> >+                      else
>> >+                              b = test_bit_le(bit, paddr);
>> >+                      kunmap_atomic(paddr);
>> >+                      if (b) {
>> >+                              /* if the disk bit is set, set the memory
>> bit */
>> >+                              syncbitmap_file_set_bit(bitmap,
>> (sector_t)i <<
>> >+                                              bitmap->counts.chunkshift);
>> >+                              bit_cnt++;
>> >+                      }
>> >+                      offset = 0;
>> >+              }
>> >+      }
>> >       printk(KERN_INFO "%s: bitmap initialized from disk: "
>> >              "read %lu pages, set %lu of %lu bits\n",
>> >              bmname(bitmap), store->file_pages,
>> >@@ -1303,6 +1422,7 @@ int bitmap_startwrite(struct bitmap *bitmap,
>> sector_t offset, unsigned long sect
>> >                       continue;
>> >               }
>> >
>> >+              syncbitmap_file_set_bit(bitmap, offset);
>> >               switch (*bmc) {
>> >               case 0:
>> >                       bitmap_file_set_bit(bitmap, offset);
>> >@@ -1431,6 +1551,42 @@ int bitmap_start_sync(struct bitmap *bitmap,
>> sector_t offset, sector_t *blocks,
>> > }
>> > EXPORT_SYMBOL(bitmap_start_sync);
>> >
>> >+int __syncbitmap_start_sync(struct bitmap *bitmap, sector_t offset,
>> >+                                              sector_t *blocks)
>> >+{
>> >+      int res;
>> >+      unsigned long csize;
>> >+      if (bitmap == NULL) {
>> >+              *blocks = 1024;
>> >+              return 1;
>> >+      }
>> >+
>> >+      spin_lock_irq(&bitmap->counts.lock);
>> >+      res = syncbitmap_file_test_bit(bitmap, offset);
>> >+      if (res) {
>> >+              csize = ((sector_t)1) << bitmap->counts.chunkshift;
>> >+              *blocks = csize - (offset & (csize - 1));
>> >+      }
>> >+      spin_unlock_irq(&bitmap->counts.lock);
>> >+      return res;
>> >+}
>> >+
>> >+int syncbitmap_start_sync(struct bitmap *bitmap, sector_t offset,
>> >+                                              sector_t *blocks)
>> >+{
>> >+      int rv = 0;
>> >+      sector_t blocks1;
>> >+
>> >+      *blocks = 0;
>> >+      while (*blocks < (PAGE_SIZE>>9)) {
>> >+              rv |= __syncbitmap_start_sync(bitmap, offset, &blocks1);
>> >+              offset += blocks1;
>> >+              *blocks += blocks1;
>> >+      }
>> >+      return rv;
>> >+}
>> >+EXPORT_SYMBOL(syncbitmap_start_sync);
>> >+
>> > void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t
>> *blocks, int aborted)
>> > {
>> >       bitmap_counter_t *bmc;
>> >@@ -1805,6 +1961,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t
>> blocks,
>> >       sector_t old_blocks, new_blocks;
>> >       int chunkshift;
>> >       int ret = 0;
>> >+      unsigned long pnum, old_pnum, num_pages, old_num_pages;
>> >       long pages;
>> >       struct bitmap_page *new_bp;
>> >
>> >@@ -1842,7 +1999,8 @@ int bitmap_resize(struct bitmap *bitmap, sector_t
>> blocks,
>> >       memset(&store, 0, sizeof(store));
>> >       if (bitmap->mddev->bitmap_info.offset ||
>> bitmap->mddev->bitmap_info.file)
>> >               ret = bitmap_storage_alloc(&store, chunks,
>> >-
>> !bitmap->mddev->bitmap_info.external);
>> >+                              !bitmap->mddev->bitmap_info.external,
>> >+                              bitmap->mddev->bitmap_info.sync_bitmap);
>> >       if (ret)
>> >               goto err;
>> >
>> >@@ -1865,6 +2023,31 @@ int bitmap_resize(struct bitmap *bitmap, sector_t
>> blocks,
>> >               memcpy(page_address(store.sb_page),
>> >                      page_address(bitmap->storage.sb_page),
>> >                      sizeof(bitmap_super_t));
>> >+      if (bitmap->mddev->bitmap_info.sync_bitmap) {
>> >+              /* copy old sync-bitmap to new one */
>> >+              chunks_to_pages(chunks, NULL, &pnum,
>> >+
>>  !bitmap->mddev->bitmap_info.external);
>> >+              bitmap->syncbitmap_num_pages = pnum;
>> >+              if (bitmap->storage.filemap) {
>> >+                      chunks_to_pages(bitmap->counts.chunks, NULL,
>> &old_pnum,
>> >+                              !bitmap->mddev->bitmap_info.external);
>> >+                      num_pages = pnum * 2;
>> >+                      old_num_pages = old_pnum * 2;
>> >+                      pnum++;
>> >+                      old_pnum++;
>> >+                      for (; pnum <= num_pages && old_pnum <=
>> old_num_pages;
>> >+                                      pnum++, old_pnum++) {
>> >+                              memcpy(store.filemap[pnum],
>> >+                                      bitmap->storage.filemap[old_pnum],
>> >+                                      PAGE_SIZE);
>> >+                              /* All new sync-bitmap data
>> >+                               * shoule be write out */
>> >+                              set_bit((pnum << 2) + BITMAP_PAGE_DIRTY,
>> >+                                      store.filemap_attr);
>> >+                      }
>> >+              }
>> >+      }
>> >+
>> >       bitmap_file_unmap(&bitmap->storage);
>> >       bitmap->storage = store;
>> >
>> >diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
>> >index df4aeb6..87c4686 100644
>> >--- a/drivers/md/bitmap.h
>> >+++ b/drivers/md/bitmap.h
>> >@@ -226,6 +226,7 @@ struct bitmap {
>> >       wait_queue_head_t behind_wait;
>> >
>> >       struct sysfs_dirent *sysfs_can_clear;
>> >+      unsigned long syncbitmap_num_pages;
>> > };
>> >
>> > /* the bitmap API */
>> >@@ -252,6 +253,10 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t
>> offset,
>> >                       unsigned long sectors, int success, int behind);
>> > int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t
>> *blocks, int degraded);
>> > void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t
>> *blocks, int aborted);
>> >+
>> >+int syncbitmap_start_sync(struct bitmap *bitmap, sector_t offset,
>> >+                                              sector_t *blocks);
>> >+
>> > void bitmap_close_sync(struct bitmap *bitmap);
>> > void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector);
>> >
>> >diff --git a/drivers/md/md.c b/drivers/md/md.c
>> >index 681d109..fb81a01 100644
>> >--- a/drivers/md/md.c
>> >+++ b/drivers/md/md.c
>> >@@ -1621,6 +1621,7 @@ static int super_1_validate(struct mddev *mddev,
>> struct md_rdev *rdev)
>> >               mddev->events = ev1;
>> >               mddev->bitmap_info.offset = 0;
>> >               mddev->bitmap_info.space = 0;
>> >+              mddev->bitmap_info.sync_bitmap = 0;
>> >               /* Default location for bitmap is 1K after superblock
>> >                * using 3K - total of 4K
>> >                */
>> >@@ -1652,6 +1653,9 @@ static int super_1_validate(struct mddev *mddev,
>> struct md_rdev *rdev)
>> >                                       -mddev->bitmap_info.offset;
>> >               }
>> >
>> >+              if (le32_to_cpu(sb->feature_map) & MD_FEATURE_SYNCBITMAP)
>> >+                      mddev->bitmap_info.sync_bitmap = 1;
>> >+
>> >               if ((le32_to_cpu(sb->feature_map) &
>> MD_FEATURE_RESHAPE_ACTIVE)) {
>> >                       mddev->reshape_position =
>> le64_to_cpu(sb->reshape_position);
>> >                       mddev->delta_disks = le32_to_cpu(sb->delta_disks);
>> >@@ -1762,7 +1766,8 @@ static void super_1_sync(struct mddev *mddev,
>> struct md_rdev *rdev)
>> >
>> >       if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
>> >               sb->bitmap_offset =
>> cpu_to_le32((__u32)mddev->bitmap_info.offset);
>> >-              sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
>> >+              sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET |
>> >+                                      MD_FEATURE_SYNCBITMAP);
>> >       }
>> >
>> >       if (rdev->raid_disk >= 0 &&
>> >diff --git a/drivers/md/md.h b/drivers/md/md.h
>> >index 653f992..1cef001 100644
>> >--- a/drivers/md/md.h
>> >+++ b/drivers/md/md.h
>> >@@ -404,6 +404,7 @@ struct mddev {
>> >               unsigned long           daemon_sleep; /* how many jiffies
>> between updates? */
>> >               unsigned long           max_write_behind; /* write-behind
>> mode */
>> >               int                     external;
>> >+              int                     sync_bitmap;
>> >       } bitmap_info;
>> >
>> >       atomic_t                        max_corr_read_errors; /* max read
>> retries */
>> >diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
>> >index 5595118..ba47ee7 100644
>> >--- a/drivers/md/raid1.c
>> >+++ b/drivers/md/raid1.c
>> >@@ -2396,6 +2396,13 @@ static sector_t sync_request(struct mddev *mddev,
>> sector_t sector_nr, int *skipp
>> >               *skipped = 1;
>> >               return sync_blocks;
>> >       }
>> >+
>> >+      if (conf->fullsync && !syncbitmap_start_sync(mddev->bitmap,
>> >+                              sector_nr, &sync_blocks)) {
>> >+              *skipped = 1;
>> >+              return sync_blocks;
>> >+      }
>> >+
>> >       /*
>> >        * If there is non-resync activity waiting for a turn,
>> >        * and resync is going fast enough,
>> >diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
>> >index 9359828..7528aa8 100644
>> >--- a/drivers/md/raid5.c
>> >+++ b/drivers/md/raid5.c
>> >@@ -4688,6 +4688,13 @@ static inline sector_t sync_request(struct mddev
>> *mddev, sector_t sector_nr, int
>> >               return sync_blocks * STRIPE_SECTORS; /* keep things
>> rounded to whole stripes */
>> >       }
>> >
>> >+      if (conf->fullsync && sync_blocks >= STRIPE_SECTORS &&
>> >+          !syncbitmap_start_sync(mddev->bitmap, sector_nr,
>> &sync_blocks)) {
>> >+              sync_blocks /= STRIPE_SECTORS;
>> >+              *skipped = 1;
>> >+              return sync_blocks * STRIPE_SECTORS;
>> >+      }
>> >+
>> >       bitmap_cond_end_sync(mddev->bitmap, sector_nr);
>> >
>> >       sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
>> >diff --git a/include/uapi/linux/raid/md_p.h
>> b/include/uapi/linux/raid/md_p.h
>> >index fe1a540..7949f61 100644
>> >--- a/include/uapi/linux/raid/md_p.h
>> >+++ b/include/uapi/linux/raid/md_p.h
>> >@@ -291,6 +291,7 @@ struct mdp_superblock_1 {
>> >                                           * backwards anyway.
>> >                                           */
>> > #define       MD_FEATURE_NEW_OFFSET           64 /* new_offset must be
>> honoured */
>> >+#define       MD_FEATURE_SYNCBITMAP           128
>> > #define       MD_FEATURE_ALL                  (MD_FEATURE_BITMAP_OFFSET
>>       \
>> >                                       |MD_FEATURE_RECOVERY_OFFSET     \
>> >                                       |MD_FEATURE_RESHAPE_ACTIVE      \
>> >@@ -298,6 +299,7 @@ struct mdp_superblock_1 {
>> >                                       |MD_FEATURE_REPLACEMENT         \
>> >                                       |MD_FEATURE_RESHAPE_BACKWARDS   \
>> >                                       |MD_FEATURE_NEW_OFFSET          \
>> >+                                      |MD_FEATURE_SYNCBITMAP          \
>> >                                       )
>> >
>> > #endif
>> >--
>> >1.7.1
>> >
>> >--
>> >To unsubscribe from this list: send the line "unsubscribe linux-raid" in
>> >the body of a message to majordomo@vger.kernel.org
>> >More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
>
>
>
>-- 
>--
>Best Regard
>Robin Dong
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2013-07-01 11:40 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-06-25  8:51 [PATCH v1] md: add sync-bitmap to only resync WRITTEN data when adding new disk in raid array Robin Dong
2013-06-25  8:51 ` [PATCH v1] mdadm: " Robin Dong
2013-06-26  2:26 ` [PATCH v1] md: " NeilBrown
     [not found] ` <201306260851362261286@gmail.com>
     [not found]   ` <CANsebLG00tpjOEuYzUo=PByT+wu-w8Z77XnFG2W1dHMVgLbUfQ@mail.gmail.com>
2013-07-01 11:40     ` majianpeng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).