linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: gqjiang@suse.com
To: neilb@suse.de
Cc: linux-raid@vger.kernel.org, rgoldwyn@suse.de
Subject: [PATCH 03/10] Create n bitmaps for clustered mode
Date: Fri, 24 Apr 2015 15:30:34 +0800	[thread overview]
Message-ID: <1429860641-5839-4-git-send-email-gqjiang@suse.com> (raw)
In-Reply-To: <1429860641-5839-1-git-send-email-gqjiang@suse.com>

From: Guoqing Jiang <gqjiang@suse.com>

For a clustered MD, create bitmaps equal to number of nodes so
each node has an independent bitmap.

Only the first bitmap is has the bits set so that the first node
that assembles the device also performs the sync.

The bitmaps are aligned to 4k boundaries.

On-disk format:

0                    4k                     8k                    12k
-------------------------------------------------------------------
| idle                | md super            | bm super [0] + bits |
| bm bits[0, contd]   | bm super[1] + bits  | bm bits[1, contd]   |
| bm super[2] + bits  | bm bits [2, contd]  | bm super[3] + bits  |
| bm bits [3, contd]  |                     |                     |

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
 Create.c   |  3 ++-
 bitmap.h   |  7 +++++--
 mdadm.8.in |  7 ++++++-
 mdadm.c    | 17 ++++++++++++++++-
 super1.c   | 59 +++++++++++++++++++++++++++++++++++++++++------------------
 5 files changed, 70 insertions(+), 23 deletions(-)

diff --git a/Create.c b/Create.c
index cd5485b..9663dc4 100644
--- a/Create.c
+++ b/Create.c
@@ -752,7 +752,8 @@ int Create(struct supertype *st, char *mddev,
 #endif
 	}
 
-	if (s->bitmap_file && strcmp(s->bitmap_file, "internal")==0) {
+	if (s->bitmap_file && (strcmp(s->bitmap_file, "internal")==0
+			|| strcmp(s->bitmap_file, "clustered")==0)) {
 		if ((vers%100) < 2) {
 			pr_err("internal bitmaps not supported by this kernel.\n");
 			goto abort_locked;
diff --git a/bitmap.h b/bitmap.h
index c8725a3..adbf0b4 100644
--- a/bitmap.h
+++ b/bitmap.h
@@ -154,8 +154,11 @@ typedef struct bitmap_super_s {
 	__u32 chunksize;    /* 52  the bitmap chunk size in bytes */
 	__u32 daemon_sleep; /* 56  seconds between disk flushes */
 	__u32 write_behind; /* 60  number of outstanding write-behind writes */
-
-	__u8  pad[256 - 64]; /* set to zero */
+	__u32 sectors_reserved; /* 64 number of 512-byte sectors that are
+				 * reserved for the bitmap. */
+	__u32 nodes;        /* 68 the maximum number of nodes in cluster. */
+	__u8 cluster_name[64]; /* 72 cluster name to which this md belongs */
+	__u8  pad[256 - 136]; /* set to zero */
 } bitmap_super_t;
 
 /* notes:
diff --git a/mdadm.8.in b/mdadm.8.in
index a0e8288..c015cbf 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -700,7 +700,12 @@ and so is replicated on all devices.  If the word
 .B "none"
 is given with
 .B \-\-grow
-mode, then any bitmap that is present is removed.
+mode, then any bitmap that is present is removed. If the word
+.B "clustered"
+is given, the array is created for a clustered environment. One bitmap
+is created for each node as defined by the
+.B \-\-nodes
+parameter and are stored internally.
 
 To help catch typing errors, the filename must contain at least one
 slash ('/') if it is a real file (not 'internal' or 'none').
diff --git a/mdadm.c b/mdadm.c
index e4f8568..6963a09 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -1111,6 +1111,15 @@ int main(int argc, char *argv[])
 				s.bitmap_file = optarg;
 				continue;
 			}
+			if (strcmp(optarg, "clustered")== 0) {
+				s.bitmap_file = optarg;
+				/* Set the default number of cluster nodes
+				 * to 4 if not already set by user
+				 */
+				if (c.nodes < 1)
+					c.nodes = 4;
+				continue;
+			}
 			/* probable typo */
 			pr_err("bitmap file must contain a '/', or be 'internal', or 'none'\n"
 				"       not '%s'\n", optarg);
@@ -1404,7 +1413,13 @@ int main(int argc, char *argv[])
 		if (c.delay == 0)
 			c.delay = DEFAULT_BITMAP_DELAY;
 
-		if (!strncmp(s.bitmap_file, "internal", 9) ||
+		if (!strncmp(s.bitmap_file, "clustered", 9)) {
+			if (s.level != 1) {
+				pr_err("--bitmap=clustered is currently supported with RAID mirror only\n");
+				rv = 1;
+				break;
+			}
+		} else if (!strncmp(s.bitmap_file, "internal", 9) ||
 			!strncmp(s.bitmap_file,"none", 4)) {
 			if (c.nodes) {
 				pr_err("--nodes argument is incompatible with --bitmap=%s.\n",
diff --git a/super1.c b/super1.c
index f0508fe..ac1b011 100644
--- a/super1.c
+++ b/super1.c
@@ -2144,6 +2144,10 @@ add_internal_bitmap1(struct supertype *st,
 	bms->daemon_sleep = __cpu_to_le32(delay);
 	bms->sync_size = __cpu_to_le64(size);
 	bms->write_behind = __cpu_to_le32(write_behind);
+	bms->nodes = __cpu_to_le32(st->nodes);
+	if (st->cluster_name)
+		strncpy((char *)bms->cluster_name,
+				st->cluster_name, strlen(st->cluster_name));
 
 	*chunkp = chunk;
 	return 1;
@@ -2177,6 +2181,7 @@ static int write_bitmap1(struct supertype *st, int fd)
 	void *buf;
 	int towrite, n;
 	struct align_fd afd;
+	unsigned int i;
 
 	init_afd(&afd, fd);
 
@@ -2185,27 +2190,45 @@ static int write_bitmap1(struct supertype *st, int fd)
 	if (posix_memalign(&buf, 4096, 4096))
 		return -ENOMEM;
 
-	memset(buf, 0xff, 4096);
-	memcpy(buf, (char *)bms, sizeof(bitmap_super_t));
-
-	towrite = __le64_to_cpu(bms->sync_size) / (__le32_to_cpu(bms->chunksize)>>9);
-	towrite = (towrite+7) >> 3; /* bits to bytes */
-	towrite += sizeof(bitmap_super_t);
-	towrite = ROUND_UP(towrite, 512);
-	while (towrite > 0) {
-		n = towrite;
-		if (n > 4096)
-			n = 4096;
-		n = awrite(&afd, buf, n);
-		if (n > 0)
-			towrite -= n;
+	/* We use bms->nodes as opposed to st->nodes to
+	 * be compatible with write-after-reads such as
+	 * the GROW operation.
+	 */
+	for (i = 0; i < __le32_to_cpu(bms->nodes); i++) {
+		/* Only the first bitmap should resync
+		 * the whole device
+		 */
+		if (i)
+			memset(buf, 0x00, 4096);
 		else
+			memset(buf, 0xff, 4096);
+		memcpy(buf, (char *)bms, sizeof(bitmap_super_t));
+
+		towrite = __le64_to_cpu(bms->sync_size) / (__le32_to_cpu(bms->chunksize)>>9);
+		towrite = (towrite+7) >> 3; /* bits to bytes */
+		towrite += sizeof(bitmap_super_t);
+		/* we need the bitmaps to be at 4k boundary */
+		towrite = ROUND_UP(towrite, 4096);
+		while (towrite > 0) {
+			n = towrite;
+			if (n > 4096)
+				n = 4096;
+			n = awrite(&afd, buf, n);
+			if (n > 0)
+				towrite -= n;
+			else
+				break;
+			if (i)
+				memset(buf, 0x00, 4096);
+			else
+				memset(buf, 0xff, 4096);
+		}
+		fsync(fd);
+		if (towrite) {
+			rv = -2;
 			break;
-		memset(buf, 0xff, 4096);
+		}
 	}
-	fsync(fd);
-	if (towrite)
-		rv = -2;
 
 	free(buf);
 	return rv;
-- 
1.7.12.4


  parent reply	other threads:[~2015-04-24  7:30 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-04-24  7:30 [PATCH 00/10] mdadm tool: add the support for cluster-md gqjiang
2015-04-24  7:30 ` [PATCH 01/10] Add nodes option while creating md gqjiang
2015-04-29  1:30   ` NeilBrown
2015-04-30  2:33     ` Guoqing Jiang
2015-04-24  7:30 ` [PATCH 02/10] home-cluster while creating an array gqjiang
2015-04-24  7:30 ` gqjiang [this message]
2015-04-29  1:36   ` [PATCH 03/10] Create n bitmaps for clustered mode NeilBrown
2015-04-29  2:41     ` Goldwyn Rodrigues
2015-04-30  2:51       ` NeilBrown
2015-04-30 12:44         ` Goldwyn Rodrigues
2015-04-29  1:41   ` NeilBrown
2015-04-30  2:44     ` Guoqing Jiang
2015-04-30  2:53       ` NeilBrown
2015-04-24  7:30 ` [PATCH 04/10] Show all bitmaps while examining bitmap gqjiang
2015-04-29  1:41   ` NeilBrown
2015-04-30  3:17     ` Guoqing Jiang
2015-04-30  4:45       ` NeilBrown
2015-04-24  7:30 ` [PATCH 05/10] Add a new clustered disk gqjiang
2015-04-29  1:45   ` NeilBrown
2015-04-30  3:20     ` Guoqing Jiang
2015-04-24  7:30 ` [PATCH 06/10] Convert a bitmap=none device to clustered gqjiang
2015-04-24  7:30 ` [PATCH 07/10] Skip clustered devices in incremental gqjiang
2015-04-24  7:30 ` [PATCH 08/10] mdadm: add the ability to change cluster name gqjiang
2015-04-29  1:50   ` NeilBrown
2015-04-30  3:22     ` Guoqing Jiang
2015-04-24  7:30 ` [PATCH 09/10] mdadm: change the num of cluster node gqjiang
2015-04-29  1:51   ` NeilBrown
2015-04-30  3:34     ` Guoqing Jiang
2015-04-30  6:47       ` NeilBrown
2015-04-30 10:04         ` Guoqing Jiang
2015-04-24  7:30 ` [PATCH 10/10] Reuse the write_bitmap for update uuid gqjiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1429860641-5839-4-git-send-email-gqjiang@suse.com \
    --to=gqjiang@suse.com \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@suse.de \
    --cc=rgoldwyn@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).