From: gqjiang@suse.com
To: neilb@suse.de
Cc: linux-raid@vger.kernel.org, rgoldwyn@suse.de
Subject: [PATCH 05/10] Add a new clustered disk
Date: Fri, 24 Apr 2015 15:30:36 +0800 [thread overview]
Message-ID: <1429860641-5839-6-git-send-email-gqjiang@suse.com> (raw)
In-Reply-To: <1429860641-5839-1-git-send-email-gqjiang@suse.com>
From: Guoqing Jiang <gqjiang@suse.com>
A clustered disk is added by the traditional --add sequence.
However, other nodes need to acknowledge that they can "see"
the device. This is done by --cluster-confirm:
--cluster-confirm Y:/dev/whatever (if disk is found)
or
--cluster-confirm Y:missing (if disk is not found)
The node initiating the --add has the disk state tagged with
MD_DISK_CLUSTER_ADD, and each node confirming it tags the disk with
MD_DISK_CANDIDATE.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
Manage.c | 33 +++++++++++++++++++++++++++++----
ReadMe.c | 1 +
md_p.h | 7 +++++++
md_u.h | 1 +
mdadm.8.in | 9 +++++++++
mdadm.c | 4 ++++
mdadm.h | 2 ++
util.c | 11 +++++++++++
8 files changed, 64 insertions(+), 4 deletions(-)
diff --git a/Manage.c b/Manage.c
index d3cfb55..4c3d451 100644
--- a/Manage.c
+++ b/Manage.c
@@ -690,7 +690,8 @@ skip_re_add:
int Manage_add(int fd, int tfd, struct mddev_dev *dv,
struct supertype *tst, mdu_array_info_t *array,
int force, int verbose, char *devname,
- char *update, unsigned long rdev, unsigned long long array_size)
+ char *update, unsigned long rdev, unsigned long long array_size,
+ int raid_slot)
{
unsigned long long ldsize;
struct supertype *dev_st = NULL;
@@ -879,7 +880,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
}
disc.major = major(rdev);
disc.minor = minor(rdev);
- disc.number =j;
+ if (raid_slot < 0)
+ disc.number = j;
+ else
+ disc.number = raid_slot;
disc.state = 0;
if (array->not_persistent==0) {
int dfd;
@@ -920,6 +924,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
}
free(used);
}
+
+ if (array->state & (1 << MD_SB_CLUSTERED)) {
+ if (dv->disposition == 'c')
+ disc.state |= (1 << MD_DISK_CANDIDATE);
+ else
+ disc.state |= (1 << MD_DISK_CLUSTER_ADD);
+ }
+
if (dv->writemostly == 1)
disc.state |= (1 << MD_DISK_WRITEMOSTLY);
if (tst->ss->external) {
@@ -1239,6 +1251,7 @@ int Manage_subdevs(char *devname, int fd,
* variant on 'A'
* 'F' - Another variant of 'A', where the device was faulty
* so must be removed from the array first.
+ * 'c' - confirm the device as found (for clustered environments)
*
* For 'f' and 'r', the device can also be a kernel-internal
* name such as 'sdb'.
@@ -1254,6 +1267,7 @@ int Manage_subdevs(char *devname, int fd,
struct mdinfo info;
int frozen = 0;
int busy = 0;
+ int raid_slot = -1;
if (ioctl(fd, GET_ARRAY_INFO, &array)) {
pr_err("Cannot get array info for %s\n",
@@ -1282,6 +1296,11 @@ int Manage_subdevs(char *devname, int fd,
int rv;
int mj,mn;
+ raid_slot = -1;
+ if (dv->disposition == 'c')
+ parse_cluster_confirm_arg(dv->devname, &dv->devname,
+ &raid_slot);
+
if (strcmp(dv->devname, "failed") == 0 ||
strcmp(dv->devname, "faulty") == 0) {
if (dv->disposition != 'A'
@@ -1307,6 +1326,11 @@ int Manage_subdevs(char *devname, int fd,
if (strcmp(dv->devname, "missing") == 0) {
struct mddev_dev *add_devlist = NULL;
struct mddev_dev **dp;
+ if (dv->disposition == 'c') {
+ rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL);
+ break;
+ }
+
if (dv->disposition != 'A') {
pr_err("'missing' only meaningful with --re-add\n");
goto abort;
@@ -1399,7 +1423,7 @@ int Manage_subdevs(char *devname, int fd,
else {
int open_err = errno;
if (stat(dv->devname, &stb) != 0) {
- pr_err("Cannot find %s: %s\n",
+ pr_err("%s: %d Cannot find %s: %s\n", __func__, __LINE__,
dv->devname, strerror(errno));
goto abort;
}
@@ -1437,6 +1461,7 @@ int Manage_subdevs(char *devname, int fd,
case 'A':
case 'M': /* --re-add missing */
case 'F': /* --re-add faulty */
+ case 'c': /* --cluster-confirm */
/* add the device */
if (subarray) {
pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n");
@@ -1470,7 +1495,7 @@ int Manage_subdevs(char *devname, int fd,
}
rv = Manage_add(fd, tfd, dv, tst, &array,
force, verbose, devname, update,
- rdev, array_size);
+ rdev, array_size, raid_slot);
close(tfd);
tfd = -1;
if (rv < 0)
diff --git a/ReadMe.c b/ReadMe.c
index c6286ae..c854cd5 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -169,6 +169,7 @@ struct option long_options[] = {
{"wait", 0, 0, WaitOpt},
{"wait-clean", 0, 0, Waitclean },
{"action", 1, 0, Action },
+ {"cluster-confirm", 0, 0, ClusterConfirm},
/* For Detail/Examine */
{"brief", 0, 0, Brief},
diff --git a/md_p.h b/md_p.h
index c4846ba..e59504f 100644
--- a/md_p.h
+++ b/md_p.h
@@ -78,6 +78,12 @@
#define MD_DISK_ACTIVE 1 /* disk is running but may not be in sync */
#define MD_DISK_SYNC 2 /* disk is in sync with the raid set */
#define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */
+#define MD_DISK_CLUSTER_ADD 4 /* Initiate a disk add across the cluster
+ * For clustered environments only.
+ */
+#define MD_DISK_CANDIDATE 5 /* disk is added as spare (local) until confirmed
+ * For clustered environments only.
+ */
#define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config.
* read requests will only be sent here in
@@ -106,6 +112,7 @@ typedef struct mdp_device_descriptor_s {
#define MD_SB_BLOCK_CONTAINER_RESHAPE 3 /* block container wide reshapes */
#define MD_SB_BLOCK_VOLUME 4 /* block activation of array, other arrays
* in container can be activated */
+#define MD_SB_CLUSTERED 5 /* MD is clustered */
#define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */
typedef struct mdp_superblock_s {
diff --git a/md_u.h b/md_u.h
index be9868a..76068d6 100644
--- a/md_u.h
+++ b/md_u.h
@@ -44,6 +44,7 @@
#define STOP_ARRAY _IO (MD_MAJOR, 0x32)
#define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33)
#define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34)
+#define CLUSTERED_DISK_NACK _IO (MD_MAJOR, 0x35)
typedef struct mdu_version_s {
int major;
diff --git a/mdadm.8.in b/mdadm.8.in
index c015cbf..6873cc7 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -1405,6 +1405,15 @@ will avoid reading from these devices if possible.
.BR \-\-readwrite
Subsequent devices that are added or re\-added will have the 'write-mostly'
flag cleared.
+.TP
+.BR \-\-cluster\-confirm
+Confirm the existence of the device. This is issued in response to an \-\-add
+request by a node in a cluster. When a node adds a device it sends a message
+to all nodes in the cluster to look for a device with a UUID. This translates
+to a udev notification with the UUID of the device to be added and the slot
+number. The receiving node must acknowledge this message
+with \-\-cluster\-confirm. Valid arguments are <slot>:<devicename> in case
+the device is found or <slot>:missing in case the device is not found.
.P
Each of these options requires that the first device listed is the array
diff --git a/mdadm.c b/mdadm.c
index 6963a09..5b4b3ef 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -196,6 +196,7 @@ int main(int argc, char *argv[])
case 'f':
case Fail:
case ReAdd: /* re-add */
+ case ClusterConfirm:
if (!mode) {
newmode = MANAGE;
shortopt = short_bitmap_options;
@@ -933,6 +934,9 @@ int main(int argc, char *argv[])
* remove the device */
devmode = 'f';
continue;
+ case O(MANAGE, ClusterConfirm):
+ devmode = 'c';
+ continue;
case O(MANAGE,Replace):
/* Mark these devices for replacement */
devmode = 'R';
diff --git a/mdadm.h b/mdadm.h
index f56d9d6..00c726e 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -346,6 +346,7 @@ enum special_options {
Action,
Nodes,
ClusterName,
+ ClusterConfirm,
};
enum prefix_standard {
@@ -1281,6 +1282,7 @@ extern int parse_uuid(char *str, int uuid[4]);
extern int parse_layout_10(char *layout);
extern int parse_layout_faulty(char *layout);
extern long parse_num(char *num);
+extern int parse_cluster_confirm_arg(char *inp, char **devname, int *slot);
extern int check_ext2(int fd, char *name);
extern int check_reiser(int fd, char *name);
extern int check_raid(int fd, char *name);
diff --git a/util.c b/util.c
index ed9a745..1d82fc7 100644
--- a/util.c
+++ b/util.c
@@ -273,6 +273,17 @@ long parse_num(char *num)
}
#endif
+/* Split "<slot>:<devname>"; return 0, or -1 if the slot digits or the ':' are missing. */
+int parse_cluster_confirm_arg(char *input, char **devname, int *slot)
+{
+	char *dev;
+	*slot = strtoul(input, &dev, 10);
+	if (dev == input || dev[0] != ':')
+		return -1;
+	*devname = dev+1;
+	return 0;
+}
+
void remove_partitions(int fd)
{
/* remove partitions from this block devices.
--
1.7.12.4
next prev parent reply other threads:[~2015-04-24 7:30 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-04-24 7:30 [PATCH 00/10] mdadm tool: add the support for cluster-md gqjiang
2015-04-24 7:30 ` [PATCH 01/10] Add nodes option while creating md gqjiang
2015-04-29 1:30 ` NeilBrown
2015-04-30 2:33 ` Guoqing Jiang
2015-04-24 7:30 ` [PATCH 02/10] home-cluster while creating an array gqjiang
2015-04-24 7:30 ` [PATCH 03/10] Create n bitmaps for clustered mode gqjiang
2015-04-29 1:36 ` NeilBrown
2015-04-29 2:41 ` Goldwyn Rodrigues
2015-04-30 2:51 ` NeilBrown
2015-04-30 12:44 ` Goldwyn Rodrigues
2015-04-29 1:41 ` NeilBrown
2015-04-30 2:44 ` Guoqing Jiang
2015-04-30 2:53 ` NeilBrown
2015-04-24 7:30 ` [PATCH 04/10] Show all bitmaps while examining bitmap gqjiang
2015-04-29 1:41 ` NeilBrown
2015-04-30 3:17 ` Guoqing Jiang
2015-04-30 4:45 ` NeilBrown
2015-04-24 7:30 ` gqjiang [this message]
2015-04-29 1:45 ` [PATCH 05/10] Add a new clustered disk NeilBrown
2015-04-30 3:20 ` Guoqing Jiang
2015-04-24 7:30 ` [PATCH 06/10] Convert a bitmap=none device to clustered gqjiang
2015-04-24 7:30 ` [PATCH 07/10] Skip clustered devices in incremental gqjiang
2015-04-24 7:30 ` [PATCH 08/10] mdadm: add the ability to change cluster name gqjiang
2015-04-29 1:50 ` NeilBrown
2015-04-30 3:22 ` Guoqing Jiang
2015-04-24 7:30 ` [PATCH 09/10] mdadm: change the num of cluster node gqjiang
2015-04-29 1:51 ` NeilBrown
2015-04-30 3:34 ` Guoqing Jiang
2015-04-30 6:47 ` NeilBrown
2015-04-30 10:04 ` Guoqing Jiang
2015-04-24 7:30 ` [PATCH 10/10] Reuse the write_bitmap for update uuid gqjiang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1429860641-5839-6-git-send-email-gqjiang@suse.com \
--to=gqjiang@suse.com \
--cc=linux-raid@vger.kernel.org \
--cc=neilb@suse.de \
--cc=rgoldwyn@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).