All of lore.kernel.org
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.de>
To: gqjiang@suse.com
Cc: linux-raid@vger.kernel.org, rgoldwyn@suse.de
Subject: Re: [PATCH 05/10] Add a new clustered disk
Date: Wed, 29 Apr 2015 11:45:32 +1000	[thread overview]
Message-ID: <20150429114532.568d58bd@notabene.brown> (raw)
In-Reply-To: <1429860641-5839-6-git-send-email-gqjiang@suse.com>

[-- Attachment #1: Type: text/plain, Size: 9758 bytes --]

On Fri, 24 Apr 2015 15:30:36 +0800 gqjiang@suse.com wrote:

> From: Guoqing Jiang <gqjiang@suse.com>
> 
> A clustered disk is added by the traditional --add sequence.
> However, other nodes need to acknowledge that they can "see"
> the device. This is done by --cluster-confirm:
> 
> --cluster-confirm Y:/dev/whatever (if disk is found)
> or
> --cluster-confirm Y:missing (if disk is not found)
> 
> The node initiating the --add has the disk state tagged with
> MD_DISK_CLUSTER_ADD, and the node confirming it tags the disk with
> MD_DISK_CANDIDATE.

You haven't explained 'Y' here.  It looks like it means 'Yes', but it doesn't.


> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
> ---
>  Manage.c   | 33 +++++++++++++++++++++++++++++----
>  ReadMe.c   |  1 +
>  md_p.h     |  7 +++++++
>  md_u.h     |  1 +
>  mdadm.8.in |  9 +++++++++
>  mdadm.c    |  4 ++++
>  mdadm.h    |  2 ++
>  util.c     | 11 +++++++++++
>  8 files changed, 64 insertions(+), 4 deletions(-)
> 
> diff --git a/Manage.c b/Manage.c
> index d3cfb55..4c3d451 100644
> --- a/Manage.c
> +++ b/Manage.c
> @@ -690,7 +690,8 @@ skip_re_add:
>  int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  	       struct supertype *tst, mdu_array_info_t *array,
>  	       int force, int verbose, char *devname,
> -	       char *update, unsigned long rdev, unsigned long long array_size)
> +	       char *update, unsigned long rdev, unsigned long long array_size,
> +	       int raid_slot)
>  {
>  	unsigned long long ldsize;
>  	struct supertype *dev_st = NULL;
> @@ -879,7 +880,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  	}
>  	disc.major = major(rdev);
>  	disc.minor = minor(rdev);
> -	disc.number =j;
> +	if (raid_slot < 0)
> +		disc.number = j;
> +	else
> +		disc.number = raid_slot;
>  	disc.state = 0;
>  	if (array->not_persistent==0) {
>  		int dfd;
> @@ -920,6 +924,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  			}
>  		free(used);
>  	}
> +
> +	if (array->state & (1 << MD_SB_CLUSTERED)) {
> +		if (dv->disposition == 'c')
> +			disc.state |= (1 << MD_DISK_CANDIDATE);
> +		else
> +			disc.state |= (1 << MD_DISK_CLUSTER_ADD);
> +	}
> +
>  	if (dv->writemostly == 1)
>  		disc.state |= (1 << MD_DISK_WRITEMOSTLY);
>  	if (tst->ss->external) {
> @@ -1239,6 +1251,7 @@ int Manage_subdevs(char *devname, int fd,
>  	 *        variant on 'A'
>  	 *  'F' - Another variant of 'A', where the device was faulty
>  	 *        so must be removed from the array first.
> +	 *  'c' - confirm the device as found (for clustered environments)
>  	 *
>  	 * For 'f' and 'r', the device can also be a kernel-internal
>  	 * name such as 'sdb'.
> @@ -1254,6 +1267,7 @@ int Manage_subdevs(char *devname, int fd,
>  	struct mdinfo info;
>  	int frozen = 0;
>  	int busy = 0;
> +	int raid_slot = -1;
>  
>  	if (ioctl(fd, GET_ARRAY_INFO, &array)) {
>  		pr_err("Cannot get array info for %s\n",
> @@ -1282,6 +1296,11 @@ int Manage_subdevs(char *devname, int fd,
>  		int rv;
>  		int mj,mn;
>  
> +		raid_slot = -1;
> +		if (dv->disposition == 'c')
> +			parse_cluster_confirm_arg(dv->devname, &dv->devname,
> +					&raid_slot);
> +
>  		if (strcmp(dv->devname, "failed") == 0 ||
>  		    strcmp(dv->devname, "faulty") == 0) {
>  			if (dv->disposition != 'A'
> @@ -1307,6 +1326,11 @@ int Manage_subdevs(char *devname, int fd,
>  		if (strcmp(dv->devname, "missing") == 0) {
>  			struct mddev_dev *add_devlist = NULL;
>  			struct mddev_dev **dp;
> +			if (dv->disposition == 'c') {
> +				rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL);
> +				break;
> +			}
> +
>  			if (dv->disposition != 'A') {
>  				pr_err("'missing' only meaningful with --re-add\n");
>  				goto abort;
> @@ -1399,7 +1423,7 @@ int Manage_subdevs(char *devname, int fd,
>  			else {
>  				int open_err = errno;
>  				if (stat(dv->devname, &stb) != 0) {
> -					pr_err("Cannot find %s: %s\n",
> +					pr_err("%s: %d Cannot find %s: %s\n", __func__, __LINE__,
>  					       dv->devname, strerror(errno));
>  					goto abort;
>  				}
> @@ -1437,6 +1461,7 @@ int Manage_subdevs(char *devname, int fd,
>  		case 'A':
>  		case 'M': /* --re-add missing */
>  		case 'F': /* --re-add faulty  */
> +		case 'c': /* --cluster-confirm */
>  			/* add the device */
>  			if (subarray) {
>  				pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n");
> @@ -1470,7 +1495,7 @@ int Manage_subdevs(char *devname, int fd,
>  			}
>  			rv = Manage_add(fd, tfd, dv, tst, &array,
>  					force, verbose, devname, update,
> -					rdev, array_size);
> +					rdev, array_size, raid_slot);
>  			close(tfd);
>  			tfd = -1;
>  			if (rv < 0)
> diff --git a/ReadMe.c b/ReadMe.c
> index c6286ae..c854cd5 100644
> --- a/ReadMe.c
> +++ b/ReadMe.c
> @@ -169,6 +169,7 @@ struct option long_options[] = {
>      {"wait",	  0, 0,  WaitOpt},
>      {"wait-clean", 0, 0, Waitclean },
>      {"action",    1, 0, Action },
> +    {"cluster-confirm", 0, 0, ClusterConfirm},
>  
>      /* For Detail/Examine */
>      {"brief",	  0, 0, Brief},
> diff --git a/md_p.h b/md_p.h
> index c4846ba..e59504f 100644
> --- a/md_p.h
> +++ b/md_p.h
> @@ -78,6 +78,12 @@
>  #define MD_DISK_ACTIVE		1 /* disk is running but may not be in sync */
>  #define MD_DISK_SYNC		2 /* disk is in sync with the raid set */
>  #define MD_DISK_REMOVED		3 /* disk is in sync with the raid set */
> +#define MD_DISK_CLUSTER_ADD     4 /* Initiate a disk add across the cluster
> +				   * For clustered environments only.
> +				   */
> +#define MD_DISK_CANDIDATE	5 /* disk is added as spare (local) until confirmed
> +				   * For clustered environments only.
> +				   */
>  
>  #define	MD_DISK_WRITEMOSTLY	9 /* disk is "write-mostly" is RAID1 config.
>  				   * read requests will only be sent here in
> @@ -106,6 +112,7 @@ typedef struct mdp_device_descriptor_s {
>  #define MD_SB_BLOCK_CONTAINER_RESHAPE 3 /* block container wide reshapes */
>  #define MD_SB_BLOCK_VOLUME	4 /* block activation of array, other arrays
>  				   * in container can be activated */
> +#define MD_SB_CLUSTERED		5 /* MD is clustered  */
>  #define	MD_SB_BITMAP_PRESENT	8 /* bitmap may be present nearby */
>  
>  typedef struct mdp_superblock_s {
> diff --git a/md_u.h b/md_u.h
> index be9868a..76068d6 100644
> --- a/md_u.h
> +++ b/md_u.h
> @@ -44,6 +44,7 @@
>  #define STOP_ARRAY		_IO (MD_MAJOR, 0x32)
>  #define STOP_ARRAY_RO		_IO (MD_MAJOR, 0x33)
>  #define RESTART_ARRAY_RW	_IO (MD_MAJOR, 0x34)
> +#define CLUSTERED_DISK_NACK	_IO (MD_MAJOR, 0x35)
>  
>  typedef struct mdu_version_s {
>  	int major;
> diff --git a/mdadm.8.in b/mdadm.8.in
> index c015cbf..6873cc7 100644
> --- a/mdadm.8.in
> +++ b/mdadm.8.in
> @@ -1405,6 +1405,15 @@ will avoid reading from these devices if possible.
>  .BR \-\-readwrite
>  Subsequent devices that are added or re\-added will have the 'write-mostly'
>  flag cleared.
> +.TP
> +.BR \-\-cluster\-confirm
> +Confirm the existence of the device. This is issued in response to an \-\-add
> +request by a node in a cluster. When a node adds a device it sends a message
> +to all nodes in the cluster to look for a device with a UUID. This translates
> +to a udev notification with the UUID of the device to be added and the slot
> +number. The receiving node must acknowledge this message
> +with \-\-cluster\-confirm. Valid arguments are <slot>:<devicename> in case
> +the device is found or <slot>:missing in case the device is not found.
>  
>  .P
>  Each of these options requires that the first device listed is the array
> diff --git a/mdadm.c b/mdadm.c
> index 6963a09..5b4b3ef 100644
> --- a/mdadm.c
> +++ b/mdadm.c
> @@ -196,6 +196,7 @@ int main(int argc, char *argv[])
>  		case 'f':
>  		case Fail:
>  		case ReAdd: /* re-add */
> +		case ClusterConfirm:
>  			if (!mode) {
>  				newmode = MANAGE;
>  				shortopt = short_bitmap_options;
> @@ -933,6 +934,9 @@ int main(int argc, char *argv[])
>  					   * remove the device */
>  			devmode = 'f';
>  			continue;
> +		case O(MANAGE, ClusterConfirm):
> +			devmode = 'c';
> +			continue;
>  		case O(MANAGE,Replace):
>  			/* Mark these devices for replacement */
>  			devmode = 'R';
> diff --git a/mdadm.h b/mdadm.h
> index f56d9d6..00c726e 100644
> --- a/mdadm.h
> +++ b/mdadm.h
> @@ -346,6 +346,7 @@ enum special_options {
>  	Action,
>  	Nodes,
>  	ClusterName,
> +	ClusterConfirm,
>  };
>  
>  enum prefix_standard {
> @@ -1281,6 +1282,7 @@ extern int parse_uuid(char *str, int uuid[4]);
>  extern int parse_layout_10(char *layout);
>  extern int parse_layout_faulty(char *layout);
>  extern long parse_num(char *num);
> +extern int parse_cluster_confirm_arg(char *inp, char **devname, int *slot);
>  extern int check_ext2(int fd, char *name);
>  extern int check_reiser(int fd, char *name);
>  extern int check_raid(int fd, char *name);
> diff --git a/util.c b/util.c
> index ed9a745..1d82fc7 100644
> --- a/util.c
> +++ b/util.c
> @@ -273,6 +273,17 @@ long parse_num(char *num)
>  }
>  #endif
>  
> +int parse_cluster_confirm_arg(char *input, char **devname, int *slot)
> +{
> +	char *dev;
> +	*slot = strtoul(input, &dev, 10);
> +	if (dev[0] == ':')
> +		*devname = dev+1;
> +	else
> +		return -1;
> +	return 0;
> +}

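The logic here hurts my brain :-(
Testing for the failure case first reads more clearly, and it also lets us
reject input that has no leading slot number at all (dev == input):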

 *slot = strtoul(input, &dev, 10);
 if (dev == input || dev[0] != ':')
     return -1;
 *devname = dev+1;
 return 0;

> +
>  void remove_partitions(int fd)
>  {
>  	/* remove partitions from this block devices.

Thanks,
NeilBrown


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 811 bytes --]

  reply	other threads:[~2015-04-29  1:45 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-04-24  7:30 [PATCH 00/10] mdadm tool: add the support for cluster-md gqjiang
2015-04-24  7:30 ` [PATCH 01/10] Add nodes option while creating md gqjiang
2015-04-29  1:30   ` NeilBrown
2015-04-30  2:33     ` Guoqing Jiang
2015-04-24  7:30 ` [PATCH 02/10] home-cluster while creating an array gqjiang
2015-04-24  7:30 ` [PATCH 03/10] Create n bitmaps for clustered mode gqjiang
2015-04-29  1:36   ` NeilBrown
2015-04-29  2:41     ` Goldwyn Rodrigues
2015-04-30  2:51       ` NeilBrown
2015-04-30 12:44         ` Goldwyn Rodrigues
2015-04-29  1:41   ` NeilBrown
2015-04-30  2:44     ` Guoqing Jiang
2015-04-30  2:53       ` NeilBrown
2015-04-24  7:30 ` [PATCH 04/10] Show all bitmaps while examining bitmap gqjiang
2015-04-29  1:41   ` NeilBrown
2015-04-30  3:17     ` Guoqing Jiang
2015-04-30  4:45       ` NeilBrown
2015-04-24  7:30 ` [PATCH 05/10] Add a new clustered disk gqjiang
2015-04-29  1:45   ` NeilBrown [this message]
2015-04-30  3:20     ` Guoqing Jiang
2015-04-24  7:30 ` [PATCH 06/10] Convert a bitmap=none device to clustered gqjiang
2015-04-24  7:30 ` [PATCH 07/10] Skip clustered devices in incremental gqjiang
2015-04-24  7:30 ` [PATCH 08/10] mdadm: add the ability to change cluster name gqjiang
2015-04-29  1:50   ` NeilBrown
2015-04-30  3:22     ` Guoqing Jiang
2015-04-24  7:30 ` [PATCH 09/10] mdadm: change the num of cluster node gqjiang
2015-04-29  1:51   ` NeilBrown
2015-04-30  3:34     ` Guoqing Jiang
2015-04-30  6:47       ` NeilBrown
2015-04-30 10:04         ` Guoqing Jiang
2015-04-24  7:30 ` [PATCH 10/10] Reuse the write_bitmap for update uuid gqjiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150429114532.568d58bd@notabene.brown \
    --to=neilb@suse.de \
    --cc=gqjiang@suse.com \
    --cc=linux-raid@vger.kernel.org \
    --cc=rgoldwyn@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.