All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0 of 4] Re-integrate a failed secondary mirror leg
@ 2009-12-13  9:18 Malahal Naineni
  2009-12-13  9:18 ` [PATCH 1 of 4] Add more error codes in mirror DSO Malahal Naineni
                   ` (3 more replies)
  0 siblings, 4 replies; 17+ messages in thread
From: Malahal Naineni @ 2009-12-13  9:18 UTC (permalink / raw)
  To: lvm-devel

The following 4 patches try to re-integrate a failed secondary mirror leg.

Patch 1: Re-arranges some code and introduces more error type returns
Patch 2: Tries to re-integrate the failed device indefinitely
Patch 3: Removes some "#ifdef 0' code and fixes timeout setup mechanism.
         Also adds interface to remove a timeout setup
Patch 4: Uses Patch 2 and Patch 3 to attempt only few times at a regular
         interval to resync the failed mirror.

Tested with flakey dm module!



^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 1 of 4] Add more error codes in mirror DSO
  2009-12-13  9:18 [PATCH 0 of 4] Re-integrate a failed secondary mirror leg Malahal Naineni
@ 2009-12-13  9:18 ` Malahal Naineni
  2009-12-18 16:28   ` Jonathan Brassow
  2009-12-13  9:18 ` [PATCH 2 of 4] Handle transient secondary mirror leg failures Malahal Naineni
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 17+ messages in thread
From: Malahal Naineni @ 2009-12-13  9:18 UTC (permalink / raw)
  To: lvm-devel

The mirror DSO (daemons/dmeventd/plugins/mirror/dmeventd_mirror.c) logs
various device failure codes but only handles one failure, 'ME_FAILURE'.
This patchs adds more error codes to handle multiple types of device
failures.

Signed-off-by: Malahal Naineni <malahal@us.ibm.com>

diff -r a48a5f0eea85 -r a74600c6163e daemons/dmeventd/plugins/mirror/dmeventd_mirror.c
--- a/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c	Sun Dec 06 23:04:13 2009 -0800
+++ b/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c	Sun Dec 13 01:16:51 2009 -0800
@@ -30,9 +30,17 @@
 /* FIXME Replace most syslogs with log_error() style messages and add complete context. */
 /* FIXME Reformat to 80 char lines. */
 
-#define ME_IGNORE    0
-#define ME_INSYNC    1
-#define ME_FAILURE   2
+/* 
+ * An event may contain more than one error type. The following are bit
+ * flags that indicate each error type.
+ */
+#define ME_IGNORE    			0x01U
+#define ME_INSYNC    			0x02U
+#define ME_READ_FAILURE   		0x04U
+#define ME_SYNC_FAILURE   		0x08U
+#define ME_LOG_FAILURE 			0x10U
+#define ME_SECONDARY_WRITE_FAILURE	0x20U
+#define ME_PRIMARY_WRITE_FAILURE	0x40U
 
 /*
  * register_device() is called first and performs initialisation.
@@ -53,45 +61,68 @@ static void *_lvm_handle = NULL;
  */
 static pthread_mutex_t _event_mutex = PTHREAD_MUTEX_INITIALIZER;
 
-static int _process_status_code(const char status_code, const char *dev_name,
-				const char *dev_type, int r)
+static int _process_status_code(const char *mirror, const char *device,
+			        const char status_code, int dev_index)
 {
-	/*
-	 *    A => Alive - No failures
-	 *    D => Dead - A write failure occurred leaving mirror out-of-sync
-	 *    F => Flush failed.
-	 *    S => Sync - A sychronization failure occurred, mirror out-of-sync
-	 *    R => Read - A read failure occurred, mirror data unaffected
-	 *    U => Unclassified failure (bug)
-	 */ 
-	if (status_code == 'F') {
-		syslog(LOG_ERR, "%s device %s flush failed.\n",
-		       dev_type, dev_name);
-		r = ME_FAILURE;
-	} else if (status_code == 'S')
-		syslog(LOG_ERR, "%s device %s sync failed.\n",
-		       dev_type, dev_name);
-	else if (status_code == 'R')
-		syslog(LOG_ERR, "%s device %s read failed.\n",
-		       dev_type, dev_name);
-	else if (status_code != 'A') {
-		syslog(LOG_ERR, "%s device %s has failed (%c).\n",
-		       dev_type, dev_name, status_code);
-		r = ME_FAILURE;
+	int r;
+	const char *error_string;
+	const char *dev_type = dev_index ? "Secondary" : "Primary";
+
+	switch (status_code) {
+	default:
+	case 'U': /* Unclassified failure (bug) */
+	case 'D': /* Write failure occurred leaving mirror out-of-sync */
+	case 'F': /* Flush failure, handled as write failure */
+		if (status_code == 'D')
+			error_string = "write";
+		else if (status_code == 'F')
+			error_string = "flush";
+		else if (status_code == 'U')
+			error_string = "unclassified";
+		else {
+			syslog(LOG_ERR, "Unknown device status code (%c).\n",
+					status_code);
+			error_string = "unknown";
+		}
+
+		syslog(LOG_ERR, "Mirror device: %s, %s leg: %s had a %s "
+				"failure.\n", mirror, dev_type, device,
+				error_string);
+
+		r = dev_index? ME_SECONDARY_WRITE_FAILURE :
+			ME_PRIMARY_WRITE_FAILURE;
+		break;
+
+	case 'S': /* Sychronization failure occurred, mirror out-of-sync */
+		syslog(LOG_ERR, "Mirror device: %s, %s leg: %s had "
+				"sync failure.\n", mirror, dev_type, device);
+		r = ME_SYNC_FAILURE;
+		break;
+
+	case 'R': /* Read failure occurred, mirror data unaffected */
+		syslog(LOG_ERR, "Mirror device: %s, %s leg: %s had a "
+				"read failure.\n", mirror, dev_type, device);
+		r = ME_READ_FAILURE;
+		break;
+
+	case 'A': /* Active, a good status */
+		r = 0;
+		break;
 	}
 
 	return r;
 }
 
-static int _get_mirror_event(char *params)
+static int _get_mirror_event(const char *mirror, char *params)
 {
-	int i, r = ME_INSYNC;
+	int i;
 	char **args = NULL;
 	char *dev_status_str;
 	char *log_status_str;
 	char *sync_str;
 	char *p = NULL;
 	int log_argc, num_devs;
+	int retval = 0;
 
 	/*
 	 * dm core parms:	     0 409600 mirror
@@ -122,23 +153,25 @@ static int _get_mirror_event(char *param
 
 	/* Check for bad mirror devices */
 	for (i = 0; i < num_devs; i++)
-		r = _process_status_code(dev_status_str[i], args[i],
-			i ? "Secondary mirror" : "Primary mirror", r);
+		retval |= _process_status_code(mirror, args[i],
+					       dev_status_str[i], i);
 
 	/* Check for bad disk log device */
-	if (log_argc > 1)
-		r = _process_status_code(log_status_str[0],
-					 args[2 + num_devs + log_argc],
-					 "Log", r);
+	if (log_argc > 1 && log_status_str[0] == 'D') {
+		syslog(LOG_ERR, "Mirror device: %s, log device: %s failed.\n",
+				mirror, args[2 + num_devs + log_argc]);
+		retval = retval | ME_LOG_FAILURE;
+	}
 
-	if (r == ME_FAILURE)
+	if (retval)	/* A failure occurred */
 		goto out;
 
+	retval = ME_INSYNC;	/* assume INSYNC event */
 	p = strstr(sync_str, "/");
 	if (p) {
 		p[0] = '\0';
 		if (strcmp(sync_str, p+1))
-			r = ME_IGNORE;
+			retval = ME_IGNORE;
 		p[0] = '/';
 	} else
 		goto out_parse;
@@ -146,7 +179,7 @@ static int _get_mirror_event(char *param
 out:
 	if (args)
 		dm_free(args);
-	return r;
+	return retval;
 	
 out_parse:
 	if (args)
@@ -212,6 +245,7 @@ void process_event(struct dm_task *dmt,
 	char *target_type = NULL;
 	char *params;
 	const char *device = dm_task_get_name(dmt);
+	int error;
 
 	if (pthread_mutex_trylock(&_event_mutex)) {
 		syslog(LOG_NOTICE, "Another thread is handling an event.  Waiting...");
@@ -231,17 +265,11 @@ void process_event(struct dm_task *dmt,
 			continue;
 		}
 
-		switch(_get_mirror_event(params)) {
-		case ME_INSYNC:
-			/* FIXME: all we really know is that this
-			   _part_ of the device is in sync
-			   Also, this is not an error
-			*/
-			syslog(LOG_NOTICE, "%s is now in-sync\n", device);
-			break;
-		case ME_FAILURE:
-			syslog(LOG_ERR, "Device failure in %s\n", device);
-			if (_remove_failed_devices(device))
+		error = _get_mirror_event(device, params);
+		if (error & ME_LOG_FAILURE ||
+		    error & ME_PRIMARY_WRITE_FAILURE ||
+		    error & ME_SECONDARY_WRITE_FAILURE) {
+			if (_remove_failed_devices(device)) {
 				/* FIXME Why are all the error return codes unused? Get rid of them? */
 				syslog(LOG_ERR, "Failed to remove faulty devices in %s\n",
 				       device);
@@ -250,13 +278,18 @@ void process_event(struct dm_task *dmt,
 				syslog(LOG_NOTICE, "%s is now a linear device.\n",
 					device);
 			*/
-			break;
-		case ME_IGNORE:
-			break;
-		default:
-			/* FIXME Provide value then! */
-			syslog(LOG_INFO, "Unknown event received.\n");
-		}
+			}
+		} else if (error & ME_INSYNC) {
+			/* FIXME: all we really know is that this
+			   _part_ of the device is in sync
+			   Also, this is not an error
+			*/
+			syslog(LOG_NOTICE, "%s is now in-sync\n", device);
+		} else if (error & ME_READ_FAILURE ||
+			   error & ME_SYNC_FAILURE) {
+			/* Ignore these for now */
+		} else
+			syslog(LOG_INFO, "Unknown event:%u received.\n", error);
 	} while (next);
 
 	pthread_mutex_unlock(&_event_mutex);



^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 2 of 4] Handle transient secondary mirror leg failures
  2009-12-13  9:18 [PATCH 0 of 4] Re-integrate a failed secondary mirror leg Malahal Naineni
  2009-12-13  9:18 ` [PATCH 1 of 4] Add more error codes in mirror DSO Malahal Naineni
@ 2009-12-13  9:18 ` Malahal Naineni
  2009-12-18 17:10   ` Jonathan Brassow
  2009-12-13  9:18 ` [PATCH 3 of 4] Add dm_event_set_timeout/dm_event_unset_timeout interface Malahal Naineni
  2009-12-13  9:18 ` [PATCH 4 of 4] Attempt to resync a failed secondary leg few times before giving up Malahal Naineni
  3 siblings, 1 reply; 17+ messages in thread
From: Malahal Naineni @ 2009-12-13  9:18 UTC (permalink / raw)
  To: lvm-devel

A new mirror_device_fault_policy, "retry", is added to handle transient
device failures. When this policy is selected, the mirror DSO will try
to resync the mirror upon a secondary leg failure. This will be tried
and tried until the mirror goes to in_sync state. Later patches make this a
configurable number spaced at some configurable timeout.

The patch uses dmsetup suspend and resume commands to attempt a resync.
It uses "lvm dumpconfig" to find out the mirror_device_fault_policy.

Signed-off-by: Malahal Naineni (malahal at us.ibm.com)

diff -r a74600c6163e -r 1e369d480df0 daemons/dmeventd/plugins/mirror/dmeventd_mirror.c
--- a/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c	Sun Dec 13 01:16:51 2009 -0800
+++ b/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c	Sun Dec 13 01:17:52 2009 -0800
@@ -14,6 +14,7 @@
 
 #include "lvm2cmd.h"
 #include "errors.h"
+#include "defaults.h"
 
 #include <libdevmapper.h>
 #include <libdevmapper-event.h>
@@ -24,6 +25,7 @@
 #include <stdlib.h>
 #include <pthread.h>
 #include <unistd.h>
+#include <ctype.h>
 
 #include <syslog.h> /* FIXME Replace syslog with multilog */
 /* FIXME Missing openlog? */
@@ -56,6 +58,109 @@ static int _register_count = 0;
 static struct dm_pool *_mem_pool = NULL;
 static void *_lvm_handle = NULL;
 
+enum fault_policy {
+	FAULT_POLICY_INVALID,
+	FAULT_POLICY_REMOVE,
+	FAULT_POLICY_ALLOCATE,
+	FAULT_POLICY_ALLOCATE_ANYWHERE,
+	FAULT_POLICY_RETRY,
+};
+
+struct mirror_device_info {
+	enum fault_policy fault_policy;
+};
+
+#define CMD_SIZE 256	/* FIXME Use system restriction */
+#define LINE_SIZE 1024	/* FIXME Use system restriction */
+static int fill_config_str(char buf[], int bufsize, const char *path)
+{
+	char cmd_str[CMD_SIZE];
+	char data[LINE_SIZE], *ptr;
+	int ret = 0;
+	FILE *fp;
+
+	snprintf(cmd_str, sizeof(cmd_str), "lvm dumpconfig %s", path);
+	if ((fp = popen(cmd_str, "r")) == NULL) {
+		syslog(LOG_ERR, "fopen() failed.\n");
+		goto out;
+	}
+
+	if ((ptr = fgets(data, sizeof(data), fp)) == NULL) {
+		syslog(LOG_ERR, "fgets() failed.\n");
+		pclose(fp);
+		goto out;
+	}
+
+	/* Remove white space or quotes at the end */
+	for (ptr = &data[strlen(data)-1]; ptr >= data; --ptr)
+		if (isspace(*ptr) || *ptr == '\"' || *ptr == '\'')
+			*ptr = '\0';
+		else
+			break;
+	ptr = strchr(data, '=');
+	if (ptr) {
+		ptr++; /* After the '=' sign */
+		/* skip quotes or white space */
+		if (isspace(*ptr) || *ptr == '\"' || *ptr == '\'')
+			ptr++;
+		strncpy(buf, ptr, bufsize);
+		buf[bufsize-1] = '\0';
+		ret = 1; /* buf is valid */
+	}
+	if (pclose(fp) != 0)
+		syslog(LOG_ERR, "pclose() failed.\n");
+
+out:
+	return ret;
+}
+
+static enum fault_policy fault_policy_str2enum(const char *str)
+{
+	enum fault_policy ret = FAULT_POLICY_INVALID;
+
+	/* At most the loop is ran twice */
+	while (ret == FAULT_POLICY_INVALID) {
+		if (!strcmp(str, "remove"))
+			ret = FAULT_POLICY_REMOVE;
+		else if (!strcmp(str, "allocate"))
+			ret = FAULT_POLICY_ALLOCATE;
+		else if (!strcmp(str, "allocate_anywhere"))
+			ret = FAULT_POLICY_ALLOCATE_ANYWHERE;
+		else if (!strcmp(str, "retry"))
+			ret = FAULT_POLICY_RETRY;
+		else {
+			syslog(LOG_ERR, "Bad activation/"
+					"mirror_device_fault_policy: %s\n",
+					str);
+			str = DEFAULT_MIRROR_DEV_FAULT_POLICY;
+		}
+	}
+
+	return ret;
+}
+
+
+static enum fault_policy get_mirror_fault_policy()
+{
+	enum fault_policy ret;
+	char policy[LINE_SIZE];
+	const char *ptr;
+
+	ret = fill_config_str(policy, sizeof(policy),
+			"activation/mirror_device_fault_policy");
+	if (ret) {
+		if (!strcmp(policy, ""))
+			ptr = DEFAULT_MIRROR_DEV_FAULT_POLICY;
+		else
+			ptr = policy;
+	} else {
+		ptr = DEFAULT_MIRROR_DEV_FAULT_POLICY;
+	}
+	ret = fault_policy_str2enum(ptr);
+
+	return ret;
+}
+
 /*
  * Currently only one event can be processed at a time.
  */
@@ -200,10 +305,52 @@ static void _temporary_log_fn(int level,
 		syslog(LOG_DEBUG, "%s", format);
 }
 
+
+static int retry_failed_devices(const char *device)
+{
+	int r;
+	char cmd_str[CMD_SIZE];
+	char *vg = NULL, *lv = NULL, *layer = NULL;
+
+	if (strlen(device) > 200)  /* FIXME Use real restriction */
+		/* FIXME These return code distinctions are not used so
+		 * remove them! */
+		return -ENAMETOOLONG;
+
+	if (!dm_split_lvm_name(_mem_pool, device, &vg, &lv, &layer)) {
+		syslog(LOG_ERR, "Unable to determine VG name from %s",
+		       device);
+		/* FIXME Replace with generic error return - reason for
+		 * failure has already got logged */
+		return -ENOMEM;
+	}
+
+	/* FIXME: should be running an LVM command that is pinned.
+	 * dmsetup command may not be pinned in memory all the time.
+	 * "lvchange --refresh vg/lv" only works if there are no device
+	 * failures while it is running. Otherwise, the failed device
+	 * is replaced with "error" target which is not what we want.
+	 */
+	snprintf(cmd_str, CMD_SIZE, "dmsetup suspend --noflush %s-%s", vg, lv);
+	syslog(LOG_NOTICE, "Running command: %s", cmd_str);
+	r = system(cmd_str);
+
+	snprintf(cmd_str, CMD_SIZE, "dmsetup table %s-%s | dmsetup load "
+			"%s-%s", vg, lv, vg, lv);
+	syslog(LOG_NOTICE, "Running command: %s", cmd_str);
+	r |= system(cmd_str);
+
+	snprintf(cmd_str, CMD_SIZE, "dmsetup resume %s-%s", vg, lv);
+	syslog(LOG_NOTICE, "Running command: %s", cmd_str);
+	r |= system(cmd_str);
+
+	dm_pool_empty(_mem_pool);  /* FIXME: not safe with multiple threads */
+	return r;
+}
+
 static int _remove_failed_devices(const char *device)
 {
 	int r;
-#define CMD_SIZE 256	/* FIXME Use system restriction */
 	char cmd_str[CMD_SIZE];
 	char *vg = NULL, *lv = NULL, *layer = NULL;
 
@@ -238,7 +385,7 @@ static int _remove_failed_devices(const 
 
 void process_event(struct dm_task *dmt,
 		   enum dm_event_mask event __attribute((unused)),
-		   void **unused __attribute((unused)))
+		   void **private)
 {
 	void *next = NULL;
 	uint64_t start, length;
@@ -246,6 +393,7 @@ void process_event(struct dm_task *dmt,
 	char *params;
 	const char *device = dm_task_get_name(dmt);
 	int error;
+	struct mirror_device_info *mirror_info = *private;
 
 	if (pthread_mutex_trylock(&_event_mutex)) {
 		syslog(LOG_NOTICE, "Another thread is handling an event.  Waiting...");
@@ -268,8 +416,17 @@ void process_event(struct dm_task *dmt,
 		error = _get_mirror_event(device, params);
 		if (error & ME_LOG_FAILURE ||
 		    error & ME_PRIMARY_WRITE_FAILURE ||
-		    error & ME_SECONDARY_WRITE_FAILURE) {
-			if (_remove_failed_devices(device)) {
+		    error & ME_SECONDARY_WRITE_FAILURE ||
+		    error & ME_SYNC_FAILURE) {
+ 			if (mirror_info->fault_policy == FAULT_POLICY_RETRY &&
+					(error & ME_SECONDARY_WRITE_FAILURE ||
+					 error & ME_SYNC_FAILURE)) {
+ 				syslog(LOG_ERR, "Retrying the failed mirror "
+ 						"device.\n");
+ 				if (retry_failed_devices(device))
+ 					syslog(LOG_ERR, "Failed to reload the "
+ 							"mirror: %s\n", device);
+ 			} else if (_remove_failed_devices(device)) {
 				/* FIXME Why are all the error return codes unused? Get rid of them? */
 				syslog(LOG_ERR, "Failed to remove faulty devices in %s\n",
 				       device);
@@ -285,9 +442,8 @@ void process_event(struct dm_task *dmt,
 			   Also, this is not an error
 			*/
 			syslog(LOG_NOTICE, "%s is now in-sync\n", device);
-		} else if (error & ME_READ_FAILURE ||
-			   error & ME_SYNC_FAILURE) {
-			/* Ignore these for now */
+		} else if (error & ME_READ_FAILURE) {
+			/* Ignore it for now */
 		} else
 			syslog(LOG_INFO, "Unknown event:%u received.\n", error);
 	} while (next);
@@ -299,9 +455,10 @@ int register_device(const char *device,
 		    const char *uuid __attribute((unused)),
 		    int major __attribute((unused)),
 		    int minor __attribute((unused)),
-		    void **unused __attribute((unused)))
+		    void **private)
 {
 	int r = 0;
+	struct mirror_device_info *mirror_info;
 
 	pthread_mutex_lock(&_register_mutex);
 
@@ -312,9 +469,19 @@ int register_device(const char *device,
 	if (!_mem_pool && !(_mem_pool = dm_pool_create("mirror_dso", 1024)))
 		goto out;
 
+	mirror_info = dm_malloc(sizeof(struct mirror_device_info));
+	if (!mirror_info) {
+		dm_pool_destroy(_mem_pool);
+		_mem_pool = NULL;
+		goto out;
+	}
+	mirror_info->fault_policy = get_mirror_fault_policy();
+	*private = mirror_info;
+
 	if (!_lvm_handle) {
 		lvm2_log_fn(_temporary_log_fn);
 		if (!(_lvm_handle = lvm2_init())) {
+			dm_free(mirror_info);
 			dm_pool_destroy(_mem_pool);
 			_mem_pool = NULL;
 			goto out;
@@ -339,8 +506,11 @@ int unregister_device(const char *device
 		      const char *uuid __attribute((unused)),
 		      int major __attribute((unused)),
 		      int minor __attribute((unused)),
-		      void **unused __attribute((unused)))
+		      void **private)
 {
+	struct mirror_device_info *mirror_info = *private;
+
+	dm_free(mirror_info);
 	pthread_mutex_lock(&_register_mutex);
 
 	syslog(LOG_INFO, "No longer monitoring mirror device %s for events\n",
diff -r a74600c6163e -r 1e369d480df0 doc/example.conf
--- a/doc/example.conf	Sun Dec 13 01:16:51 2009 -0800
+++ b/doc/example.conf	Sun Dec 13 01:17:52 2009 -0800
@@ -403,6 +403,9 @@ activation {
     #            since it would break the redundant nature of the mirror. This
     #            policy acts like "remove" if no suitable device and space can
     #            be allocated for the replacement.
+    #
+    # "retry" - Try to re-integrate the failed mirror leg assuming that the
+    #           failure is transient. Not implemented yet, so don't use it.
 
     mirror_log_fault_policy = "allocate"
     mirror_device_fault_policy = "remove"
diff -r a74600c6163e -r 1e369d480df0 lib/metadata/mirror.c
--- a/lib/metadata/mirror.c	Sun Dec 13 01:16:51 2009 -0800
+++ b/lib/metadata/mirror.c	Sun Dec 13 01:17:52 2009 -0800
@@ -37,6 +37,7 @@
 #define MIRROR_REMOVE		 0
 #define MIRROR_ALLOCATE		 1
 #define MIRROR_ALLOCATE_ANYWHERE 2
+#define MIRROR_RETRY	 	 3
 
 /*
  * Returns true if the lv is temporary mirror layer for resync
@@ -787,6 +788,8 @@ static int get_mirror_fault_policy(struc
 		return MIRROR_ALLOCATE;
 	else if (!strcmp(policy, "allocate_anywhere"))
 		return MIRROR_ALLOCATE_ANYWHERE;
+	else if (!strcmp(policy, "retry"))
+		return MIRROR_RETRY;
 
 	if (log_policy)
 		log_error("Bad activation/mirror_log_fault_policy");



^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 3 of 4] Add dm_event_set_timeout/dm_event_unset_timeout interface
  2009-12-13  9:18 [PATCH 0 of 4] Re-integrate a failed secondary mirror leg Malahal Naineni
  2009-12-13  9:18 ` [PATCH 1 of 4] Add more error codes in mirror DSO Malahal Naineni
  2009-12-13  9:18 ` [PATCH 2 of 4] Handle transient secondary mirror leg failures Malahal Naineni
@ 2009-12-13  9:18 ` Malahal Naineni
  2009-12-22  2:12   ` malahal
  2009-12-13  9:18 ` [PATCH 4 of 4] Attempt to resync a failed secondary leg few times before giving up Malahal Naineni
  3 siblings, 1 reply; 17+ messages in thread
From: Malahal Naineni @ 2009-12-13  9:18 UTC (permalink / raw)
  To: lvm-devel

Will be used in mirror DSO to handle transient device failures.

Signed-off-by: Malahal Naineni <malahal@us.ibm.com>

diff -r 1e369d480df0 -r 1c364c686b5e daemons/dmeventd/.exported_symbols
--- a/daemons/dmeventd/.exported_symbols	Sun Dec 13 01:17:52 2009 -0800
+++ b/daemons/dmeventd/.exported_symbols	Sun Dec 13 01:17:57 2009 -0800
@@ -17,3 +17,5 @@ dm_event_unregister_handler
 dm_event_get_registered_device
 dm_event_handler_set_timeout
 dm_event_handler_get_timeout
+dm_event_set_timeout
+dm_event_unset_timeout
diff -r 1e369d480df0 -r 1c364c686b5e daemons/dmeventd/dmeventd.c
--- a/daemons/dmeventd/dmeventd.c	Sun Dec 13 01:17:52 2009 -0800
+++ b/daemons/dmeventd/dmeventd.c	Sun Dec 13 01:17:57 2009 -0800
@@ -1164,13 +1164,25 @@ static int _get_next_registered_device(s
 static int _set_timeout(struct message_data *message_data)
 {
 	struct thread_status *thread;
+	int ret = 0;
 
 	_lock_mutex();
-	if ((thread = _lookup_thread_status(message_data)))
-		thread->timeout = message_data->timeout.secs;
+	if ((thread = _lookup_thread_status(message_data))) {
+		/* INT_MAX timeout is used to unregister timeout,
+		 * register timeout for any other value. */
+		thread->events &= ~DM_EVENT_TIMEOUT;
+		_unregister_for_timeout(thread);
+		if (message_data->timeout.secs == INT_MAX) {
+			thread->timeout = 0;
+		} else {
+			thread->events |= DM_EVENT_TIMEOUT;
+			thread->timeout = message_data->timeout.secs;
+			ret = _register_for_timeout(thread);
+		}
+	}
 	_unlock_mutex();
 
-	return thread ? 0 : -ENODEV;
+	return (thread && ret == 0) ? 0 : -ENODEV;
 }
 
 static int _get_timeout(struct message_data *message_data)
diff -r 1e369d480df0 -r 1c364c686b5e daemons/dmeventd/libdevmapper-event.c
--- a/daemons/dmeventd/libdevmapper-event.c	Sun Dec 13 01:17:52 2009 -0800
+++ b/daemons/dmeventd/libdevmapper-event.c	Sun Dec 13 01:17:57 2009 -0800
@@ -770,27 +770,90 @@ int dm_event_get_registered_device(struc
 	return ret;
 }
 
-#if 0				/* left out for now */
+/* Get uuid of a device */
+static struct dm_task *_get_device_info_by_name(const char *dev_name)
+{
+	struct dm_task *dmt;
+	struct dm_info info;
+
+	if (!(dmt = dm_task_create(DM_DEVICE_INFO))) {
+		log_error("_get_device_info: dm_task creation for info failed");
+		return NULL;
+	}
+
+	dm_task_set_name(dmt, dev_name);
+
+	/* FIXME Add name or uuid or devno to messages */
+	if (!dm_task_run(dmt)) {
+		log_error("_get_device_info: dm_task_run() failed");
+		goto failed;
+	}
+
+	if (!dm_task_get_info(dmt, &info)) {
+		log_error("_get_device_info: failed to get info for device");
+		goto failed;
+	}
+
+	if (!info.exists) {
+		log_error("_get_device_info: device not found");
+		goto failed;
+	}
+
+	return dmt;
+
+failed:
+	dm_task_destroy(dmt);
+	return NULL;
+}
+
+int dm_event_unset_timeout(const char *dev_name)
+{
+	/* timeout of INT_MAX is used as no timeout that removes the
+	 * timer.  INT_MAX is chosen instead of -1 as atoi() is used in
+	 * dmeventd.c */
+	return dm_event_set_timeout(dev_name, INT_MAX);
+}
+
+int dm_event_set_timeout(const char *dev_name, uint32_t timeout)
+{
+	int ret = 1, err;
+	const char *uuid;
+	struct dm_task *dmt;
+	struct dm_event_daemon_message msg = { 0, 0, NULL };
+
+	if (!(dmt = _get_device_info_by_name(dev_name))) {
+		stack;
+		return 0;
+	}
+
+	uuid = dm_task_get_uuid(dmt);
+
+	if ((err = _do_event(DM_EVENT_CMD_SET_TIMEOUT, &msg,
+			     NULL, uuid, 0, timeout)) < 0) {
+		log_error("%s: event set timeout failed: %s",
+			  dm_task_get_name(dmt),
+			  msg.data ? msg.data : strerror(-err));
+		ret = 0;
+	}
+
+	if (msg.data)
+		dm_free(msg.data);
+
+	dm_task_destroy(dmt);
+
+	return ret;
+}
+
+#if 0
 
 static char *_skip_string(char *src, const int delimiter)
 {
-	src = srtchr(src, delimiter);
+	src = strchr(src, delimiter);
 	if (src && *(src + 1))
 		return src + 1;
 	return NULL;
 }
 
-int dm_event_set_timeout(const char *device_path, uint32_t timeout)
-{
-	struct dm_event_daemon_message msg = { 0, 0, NULL };
-
-	if (!device_exists(device_path))
-		return -ENODEV;
-
-	return _do_event(DM_EVENT_CMD_SET_TIMEOUT, &msg,
-			 NULL, device_path, 0, timeout);
-}
-
 int dm_event_get_timeout(const char *device_path, uint32_t *timeout)
 {
 	int ret;
diff -r 1e369d480df0 -r 1c364c686b5e daemons/dmeventd/libdevmapper-event.h
--- a/daemons/dmeventd/libdevmapper-event.h	Sun Dec 13 01:17:52 2009 -0800
+++ b/daemons/dmeventd/libdevmapper-event.h	Sun Dec 13 01:17:57 2009 -0800
@@ -102,5 +102,7 @@ void process_event(struct dm_task *dmt, 
 int register_device(const char *device_name, const char *uuid, int major, int minor, void **user);
 int unregister_device(const char *device_name, const char *uuid, int major,
 		      int minor, void **user);
+int dm_event_set_timeout(const char *dev_name, uint32_t timeout);
+int dm_event_unset_timeout(const char *dev_name);
 
 #endif



^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 4 of 4] Attempt to resync a failed secondary leg few times before giving up
  2009-12-13  9:18 [PATCH 0 of 4] Re-integrate a failed secondary mirror leg Malahal Naineni
                   ` (2 preceding siblings ...)
  2009-12-13  9:18 ` [PATCH 3 of 4] Add dm_event_set_timeout/dm_event_unset_timeout interface Malahal Naineni
@ 2009-12-13  9:18 ` Malahal Naineni
  3 siblings, 0 replies; 17+ messages in thread
From: Malahal Naineni @ 2009-12-13  9:18 UTC (permalink / raw)
  To: lvm-devel

This patch adds the capability to attempt resync of a failed mirror
device at a given timeout interval and given number of attempts.

Signed-off-by: Malahal Naineni (malahal at us.ibm.com)

diff -r 1c364c686b5e -r bebd880d7476 daemons/dmeventd/plugins/mirror/Makefile.in
--- a/daemons/dmeventd/plugins/mirror/Makefile.in	Sun Dec 13 01:17:57 2009 -0800
+++ b/daemons/dmeventd/plugins/mirror/Makefile.in	Sun Dec 13 01:17:57 2009 -0800
@@ -32,7 +32,7 @@ LIB_VERSION = $(LIB_VERSION_LVM)
 
 include $(top_builddir)/make.tmpl
 
-LIBS += -ldevmapper @LIB_PTHREAD@ @LVM2CMD_LIB@
+LIBS += -ldevmapper -ldevmapper-event @LIB_PTHREAD@ @LVM2CMD_LIB@
 
 install_lvm2: libdevmapper-event-lvm2mirror.$(LIB_SUFFIX)
 	$(INSTALL) -D $(OWNER) $(GROUP) -m 555 $(STRIP) $< \
diff -r 1c364c686b5e -r bebd880d7476 daemons/dmeventd/plugins/mirror/dmeventd_mirror.c
--- a/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c	Sun Dec 13 01:17:57 2009 -0800
+++ b/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c	Sun Dec 13 01:17:57 2009 -0800
@@ -68,6 +68,9 @@ enum fault_policy {
 
 struct mirror_device_info {
 	enum fault_policy fault_policy;
+	int retry_total;	/* number of retries before giving up */
+	int retry_current;	/* number of retries already tried */
+	int retry_timeout;	/* timeout between retry attepts, in seconds */
 };
 
 #define CMD_SIZE 256	/* FIXME Use system restriction */
@@ -161,6 +164,16 @@ static enum fault_policy get_mirror_faul
 	return ret;
 }
 
+static int get_mirror_retry_num()
+{
+	return 10; /* FIXME: make it configurable */
+}
+
+static int get_mirror_retry_timeout()
+{
+	return 30; /* 30 seconds. FIXME: make it configurable */
+}
+
 /*
  * Currently only one event can be processed at a time.
  */
@@ -305,8 +318,22 @@ static void _temporary_log_fn(int level,
 		syslog(LOG_DEBUG, "%s", format);
 }
 
+static int start_retry_failed_devices(const char *device,
+				     struct mirror_device_info *mirror_info)
+{
+	/* Schedule a timeout for retrying failed devices. Note that
+	 * our process_event gets called@every retry_timeout interval
+	 * until we remove it by calling dm_event_unset_timeout */
+	return dm_event_set_timeout(device, mirror_info->retry_timeout);
+}
 
-static int retry_failed_devices(const char *device)
+static int stop_retry_failed_devices(const char *device)
+{
+	return dm_event_unset_timeout(device);
+}
+
+static int retry_failed_devices(const char *device,
+			        struct mirror_device_info *mirror_info)
 {
 	int r;
 	char cmd_str[CMD_SIZE];
@@ -348,6 +375,32 @@ static int retry_failed_devices(const ch
 	return r;
 }
 
+static int process_timeout(const char *device,
+			   struct mirror_device_info *mirror_info)
+{
+	int ret;
+
+	if (mirror_info->retry_current > mirror_info->retry_total) {
+		syslog(LOG_ERR, "Unable to resync the mirror: %s after %d "
+				"attempts. Giving up.\n", device,
+				mirror_info->retry_total);
+		stop_retry_failed_devices(device);
+		ret = -ENOMEM;
+	} else {
+		mirror_info->retry_current++;
+		syslog(LOG_ERR, "Trying to resync the failed mirror: %s "
+				"attepmt number: %d\n", device,
+				 mirror_info->retry_current);
+		ret = retry_failed_devices(device, mirror_info);
+
+		/* If we successfully retried failed device, stop the timer */
+		if (!ret)
+			stop_retry_failed_devices(device);
+	}
+
+	return ret;
+}
+
 static int _remove_failed_devices(const char *device)
 {
 	int r;
@@ -384,7 +437,7 @@ static int _remove_failed_devices(const 
 }
 
 void process_event(struct dm_task *dmt,
-		   enum dm_event_mask event __attribute((unused)),
+		   enum dm_event_mask event,
 		   void **private)
 {
 	void *next = NULL;
@@ -399,6 +452,12 @@ void process_event(struct dm_task *dmt,
 		syslog(LOG_NOTICE, "Another thread is handling an event.  Waiting...");
 		pthread_mutex_lock(&_event_mutex);
 	}
+
+	if (event & DM_EVENT_TIMEOUT) {
+		process_timeout(device, mirror_info);
+		goto out;
+	}
+
 	do {
 		next = dm_get_next_target(dmt, next, &start, &length,
 					  &target_type, &params);
@@ -421,11 +480,14 @@ void process_event(struct dm_task *dmt,
  			if (mirror_info->fault_policy == FAULT_POLICY_RETRY &&
 					(error & ME_SECONDARY_WRITE_FAILURE ||
 					 error & ME_SYNC_FAILURE)) {
- 				syslog(LOG_ERR, "Retrying the failed mirror "
- 						"device.\n");
- 				if (retry_failed_devices(device))
- 					syslog(LOG_ERR, "Failed to reload the "
- 							"mirror: %s\n", device);
+ 				syslog(LOG_ERR, "Start recovery for the "
+						"failed mirror device: %s.\n",
+						device);
+ 				if (!start_retry_failed_devices(device,
+								mirror_info))
+ 					syslog(LOG_ERR, "Failed to start retry "
+							"for the failed mirror "
+ 							"device: %s\n", device);
  			} else if (_remove_failed_devices(device)) {
 				/* FIXME Why are all the error return codes unused? Get rid of them? */
 				syslog(LOG_ERR, "Failed to remove faulty devices in %s\n",
@@ -441,6 +503,11 @@ void process_event(struct dm_task *dmt,
 			   _part_ of the device is in sync
 			   Also, this is not an error
 			*/
+ 			if (mirror_info->fault_policy == FAULT_POLICY_RETRY) {
+				/* stop if we scheduled any timeouts for retry */
+				mirror_info->retry_current = 0;
+				stop_retry_failed_devices(device);
+			}
 			syslog(LOG_NOTICE, "%s is now in-sync\n", device);
 		} else if (error & ME_READ_FAILURE) {
 			/* Ignore it for now */
@@ -448,6 +515,7 @@ void process_event(struct dm_task *dmt,
 			syslog(LOG_INFO, "Unknown event:%u received.\n", error);
 	} while (next);
 
+out:
 	pthread_mutex_unlock(&_event_mutex);
 }
 
@@ -476,6 +544,11 @@ int register_device(const char *device,
 		goto out;
 	}
 	mirror_info->fault_policy = get_mirror_fault_policy();
+	if (mirror_info->fault_policy == FAULT_POLICY_RETRY) {
+		mirror_info->retry_total = get_mirror_retry_num();
+		mirror_info->retry_current = 0;
+		mirror_info->retry_timeout = get_mirror_retry_timeout();
+	}
 	*private = mirror_info;
 
 	if (!_lvm_handle) {
@@ -511,8 +584,12 @@ int unregister_device(const char *device
 	struct mirror_device_info *mirror_info = *private;
 
 	dm_free(mirror_info);
+
 	pthread_mutex_lock(&_register_mutex);
 
+	/* Stop the retry timer, if any */
+	stop_retry_failed_devices(device);
+
 	syslog(LOG_INFO, "No longer monitoring mirror device %s for events\n",
 	       device);
 



^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 1 of 4] Add more error codes in mirror DSO
  2009-12-13  9:18 ` [PATCH 1 of 4] Add more error codes in mirror DSO Malahal Naineni
@ 2009-12-18 16:28   ` Jonathan Brassow
  2009-12-18 17:01     ` malahal
  2009-12-22  2:07     ` malahal
  0 siblings, 2 replies; 17+ messages in thread
From: Jonathan Brassow @ 2009-12-18 16:28 UTC (permalink / raw)
  To: lvm-devel

Is there a reason you bumped ME_IGNORE from 0 to 1?

Did you forget about ME_IGNORE in 'process_event' (causing it to fall  
through to the final else)?

  brassow
On Dec 13, 2009, at 3:18 AM, Malahal Naineni wrote:

> The mirror DSO (daemons/dmeventd/plugins/mirror/dmeventd_mirror.c)  
> logs
> various device failure codes but only handles one failure,  
> 'ME_FAILURE'.
> This patchs adds more error codes to handle multiple types of device
> failures.
>
> Signed-off-by: Malahal Naineni <malahal@us.ibm.com>
>
> diff -r a48a5f0eea85 -r a74600c6163e daemons/dmeventd/plugins/mirror/ 
> dmeventd_mirror.c
> --- a/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c	Sun Dec 06  
> 23:04:13 2009 -0800
> +++ b/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c	Sun Dec 13  
> 01:16:51 2009 -0800
> @@ -30,9 +30,17 @@
> /* FIXME Replace most syslogs with log_error() style messages and  
> add complete context. */
> /* FIXME Reformat to 80 char lines. */
>
> -#define ME_IGNORE    0
> -#define ME_INSYNC    1
> -#define ME_FAILURE   2
> +/*
> + * An event may contain more than one error type. The following are  
> bit
> + * flags that indicate each error type.
> + */
> +#define ME_IGNORE    			0x01U
> +#define ME_INSYNC    			0x02U
> +#define ME_READ_FAILURE   		0x04U
> +#define ME_SYNC_FAILURE   		0x08U
> +#define ME_LOG_FAILURE 			0x10U
> +#define ME_SECONDARY_WRITE_FAILURE	0x20U
> +#define ME_PRIMARY_WRITE_FAILURE	0x40U
>
> /*
>  * register_device() is called first and performs initialisation.
> @@ -53,45 +61,68 @@ static void *_lvm_handle = NULL;
>  */
> static pthread_mutex_t _event_mutex = PTHREAD_MUTEX_INITIALIZER;
>
> -static int _process_status_code(const char status_code, const char  
> *dev_name,
> -				const char *dev_type, int r)
> +static int _process_status_code(const char *mirror, const char  
> *device,
> +			        const char status_code, int dev_index)
> {
> -	/*
> -	 *    A => Alive - No failures
> -	 *    D => Dead - A write failure occurred leaving mirror out-of- 
> sync
> -	 *    F => Flush failed.
> -	 *    S => Sync - A sychronization failure occurred, mirror out-of- 
> sync
> -	 *    R => Read - A read failure occurred, mirror data unaffected
> -	 *    U => Unclassified failure (bug)
> -	 */
> -	if (status_code == 'F') {
> -		syslog(LOG_ERR, "%s device %s flush failed.\n",
> -		       dev_type, dev_name);
> -		r = ME_FAILURE;
> -	} else if (status_code == 'S')
> -		syslog(LOG_ERR, "%s device %s sync failed.\n",
> -		       dev_type, dev_name);
> -	else if (status_code == 'R')
> -		syslog(LOG_ERR, "%s device %s read failed.\n",
> -		       dev_type, dev_name);
> -	else if (status_code != 'A') {
> -		syslog(LOG_ERR, "%s device %s has failed (%c).\n",
> -		       dev_type, dev_name, status_code);
> -		r = ME_FAILURE;
> +	int r;
> +	const char *error_string;
> +	const char *dev_type = dev_index ? "Secondary" : "Primary";
> +
> +	switch (status_code) {
> +	default:
> +	case 'U': /* Unclassified failure (bug) */
> +	case 'D': /* Write failure occurred leaving mirror out-of-sync */
> +	case 'F': /* Flush failure, handled as write failure */
> +		if (status_code == 'D')
> +			error_string = "write";
> +		else if (status_code == 'F')
> +			error_string = "flush";
> +		else if (status_code == 'U')
> +			error_string = "unclassified";
> +		else {
> +			syslog(LOG_ERR, "Unknown device status code (%c).\n",
> +					status_code);
> +			error_string = "unknown";
> +		}
> +
> +		syslog(LOG_ERR, "Mirror device: %s, %s leg: %s had a %s "
> +				"failure.\n", mirror, dev_type, device,
> +				error_string);
> +
> +		r = dev_index? ME_SECONDARY_WRITE_FAILURE :
> +			ME_PRIMARY_WRITE_FAILURE;
> +		break;
> +
> +	case 'S': /* Sychronization failure occurred, mirror out-of-sync */
> +		syslog(LOG_ERR, "Mirror device: %s, %s leg: %s had "
> +				"sync failure.\n", mirror, dev_type, device);
> +		r = ME_SYNC_FAILURE;
> +		break;
> +
> +	case 'R': /* Read failure occurred, mirror data unaffected */
> +		syslog(LOG_ERR, "Mirror device: %s, %s leg: %s had a "
> +				"read failure.\n", mirror, dev_type, device);
> +		r = ME_READ_FAILURE;
> +		break;
> +
> +	case 'A': /* Active, a good status */
> +		r = 0;
> +		break;
> 	}
>
> 	return r;
> }
>
> -static int _get_mirror_event(char *params)
> +static int _get_mirror_event(const char *mirror, char *params)
> {
> -	int i, r = ME_INSYNC;
> +	int i;
> 	char **args = NULL;
> 	char *dev_status_str;
> 	char *log_status_str;
> 	char *sync_str;
> 	char *p = NULL;
> 	int log_argc, num_devs;
> +	int retval = 0;
>
> 	/*
> 	 * dm core parms:	     0 409600 mirror
> @@ -122,23 +153,25 @@ static int _get_mirror_event(char *param
>
> 	/* Check for bad mirror devices */
> 	for (i = 0; i < num_devs; i++)
> -		r = _process_status_code(dev_status_str[i], args[i],
> -			i ? "Secondary mirror" : "Primary mirror", r);
> +		retval |= _process_status_code(mirror, args[i],
> +					       dev_status_str[i], i);
>
> 	/* Check for bad disk log device */
> -	if (log_argc > 1)
> -		r = _process_status_code(log_status_str[0],
> -					 args[2 + num_devs + log_argc],
> -					 "Log", r);
> +	if (log_argc > 1 && log_status_str[0] == 'D') {
> +		syslog(LOG_ERR, "Mirror device: %s, log device: %s failed.\n",
> +				mirror, args[2 + num_devs + log_argc]);
> +		retval = retval | ME_LOG_FAILURE;
> +	}
>
> -	if (r == ME_FAILURE)
> +	if (retval)	/* A failure occurred */
> 		goto out;
>
> +	retval = ME_INSYNC;	/* assume INSYNC event */
> 	p = strstr(sync_str, "/");
> 	if (p) {
> 		p[0] = '\0';
> 		if (strcmp(sync_str, p+1))
> -			r = ME_IGNORE;
> +			retval = ME_IGNORE;
> 		p[0] = '/';
> 	} else
> 		goto out_parse;
> @@ -146,7 +179,7 @@ static int _get_mirror_event(char *param
> out:
> 	if (args)
> 		dm_free(args);
> -	return r;
> +	return retval;
> 	
> out_parse:
> 	if (args)
> @@ -212,6 +245,7 @@ void process_event(struct dm_task *dmt,
> 	char *target_type = NULL;
> 	char *params;
> 	const char *device = dm_task_get_name(dmt);
> +	int error;
>
> 	if (pthread_mutex_trylock(&_event_mutex)) {
> 		syslog(LOG_NOTICE, "Another thread is handling an event.   
> Waiting...");
> @@ -231,17 +265,11 @@ void process_event(struct dm_task *dmt,
> 			continue;
> 		}
>
> -		switch(_get_mirror_event(params)) {
> -		case ME_INSYNC:
> -			/* FIXME: all we really know is that this
> -			   _part_ of the device is in sync
> -			   Also, this is not an error
> -			*/
> -			syslog(LOG_NOTICE, "%s is now in-sync\n", device);
> -			break;
> -		case ME_FAILURE:
> -			syslog(LOG_ERR, "Device failure in %s\n", device);
> -			if (_remove_failed_devices(device))
> +		error = _get_mirror_event(device, params);
> +		if (error & ME_LOG_FAILURE ||
> +		    error & ME_PRIMARY_WRITE_FAILURE ||
> +		    error & ME_SECONDARY_WRITE_FAILURE) {
> +			if (_remove_failed_devices(device)) {
> 				/* FIXME Why are all the error return codes unused? Get rid of  
> them? */
> 				syslog(LOG_ERR, "Failed to remove faulty devices in %s\n",
> 				       device);
> @@ -250,13 +278,18 @@ void process_event(struct dm_task *dmt,
> 				syslog(LOG_NOTICE, "%s is now a linear device.\n",
> 					device);
> 			*/
> -			break;
> -		case ME_IGNORE:
> -			break;
> -		default:
> -			/* FIXME Provide value then! */
> -			syslog(LOG_INFO, "Unknown event received.\n");
> -		}
> +			}
> +		} else if (error & ME_INSYNC) {
> +			/* FIXME: all we really know is that this
> +			   _part_ of the device is in sync
> +			   Also, this is not an error
> +			*/
> +			syslog(LOG_NOTICE, "%s is now in-sync\n", device);
> +		} else if (error & ME_READ_FAILURE ||
> +			   error & ME_SYNC_FAILURE) {
> +			/* Ignore these for now */
> +		} else
> +			syslog(LOG_INFO, "Unknown event:%u received.\n", error);
> 	} while (next);
>
> 	pthread_mutex_unlock(&_event_mutex);
>
> --
> lvm-devel mailing list
> lvm-devel at redhat.com
> https://www.redhat.com/mailman/listinfo/lvm-devel



^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 1 of 4] Add more error codes in mirror DSO
  2009-12-18 16:28   ` Jonathan Brassow
@ 2009-12-18 17:01     ` malahal
  2009-12-22  2:07     ` malahal
  1 sibling, 0 replies; 17+ messages in thread
From: malahal @ 2009-12-18 17:01 UTC (permalink / raw)
  To: lvm-devel

Jonathan Brassow [jbrassow at redhat.com] wrote:
> Is there a reason you bumped ME_IGNORE from 0 to 1?

It is actually a bit-field value rather than an absolute value, so I
couldn't have anything with zero.

> Did you forget about ME_IGNORE in 'process_event' (causing it to fall  
> through to the final else)?

Yes! Thank you. Unknown errors are handled as 'fatal' errors in the
existing code, so process_event will never get an unknown error code (it
has to be ME_IGNORE). I will fix it.



^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 2 of 4] Handle transient secondary mirror leg failures
  2009-12-13  9:18 ` [PATCH 2 of 4] Handle transient secondary mirror leg failures Malahal Naineni
@ 2009-12-18 17:10   ` Jonathan Brassow
  2009-12-18 18:25     ` Takahiro Yasui
  2009-12-18 18:35     ` malahal
  0 siblings, 2 replies; 17+ messages in thread
From: Jonathan Brassow @ 2009-12-18 17:10 UTC (permalink / raw)
  To: lvm-devel

hmmm, I wonder how well this is going to work.

1) I've seen code ('dm_task_suppress_identical_reload') that  
suppresses table reloads if the tables are identical - will that  
obstruct what you are trying to do here?

2) If you don't get a new table loaded, it will behave as a suspend/ 
resume only.  Recent code changes in dm-raid1.c are causing  
'log_failure' and 'leg_failure' to not be reset in those cases.  IOW,  
all these steps could be for nothing.  :(

3) How does this work in a cluster?  dmsetup is not cluster-aware.  If  
you don't go through the LVM commands (I recognize the additional  
pain), none of this will be coordinated.

  brassow


On Dec 13, 2009, at 3:18 AM, Malahal Naineni wrote:

> +	/* FIXME: should be running an LVM command that is pinned.
> +	 * dmsetup command may not be pinned in memory all the time.
> +	 * "lvchange --refresh vg/lv" only works if there are no device
> +	 * failures while it is running. Otherwise, the failed device
> +	 * is replaced with "error" target which is not what we want.
> +	 */
> +	snprintf(cmd_str, CMD_SIZE, "dmsetup suspend --noflush %s-%s", vg,  
> lv);
> +	syslog(LOG_NOTICE, "Running command: %s", cmd_str);
> +	r = system(cmd_str);
> +
> +	snprintf(cmd_str, CMD_SIZE, "dmsetup table %s-%s | dmsetup load "
> +			"%s-%s", vg, lv, vg, lv);
> +	syslog(LOG_NOTICE, "Running command: %s", cmd_str);
> +	r |= system(cmd_str);
> +
> +	snprintf(cmd_str, CMD_SIZE, "dmsetup resume %s-%s", vg, lv);
> +	syslog(LOG_NOTICE, "Running command: %s", cmd_str);
> +	r |= system(cmd_str);



^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 2 of 4] Handle transient secondary mirror leg failures
  2009-12-18 17:10   ` Jonathan Brassow
@ 2009-12-18 18:25     ` Takahiro Yasui
  2009-12-18 18:49       ` malahal
  2009-12-18 18:35     ` malahal
  1 sibling, 1 reply; 17+ messages in thread
From: Takahiro Yasui @ 2009-12-18 18:25 UTC (permalink / raw)
  To: lvm-devel

On 12/18/09 12:10, Jonathan Brassow wrote:
> 2) If you don't get a new table loaded, it will behave as a suspend/ 
> resume only.  Recent code changes in dm-raid1.c are causing  
> 'log_failure' and 'leg_failure' to not be reset in those cases.  IOW,  
> all these steps could be for nothing.  :(

I would like to know how effective the retry is. As Jon explained
above, recent upstream kernel blocks all write I/Os on NOSYNC regions.
This means that those write I/Os are kept blocked for a long time.
For example, mirror retry interval in your patch #4 is 30 seconds and
application or  filesystem will be waited for 30 seconds (330 seconds
if retry count is 10). Can your application wait for more than 5 minutes?

This behaviour will not been solved even if kernel is fixed so that
log_failure and leg_failure are reset. The write I/Os blocked will
be re-queued in the kernel when suspend/resume are done, but they
will be put in the hold queue again if the device failure is not
transient but permanent.

I would like to know the use case of this patch set.

Thanks,
Taka



^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 2 of 4] Handle transient secondary mirror leg failures
  2009-12-18 17:10   ` Jonathan Brassow
  2009-12-18 18:25     ` Takahiro Yasui
@ 2009-12-18 18:35     ` malahal
  1 sibling, 0 replies; 17+ messages in thread
From: malahal @ 2009-12-18 18:35 UTC (permalink / raw)
  To: lvm-devel

Jonathan Brassow [jbrassow at redhat.com] wrote:
> hmmm, I wonder how well this is going to work.
> 
> 1) I've seen code ('dm_task_suppress_identical_reload') that  
> suppresses table reloads if the tables are identical - will that  
> obstruct what you are trying to do here?
> 
> 2) If you don't get a new table loaded, it will behave as a suspend/ 
> resume only.  Recent code changes in dm-raid1.c are causing  
> 'log_failure' and 'leg_failure' to not be reset in those cases.  IOW,  
> all these steps could be for nothing.  :(

Initially I did test with a modified dm-raid1.c that reset the device
status (error count too) in resume() function. I wasn't using "dmsetup
reload" at that time. Later, I did add "dmsetup reload" and removed my
patch in dm-raid1.c.

It worked fine, so either I am not hitting
dm_task_suppress_identical_reload or some how I failed to revert my
patch in the kernel.

> 3) How does this work in a cluster?  dmsetup is not cluster-aware.  If  
> you don't go through the LVM commands (I recognize the additional  
> pain), none of this will be coordinated.

Tried to use "lvchange --refresh" and that didn't work too well. In the
last call, Alasdair indicated I should add something like
"--top-layer-only" option to "lvchange --refresh" command.

Thank you for the review.

--Malahal.



^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 2 of 4] Handle transient secondary mirror leg failures
  2009-12-18 18:25     ` Takahiro Yasui
@ 2009-12-18 18:49       ` malahal
  2009-12-18 20:21         ` Takahiro Yasui
  0 siblings, 1 reply; 17+ messages in thread
From: malahal @ 2009-12-18 18:49 UTC (permalink / raw)
  To: lvm-devel

Takahiro Yasui [tyasui at redhat.com] wrote:
> On 12/18/09 12:10, Jonathan Brassow wrote:
> > 2) If you don't get a new table loaded, it will behave as a suspend/ 
> > resume only.  Recent code changes in dm-raid1.c are causing  
> > 'log_failure' and 'leg_failure' to not be reset in those cases.  IOW,  
> > all these steps could be for nothing.  :(
> 
> I would like to know how effective the retry is. As Jon explained
> above, recent upstream kernel blocks all write I/Os on NOSYNC regions.
> This means that those write I/Os are kept blocked for a long time.
> For example, mirror retry interval in your patch #4 is 30 seconds and
> application or  filesystem will be waited for 30 seconds (330 seconds
> if retry count is 10). Can your application wait for more than 5 minutes?
> 
> This behaviour will not been solved even if kernel is fixed so that
> log_failure and leg_failure are reset. The write I/Os blocked will
> be re-queued in the kernel when suspend/resume are done, but they
> will be put in the hold queue again if the device failure is not
> transient but permanent.
> 
> I would like to know the use case of this patch set.

I have tested with RHEL5.4 kernel that doesn't block on device failure.
IMHO, suspend/resume is doing two things if the kernel code blocks on
failure -- 1) letting the kernel module unblock 2) start resync. We
should separate those two actions. Maybe, we should do something here???
Have an ioctl to start resync or have a message to unblock....

Also, blocking the kernel module introduced a regression -- the
mirror will block forever on a transient failure!

Thanks, Malahal.



^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 2 of 4] Handle transient secondary mirror leg failures
  2009-12-18 18:49       ` malahal
@ 2009-12-18 20:21         ` Takahiro Yasui
  2009-12-18 20:54           ` malahal
  0 siblings, 1 reply; 17+ messages in thread
From: Takahiro Yasui @ 2009-12-18 20:21 UTC (permalink / raw)
  To: lvm-devel

On 12/18/09 13:49, malahal at us.ibm.com wrote:
> Takahiro Yasui [tyasui at redhat.com] wrote:
>> On 12/18/09 12:10, Jonathan Brassow wrote:
>>> 2) If you don't get a new table loaded, it will behave as a suspend/ 
>>> resume only.  Recent code changes in dm-raid1.c are causing  
>>> 'log_failure' and 'leg_failure' to not be reset in those cases.  IOW,  
>>> all these steps could be for nothing.  :(
>>
>> I would like to know how effective the retry is. As Jon explained
>> above, recent upstream kernel blocks all write I/Os on NOSYNC regions.
>> This means that those write I/Os are kept blocked for a long time.
>> For example, mirror retry interval in your patch #4 is 30 seconds and
>> application or  filesystem will be waited for 30 seconds (330 seconds
>> if retry count is 10). Can your application wait for more than 5 minutes?
>>
>> This behaviour will not been solved even if kernel is fixed so that
>> log_failure and leg_failure are reset. The write I/Os blocked will
>> be re-queued in the kernel when suspend/resume are done, but they
>> will be put in the hold queue again if the device failure is not
>> transient but permanent.
>>
>> I would like to know the use case of this patch set.
> 
> I have tested with RHEL5.4 kernel that doesn't block on device failure.

I see. Your retry approach looks good for the kernels on RHEL5.4 kernel
and 2.6.32 kernel since write I/Os aren't blocked even after the error
is detected on a mirror leg.

> IMHO, suspend/resume is doing two things if the kernel code blocks on
> failure -- 1) letting the kernel module unblock 2) start resync. We
> should separate those two actions. Maybe, we should do something here???
> Have an ioctl to start resync or have a message to unblock....

I'm sorry I don't get your point to separate unblock and resync.
Currently "unblock" is done by "suspend" and resync is done by "resume"
We need to reset log_failure/leg_failre, but anything else?

> Also, blocking the kernel module introduced a regression -- the
> mirror will block forever on a transient failure!

You are right. Refreshing block I/Os is necessary when dmeventd noticed
an error event and lvconvert command found nothing to be done for repair.
dmeventd should kick an action to refresh the mirror device.

Thanks,
Taka



^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 2 of 4] Handle transient secondary mirror leg failures
  2009-12-18 20:21         ` Takahiro Yasui
@ 2009-12-18 20:54           ` malahal
  0 siblings, 0 replies; 17+ messages in thread
From: malahal @ 2009-12-18 20:54 UTC (permalink / raw)
  To: lvm-devel

Takahiro Yasui [tyasui at redhat.com] wrote:
> > IMHO, suspend/resume is doing two things if the kernel code blocks on
> > failure -- 1) letting the kernel module unblock 2) start resync. We
> > should separate those two actions. Maybe, we should do something here???
> > Have an ioctl to start resync or have a message to unblock....
> 
> I'm sorry I don't get your point to separate unblock and resync.
> Currently "unblock" is done by "suspend" and resync is done by "resume"
> We need to reset log_failure/leg_failre, but anything else?

Let us talk about suspend with noflush as the other case would fail the
I/O.  The suspend just REQUEUE's, so it really needs resume to send the
I/O down to a surviving leg. As far as the application goes, the I/O
gets blocked until the resume, anyway.

Also in do_writes(), we block any I/O that is going to a nosync region.
If I understand, The recent block on error patch just fails to do any
I/O on a "nosync" region. In other words, we don't entertain any mirror
with leg failures and still continue to function as a mirror!

We can reset log_failure/leg_failure in resume to make them work for now
as you say.

--Malahal.



^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 1 of 4] Add more error codes in mirror DSO
  2009-12-18 16:28   ` Jonathan Brassow
  2009-12-18 17:01     ` malahal
@ 2009-12-22  2:07     ` malahal
  1 sibling, 0 replies; 17+ messages in thread
From: malahal @ 2009-12-22  2:07 UTC (permalink / raw)
  To: lvm-devel

Jonathan Brassow [jbrassow at redhat.com] wrote:
> Is there a reason you bumped ME_IGNORE from 0 to 1?
> 
> Did you forget about ME_IGNORE in 'process_event' (causing it to fall  
> through to the final else)?
> 
>   brassow

The following fixes the ME_IGNORE in 'process_event' [complete patch]

======================================================================

Add more error codes in mirror DSO.

The mirror DSO (daemons/dmeventd/plugins/mirror/dmeventd_mirror.c) logs
various device failure codes but only handles one failure, 'ME_FAILURE'.
This patchs adds more error codes to handle multiple types of device
failures.

Signed-off-by: Malahal Naineni <malahal@us.ibm.com>

diff -r a48a5f0eea85 -r db70a48aef1b daemons/dmeventd/plugins/mirror/dmeventd_mirror.c
--- a/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c	Sun Dec 06 23:04:13 2009 -0800
+++ b/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c	Mon Dec 21 17:38:25 2009 -0800
@@ -30,9 +30,17 @@
 /* FIXME Replace most syslogs with log_error() style messages and add complete context. */
 /* FIXME Reformat to 80 char lines. */
 
-#define ME_IGNORE    0
-#define ME_INSYNC    1
-#define ME_FAILURE   2
+/* 
+ * An event may contain more than one error type. The following are bit
+ * flags that indicate each error type.
+ */
+#define ME_IGNORE    			0x01U
+#define ME_INSYNC    			0x02U
+#define ME_READ_FAILURE   		0x04U
+#define ME_SYNC_FAILURE   		0x08U
+#define ME_LOG_FAILURE 			0x10U
+#define ME_SECONDARY_WRITE_FAILURE	0x20U
+#define ME_PRIMARY_WRITE_FAILURE	0x40U
 
 /*
  * register_device() is called first and performs initialisation.
@@ -53,45 +61,68 @@ static void *_lvm_handle = NULL;
  */
 static pthread_mutex_t _event_mutex = PTHREAD_MUTEX_INITIALIZER;
 
-static int _process_status_code(const char status_code, const char *dev_name,
-				const char *dev_type, int r)
+static int _process_status_code(const char *mirror, const char *device,
+			        const char status_code, int dev_index)
 {
-	/*
-	 *    A => Alive - No failures
-	 *    D => Dead - A write failure occurred leaving mirror out-of-sync
-	 *    F => Flush failed.
-	 *    S => Sync - A sychronization failure occurred, mirror out-of-sync
-	 *    R => Read - A read failure occurred, mirror data unaffected
-	 *    U => Unclassified failure (bug)
-	 */ 
-	if (status_code == 'F') {
-		syslog(LOG_ERR, "%s device %s flush failed.\n",
-		       dev_type, dev_name);
-		r = ME_FAILURE;
-	} else if (status_code == 'S')
-		syslog(LOG_ERR, "%s device %s sync failed.\n",
-		       dev_type, dev_name);
-	else if (status_code == 'R')
-		syslog(LOG_ERR, "%s device %s read failed.\n",
-		       dev_type, dev_name);
-	else if (status_code != 'A') {
-		syslog(LOG_ERR, "%s device %s has failed (%c).\n",
-		       dev_type, dev_name, status_code);
-		r = ME_FAILURE;
+	int r;
+	const char *error_string;
+	const char *dev_type = dev_index ? "Secondary" : "Primary";
+
+	switch (status_code) {
+	default:
+	case 'U': /* Unclassified failure (bug) */
+	case 'D': /* Write failure occurred leaving mirror out-of-sync */
+	case 'F': /* Flush failure, handled as write failure */
+		if (status_code == 'D')
+			error_string = "write";
+		else if (status_code == 'F')
+			error_string = "flush";
+		else if (status_code == 'U')
+			error_string = "unclassified";
+		else {
+			syslog(LOG_ERR, "Unknown device status code (%c).\n",
+					status_code);
+			error_string = "unknown";
+		}
+
+		syslog(LOG_ERR, "Mirror device: %s, %s leg: %s had a %s "
+				"failure.\n", mirror, dev_type, device,
+				error_string);
+
+		r = dev_index? ME_SECONDARY_WRITE_FAILURE :
+			ME_PRIMARY_WRITE_FAILURE;
+		break;
+
+	case 'S': /* Sychronization failure occurred, mirror out-of-sync */
+		syslog(LOG_ERR, "Mirror device: %s, %s leg: %s had "
+				"sync failure.\n", mirror, dev_type, device);
+		r = ME_SYNC_FAILURE;
+		break;
+
+	case 'R': /* Read failure occurred, mirror data unaffected */
+		syslog(LOG_ERR, "Mirror device: %s, %s leg: %s had a "
+				"read failure.\n", mirror, dev_type, device);
+		r = ME_READ_FAILURE;
+		break;
+
+	case 'A': /* Active, a good status */
+		r = 0;
+		break;
 	}
 
 	return r;
 }
 
-static int _get_mirror_event(char *params)
+static int _get_mirror_event(const char *mirror, char *params)
 {
-	int i, r = ME_INSYNC;
+	int i;
 	char **args = NULL;
 	char *dev_status_str;
 	char *log_status_str;
 	char *sync_str;
 	char *p = NULL;
 	int log_argc, num_devs;
+	int retval = 0;
 
 	/*
 	 * dm core parms:	     0 409600 mirror
@@ -122,23 +153,25 @@ static int _get_mirror_event(char *param
 
 	/* Check for bad mirror devices */
 	for (i = 0; i < num_devs; i++)
-		r = _process_status_code(dev_status_str[i], args[i],
-			i ? "Secondary mirror" : "Primary mirror", r);
+		retval |= _process_status_code(mirror, args[i],
+					       dev_status_str[i], i);
 
 	/* Check for bad disk log device */
-	if (log_argc > 1)
-		r = _process_status_code(log_status_str[0],
-					 args[2 + num_devs + log_argc],
-					 "Log", r);
+	if (log_argc > 1 && log_status_str[0] == 'D') {
+		syslog(LOG_ERR, "Mirror device: %s, log device: %s failed.\n",
+				mirror, args[2 + num_devs + log_argc]);
+		retval = retval | ME_LOG_FAILURE;
+	}
 
-	if (r == ME_FAILURE)
+	if (retval)	/* A failure occurred */
 		goto out;
 
+	retval = ME_INSYNC;	/* assume INSYNC event */
 	p = strstr(sync_str, "/");
 	if (p) {
 		p[0] = '\0';
 		if (strcmp(sync_str, p+1))
-			r = ME_IGNORE;
+			retval = ME_IGNORE;
 		p[0] = '/';
 	} else
 		goto out_parse;
@@ -146,7 +179,7 @@ static int _get_mirror_event(char *param
 out:
 	if (args)
 		dm_free(args);
-	return r;
+	return retval;
 	
 out_parse:
 	if (args)
@@ -212,6 +245,7 @@ void process_event(struct dm_task *dmt,
 	char *target_type = NULL;
 	char *params;
 	const char *device = dm_task_get_name(dmt);
+	int error;
 
 	if (pthread_mutex_trylock(&_event_mutex)) {
 		syslog(LOG_NOTICE, "Another thread is handling an event.  Waiting...");
@@ -231,17 +265,11 @@ void process_event(struct dm_task *dmt,
 			continue;
 		}
 
-		switch(_get_mirror_event(params)) {
-		case ME_INSYNC:
-			/* FIXME: all we really know is that this
-			   _part_ of the device is in sync
-			   Also, this is not an error
-			*/
-			syslog(LOG_NOTICE, "%s is now in-sync\n", device);
-			break;
-		case ME_FAILURE:
-			syslog(LOG_ERR, "Device failure in %s\n", device);
-			if (_remove_failed_devices(device))
+		error = _get_mirror_event(device, params);
+		if (error & ME_LOG_FAILURE ||
+		    error & ME_PRIMARY_WRITE_FAILURE ||
+		    error & ME_SECONDARY_WRITE_FAILURE) {
+			if (_remove_failed_devices(device)) {
 				/* FIXME Why are all the error return codes unused? Get rid of them? */
 				syslog(LOG_ERR, "Failed to remove faulty devices in %s\n",
 				       device);
@@ -250,13 +278,19 @@ void process_event(struct dm_task *dmt,
 				syslog(LOG_NOTICE, "%s is now a linear device.\n",
 					device);
 			*/
-			break;
-		case ME_IGNORE:
-			break;
-		default:
-			/* FIXME Provide value then! */
-			syslog(LOG_INFO, "Unknown event received.\n");
-		}
+			}
+		} else if (error & ME_INSYNC) {
+			/* FIXME: all we really know is that this
+			   _part_ of the device is in sync
+			   Also, this is not an error
+			*/
+			syslog(LOG_NOTICE, "%s is now in-sync\n", device);
+		} else if (error & ME_READ_FAILURE ||
+			   error & ME_SYNC_FAILURE ||
+			   error & ME_IGNORE) {
+			/* Ignore these for now */
+		} else
+			syslog(LOG_INFO, "Unknown event:%u received.\n", error);
 	} while (next);
 
 	pthread_mutex_unlock(&_event_mutex);



^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 3 of 4] Add dm_event_set_timeout/dm_event_unset_timeout interface
  2009-12-13  9:18 ` [PATCH 3 of 4] Add dm_event_set_timeout/dm_event_unset_timeout interface Malahal Naineni
@ 2009-12-22  2:12   ` malahal
  2009-12-22 10:51     ` Alasdair G Kergon
  0 siblings, 1 reply; 17+ messages in thread
From: malahal @ 2009-12-22  2:12 UTC (permalink / raw)
  To: lvm-devel

Malahal Naineni [malahal at us.ibm.com] wrote:
> Will be used in mirror DSO to handle transient device failures.

Alasdair suggested to use -1 instead instead of INT_MAX to unregister a
timeout. This patch supersedes the previous patch.

==========================================================
Add dm_event_set_timeout/dm_event_unset_timeout interface

Will be used in mirror DSO to handle transient device failures.

Signed-off-by: Malahal Naineni <malahal@us.ibm.com>

diff -r c11c9f0a04f8 -r b3c48f241dfd daemons/dmeventd/.exported_symbols
--- a/daemons/dmeventd/.exported_symbols	Mon Dec 21 17:40:47 2009 -0800
+++ b/daemons/dmeventd/.exported_symbols	Mon Dec 21 17:57:37 2009 -0800
@@ -17,3 +17,5 @@ dm_event_unregister_handler
 dm_event_get_registered_device
 dm_event_handler_set_timeout
 dm_event_handler_get_timeout
+dm_event_set_timeout
+dm_event_unset_timeout
diff -r c11c9f0a04f8 -r b3c48f241dfd daemons/dmeventd/dmeventd.c
--- a/daemons/dmeventd/dmeventd.c	Mon Dec 21 17:40:47 2009 -0800
+++ b/daemons/dmeventd/dmeventd.c	Mon Dec 21 17:57:37 2009 -0800
@@ -155,7 +155,7 @@ struct message_data {
 	} events;
 	union {
 		char *str;
-		uint32_t secs;
+		int32_t secs;
 	} timeout;
 	struct dm_event_daemon_message *msg;	/* Pointer to message buffer. */
 };
@@ -187,7 +187,7 @@ struct thread_status {
 	enum dm_event_mask current_events;	/* bitfield for occured events. */
 	struct dm_task *current_task;
 	time_t next_time;
-	uint32_t timeout;
+	int32_t timeout;
 	struct dm_list timeout_list;
 	void *dso_private; /* dso per-thread status variable */
 };
@@ -363,7 +363,7 @@ static int _parse_message(struct message
 			message_data->events.field = i;
 		}
 		if (message_data->timeout.str) {
-			uint32_t secs = atoi(message_data->timeout.str);
+			int32_t secs = atoi(message_data->timeout.str);
 			dm_free(message_data->timeout.str);
 			message_data->timeout.secs = secs ? secs :
 			    DM_EVENT_DEFAULT_TIMEOUT;
@@ -1164,13 +1164,26 @@ static int _get_next_registered_device(s
 static int _set_timeout(struct message_data *message_data)
 {
 	struct thread_status *thread;
+	int ret = 0;
 
 	_lock_mutex();
-	if ((thread = _lookup_thread_status(message_data)))
-		thread->timeout = message_data->timeout.secs;
+	if ((thread = _lookup_thread_status(message_data))) {
+		/* A timeout of -1 (actually any negative timeout) is
+		 * used to unregister the timeout, register a timeout
+		 * for any other value.  */
+		thread->events &= ~DM_EVENT_TIMEOUT;
+		_unregister_for_timeout(thread);
+		if (message_data->timeout.secs >= 0) {
+			thread->events |= DM_EVENT_TIMEOUT;
+			thread->timeout = message_data->timeout.secs;
+			ret = _register_for_timeout(thread);
+		} else {
+			thread->timeout = 0;
+		}
+	}
 	_unlock_mutex();
 
-	return thread ? 0 : -ENODEV;
+	return (thread && ret == 0) ? 0 : -ENODEV;
 }
 
 static int _get_timeout(struct message_data *message_data)
diff -r c11c9f0a04f8 -r b3c48f241dfd daemons/dmeventd/libdevmapper-event.c
--- a/daemons/dmeventd/libdevmapper-event.c	Mon Dec 21 17:40:47 2009 -0800
+++ b/daemons/dmeventd/libdevmapper-event.c	Mon Dec 21 17:57:37 2009 -0800
@@ -40,7 +40,7 @@ struct dm_event_handler {
 	char *uuid;
 	int major;
 	int minor;
-	uint32_t timeout;
+	int32_t timeout;
 
 	enum dm_event_mask mask;
 };
@@ -331,11 +331,11 @@ static int _daemon_write(struct dm_event
 static int _daemon_talk(struct dm_event_fifos *fifos,
 			struct dm_event_daemon_message *msg, int cmd,
 			const char *dso_name, const char *dev_name,
-			enum dm_event_mask evmask, uint32_t timeout)
+			enum dm_event_mask evmask, int32_t timeout)
 {
 	const char *dso = dso_name ? dso_name : "";
 	const char *dev = dev_name ? dev_name : "";
-	const char *fmt = "%d:%d %s %s %u %" PRIu32;
+	const char *fmt = "%d:%d %s %s %u %" PRId32;
 	int msg_size;
 	memset(msg, 0, sizeof(*msg));
 
@@ -550,7 +550,7 @@ failed:
 /* Handle the event (de)registration call and return negative error codes. */
 static int _do_event(int cmd, struct dm_event_daemon_message *msg,
 		     const char *dso_name, const char *dev_name,
-		     enum dm_event_mask evmask, uint32_t timeout)
+		     enum dm_event_mask evmask, int32_t timeout)
 {
 	int ret;
 	struct dm_event_fifos fifos;
@@ -770,27 +770,89 @@ int dm_event_get_registered_device(struc
 	return ret;
 }
 
-#if 0				/* left out for now */
+/* Get uuid of a device */
+static struct dm_task *_get_device_info_by_name(const char *dev_name)
+{
+	struct dm_task *dmt;
+	struct dm_info info;
+
+	if (!(dmt = dm_task_create(DM_DEVICE_INFO))) {
+		log_error("_get_device_info: dm_task creation for info failed");
+		return NULL;
+	}
+
+	dm_task_set_name(dmt, dev_name);
+
+	/* FIXME Add name or uuid or devno to messages */
+	if (!dm_task_run(dmt)) {
+		log_error("_get_device_info: dm_task_run() failed");
+		goto failed;
+	}
+
+	if (!dm_task_get_info(dmt, &info)) {
+		log_error("_get_device_info: failed to get info for device");
+		goto failed;
+	}
+
+	if (!info.exists) {
+		log_error("_get_device_info: device not found");
+		goto failed;
+	}
+
+	return dmt;
+
+failed:
+	dm_task_destroy(dmt);
+	return NULL;
+}
+
+int dm_event_unset_timeout(const char *dev_name)
+{
+	/* timeout of -1 is used as no timeout and removes the
+	 * timer. */
+	return dm_event_set_timeout(dev_name, -1);
+}
+
+int dm_event_set_timeout(const char *dev_name, int32_t timeout)
+{
+	int ret = 1, err;
+	const char *uuid;
+	struct dm_task *dmt;
+	struct dm_event_daemon_message msg = { 0, 0, NULL };
+
+	if (!(dmt = _get_device_info_by_name(dev_name))) {
+		stack;
+		return 0;
+	}
+
+	uuid = dm_task_get_uuid(dmt);
+
+	if ((err = _do_event(DM_EVENT_CMD_SET_TIMEOUT, &msg,
+			     NULL, uuid, 0, timeout)) < 0) {
+		log_error("%s: event set timeout failed: %s",
+			  dm_task_get_name(dmt),
+			  msg.data ? msg.data : strerror(-err));
+		ret = 0;
+	}
+
+	if (msg.data)
+		dm_free(msg.data);
+
+	dm_task_destroy(dmt);
+
+	return ret;
+}
+
+#if 0
 
 static char *_skip_string(char *src, const int delimiter)
 {
-	src = srtchr(src, delimiter);
+	src = strchr(src, delimiter);
 	if (src && *(src + 1))
 		return src + 1;
 	return NULL;
 }
 
-int dm_event_set_timeout(const char *device_path, uint32_t timeout)
-{
-	struct dm_event_daemon_message msg = { 0, 0, NULL };
-
-	if (!device_exists(device_path))
-		return -ENODEV;
-
-	return _do_event(DM_EVENT_CMD_SET_TIMEOUT, &msg,
-			 NULL, device_path, 0, timeout);
-}
-
 int dm_event_get_timeout(const char *device_path, uint32_t *timeout)
 {
 	int ret;
diff -r c11c9f0a04f8 -r b3c48f241dfd daemons/dmeventd/libdevmapper-event.h
--- a/daemons/dmeventd/libdevmapper-event.h	Mon Dec 21 17:40:47 2009 -0800
+++ b/daemons/dmeventd/libdevmapper-event.h	Mon Dec 21 17:57:37 2009 -0800
@@ -102,5 +102,7 @@ void process_event(struct dm_task *dmt, 
 int register_device(const char *device_name, const char *uuid, int major, int minor, void **user);
 int unregister_device(const char *device_name, const char *uuid, int major,
 		      int minor, void **user);
+int dm_event_set_timeout(const char *dev_name, int32_t timeout);
+int dm_event_unset_timeout(const char *dev_name);
 
 #endif



^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 3 of 4] Add dm_event_set_timeout/dm_event_unset_timeout interface
  2009-12-22  2:12   ` malahal
@ 2009-12-22 10:51     ` Alasdair G Kergon
  2009-12-23  1:58       ` malahal
  0 siblings, 1 reply; 17+ messages in thread
From: Alasdair G Kergon @ 2009-12-22 10:51 UTC (permalink / raw)
  To: lvm-devel

On Mon, Dec 21, 2009 at 06:12:55PM -0800, malahal at us.ibm.com wrote:
> Alasdair suggested to use -1 instead instead of INT_MAX to unregister a
> timeout. This patch supersedes the previous patch.
 
Using a #define, please, rather than hard-coding -1 into the source.

Alasdair



^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 3 of 4] Add dm_event_set_timeout/dm_event_unset_timeout interface
  2009-12-22 10:51     ` Alasdair G Kergon
@ 2009-12-23  1:58       ` malahal
  0 siblings, 0 replies; 17+ messages in thread
From: malahal @ 2009-12-23  1:58 UTC (permalink / raw)
  To: lvm-devel

Alasdair G Kergon [agk at redhat.com] wrote:
> Using a #define, please, rather than hard-coding -1 into the source.

Thank you. The following patch introduces DM_EVENT_REMOVE_TIMEOUT for
that purpose. This patch supersedes the previous patch (both patches are
against same source, so interdiff can be used to find just the
difference)

=============================================================================

Add dm_event_set_timeout/dm_event_unset_timeout interface

Will be used in mirror DSO to handle transient device failures.

Signed-off-by: Malahal Naineni <malahal@us.ibm.com>

diff -r c11c9f0a04f8 -r 0b6ad3b52702 daemons/dmeventd/.exported_symbols
--- a/daemons/dmeventd/.exported_symbols	Mon Dec 21 17:40:47 2009 -0800
+++ b/daemons/dmeventd/.exported_symbols	Tue Dec 22 15:53:11 2009 -0800
@@ -17,3 +17,5 @@ dm_event_unregister_handler
 dm_event_get_registered_device
 dm_event_handler_set_timeout
 dm_event_handler_get_timeout
+dm_event_set_timeout
+dm_event_unset_timeout
diff -r c11c9f0a04f8 -r 0b6ad3b52702 daemons/dmeventd/dmeventd.c
--- a/daemons/dmeventd/dmeventd.c	Mon Dec 21 17:40:47 2009 -0800
+++ b/daemons/dmeventd/dmeventd.c	Tue Dec 22 15:53:11 2009 -0800
@@ -155,7 +155,7 @@ struct message_data {
 	} events;
 	union {
 		char *str;
-		uint32_t secs;
+		int32_t secs;
 	} timeout;
 	struct dm_event_daemon_message *msg;	/* Pointer to message buffer. */
 };
@@ -187,7 +187,7 @@ struct thread_status {
 	enum dm_event_mask current_events;	/* bitfield for occured events. */
 	struct dm_task *current_task;
 	time_t next_time;
-	uint32_t timeout;
+	int32_t timeout;
 	struct dm_list timeout_list;
 	void *dso_private; /* dso per-thread status variable */
 };
@@ -363,7 +363,7 @@ static int _parse_message(struct message
 			message_data->events.field = i;
 		}
 		if (message_data->timeout.str) {
-			uint32_t secs = atoi(message_data->timeout.str);
+			int32_t secs = atoi(message_data->timeout.str);
 			dm_free(message_data->timeout.str);
 			message_data->timeout.secs = secs ? secs :
 			    DM_EVENT_DEFAULT_TIMEOUT;
@@ -1164,13 +1164,26 @@ static int _get_next_registered_device(s
 static int _set_timeout(struct message_data *message_data)
 {
 	struct thread_status *thread;
+	int ret = 0;
 
 	_lock_mutex();
-	if ((thread = _lookup_thread_status(message_data)))
-		thread->timeout = message_data->timeout.secs;
+	if ((thread = _lookup_thread_status(message_data))) {
+		/* A timeout of DM_EVENT_REMOVE_TIMEOUT is used to
+		 * unregister the timeout, register a timeout for any
+		 * other value.  */
+		thread->events &= ~DM_EVENT_TIMEOUT;
+		_unregister_for_timeout(thread);
+		if (message_data->timeout.secs != DM_EVENT_REMOVE_TIMEOUT) {
+			thread->events |= DM_EVENT_TIMEOUT;
+			thread->timeout = message_data->timeout.secs;
+			ret = _register_for_timeout(thread);
+		} else {
+			thread->timeout = 0;
+		}
+	}
 	_unlock_mutex();
 
-	return thread ? 0 : -ENODEV;
+	return (thread && ret == 0) ? 0 : -ENODEV;
 }
 
 static int _get_timeout(struct message_data *message_data)
diff -r c11c9f0a04f8 -r 0b6ad3b52702 daemons/dmeventd/dmeventd.h
--- a/daemons/dmeventd/dmeventd.h	Mon Dec 21 17:40:47 2009 -0800
+++ b/daemons/dmeventd/dmeventd.h	Tue Dec 22 15:53:11 2009 -0800
@@ -24,6 +24,7 @@
 #define DM_EVENT_PIDFILE	"/var/run/dmeventd.pid"
 
 #define DM_EVENT_DEFAULT_TIMEOUT 10
+#define DM_EVENT_REMOVE_TIMEOUT -1	/* used to unschedule a timeout */
 
 /* Commands for the daemon passed in the message below. */
 enum dm_event_command {
diff -r c11c9f0a04f8 -r 0b6ad3b52702 daemons/dmeventd/libdevmapper-event.c
--- a/daemons/dmeventd/libdevmapper-event.c	Mon Dec 21 17:40:47 2009 -0800
+++ b/daemons/dmeventd/libdevmapper-event.c	Tue Dec 22 15:53:11 2009 -0800
@@ -40,7 +40,7 @@ struct dm_event_handler {
 	char *uuid;
 	int major;
 	int minor;
-	uint32_t timeout;
+	int32_t timeout;
 
 	enum dm_event_mask mask;
 };
@@ -331,11 +331,11 @@ static int _daemon_write(struct dm_event
 static int _daemon_talk(struct dm_event_fifos *fifos,
 			struct dm_event_daemon_message *msg, int cmd,
 			const char *dso_name, const char *dev_name,
-			enum dm_event_mask evmask, uint32_t timeout)
+			enum dm_event_mask evmask, int32_t timeout)
 {
 	const char *dso = dso_name ? dso_name : "";
 	const char *dev = dev_name ? dev_name : "";
-	const char *fmt = "%d:%d %s %s %u %" PRIu32;
+	const char *fmt = "%d:%d %s %s %u %" PRId32;
 	int msg_size;
 	memset(msg, 0, sizeof(*msg));
 
@@ -550,7 +550,7 @@ failed:
 /* Handle the event (de)registration call and return negative error codes. */
 static int _do_event(int cmd, struct dm_event_daemon_message *msg,
 		     const char *dso_name, const char *dev_name,
-		     enum dm_event_mask evmask, uint32_t timeout)
+		     enum dm_event_mask evmask, int32_t timeout)
 {
 	int ret;
 	struct dm_event_fifos fifos;
@@ -770,27 +770,89 @@ int dm_event_get_registered_device(struc
 	return ret;
 }
 
-#if 0				/* left out for now */
+/* Get uuid of a device */
+static struct dm_task *_get_device_info_by_name(const char *dev_name)
+{
+	struct dm_task *dmt;
+	struct dm_info info;
+
+	if (!(dmt = dm_task_create(DM_DEVICE_INFO))) {
+		log_error("_get_device_info: dm_task creation for info failed");
+		return NULL;
+	}
+
+	dm_task_set_name(dmt, dev_name);
+
+	/* FIXME Add name or uuid or devno to messages */
+	if (!dm_task_run(dmt)) {
+		log_error("_get_device_info: dm_task_run() failed");
+		goto failed;
+	}
+
+	if (!dm_task_get_info(dmt, &info)) {
+		log_error("_get_device_info: failed to get info for device");
+		goto failed;
+	}
+
+	if (!info.exists) {
+		log_error("_get_device_info: device not found");
+		goto failed;
+	}
+
+	return dmt;
+
+failed:
+	dm_task_destroy(dmt);
+	return NULL;
+}
+
+int dm_event_unset_timeout(const char *dev_name)
+{
+	/* timeout value of DM_EVENT_REMOVE_TIMEOUT is used to remove
+	 * the timer. */
+	return dm_event_set_timeout(dev_name, DM_EVENT_REMOVE_TIMEOUT);
+}
+
+int dm_event_set_timeout(const char *dev_name, int32_t timeout)
+{
+	int ret = 1, err;
+	const char *uuid;
+	struct dm_task *dmt;
+	struct dm_event_daemon_message msg = { 0, 0, NULL };
+
+	if (!(dmt = _get_device_info_by_name(dev_name))) {
+		stack;
+		return 0;
+	}
+
+	uuid = dm_task_get_uuid(dmt);
+
+	if ((err = _do_event(DM_EVENT_CMD_SET_TIMEOUT, &msg,
+			     NULL, uuid, 0, timeout)) < 0) {
+		log_error("%s: event set timeout failed: %s",
+			  dm_task_get_name(dmt),
+			  msg.data ? msg.data : strerror(-err));
+		ret = 0;
+	}
+
+	if (msg.data)
+		dm_free(msg.data);
+
+	dm_task_destroy(dmt);
+
+	return ret;
+}
+
+#if 0
 
 static char *_skip_string(char *src, const int delimiter)
 {
-	src = srtchr(src, delimiter);
+	src = strchr(src, delimiter);
 	if (src && *(src + 1))
 		return src + 1;
 	return NULL;
 }
 
-int dm_event_set_timeout(const char *device_path, uint32_t timeout)
-{
-	struct dm_event_daemon_message msg = { 0, 0, NULL };
-
-	if (!device_exists(device_path))
-		return -ENODEV;
-
-	return _do_event(DM_EVENT_CMD_SET_TIMEOUT, &msg,
-			 NULL, device_path, 0, timeout);
-}
-
 int dm_event_get_timeout(const char *device_path, uint32_t *timeout)
 {
 	int ret;
diff -r c11c9f0a04f8 -r 0b6ad3b52702 daemons/dmeventd/libdevmapper-event.h
--- a/daemons/dmeventd/libdevmapper-event.h	Mon Dec 21 17:40:47 2009 -0800
+++ b/daemons/dmeventd/libdevmapper-event.h	Tue Dec 22 15:53:11 2009 -0800
@@ -102,5 +102,7 @@ void process_event(struct dm_task *dmt, 
 int register_device(const char *device_name, const char *uuid, int major, int minor, void **user);
 int unregister_device(const char *device_name, const char *uuid, int major,
 		      int minor, void **user);
+int dm_event_set_timeout(const char *dev_name, int32_t timeout);
+int dm_event_unset_timeout(const char *dev_name);
 
 #endif



^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2009-12-23  1:58 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-12-13  9:18 [PATCH 0 of 4] Re-integrate a failed secondary mirror leg Malahal Naineni
2009-12-13  9:18 ` [PATCH 1 of 4] Add more error codes in mirror DSO Malahal Naineni
2009-12-18 16:28   ` Jonathan Brassow
2009-12-18 17:01     ` malahal
2009-12-22  2:07     ` malahal
2009-12-13  9:18 ` [PATCH 2 of 4] Handle transient secondary mirror leg failures Malahal Naineni
2009-12-18 17:10   ` Jonathan Brassow
2009-12-18 18:25     ` Takahiro Yasui
2009-12-18 18:49       ` malahal
2009-12-18 20:21         ` Takahiro Yasui
2009-12-18 20:54           ` malahal
2009-12-18 18:35     ` malahal
2009-12-13  9:18 ` [PATCH 3 of 4] Add dm_event_set_timeout/dm_event_unset_timeout interface Malahal Naineni
2009-12-22  2:12   ` malahal
2009-12-22 10:51     ` Alasdair G Kergon
2009-12-23  1:58       ` malahal
2009-12-13  9:18 ` [PATCH 4 of 4] Attempt to resync a failed secondary leg few times before giving up Malahal Naineni

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.