* [PATCH 1 of 4] Add more error codes in mirror DSO
2009-12-13 9:18 [PATCH 0 of 4] Re-integrate a failed secondary mirror leg Malahal Naineni
@ 2009-12-13 9:18 ` Malahal Naineni
2009-12-18 16:28 ` Jonathan Brassow
2009-12-13 9:18 ` [PATCH 2 of 4] Handle transient secondary mirror leg failures Malahal Naineni
` (2 subsequent siblings)
3 siblings, 1 reply; 17+ messages in thread
From: Malahal Naineni @ 2009-12-13 9:18 UTC (permalink / raw)
To: lvm-devel
The mirror DSO (daemons/dmeventd/plugins/mirror/dmeventd_mirror.c) logs
various device failure codes but only handles one failure, 'ME_FAILURE'.
This patchs adds more error codes to handle multiple types of device
failures.
Signed-off-by: Malahal Naineni <malahal@us.ibm.com>
diff -r a48a5f0eea85 -r a74600c6163e daemons/dmeventd/plugins/mirror/dmeventd_mirror.c
--- a/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c Sun Dec 06 23:04:13 2009 -0800
+++ b/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c Sun Dec 13 01:16:51 2009 -0800
@@ -30,9 +30,17 @@
/* FIXME Replace most syslogs with log_error() style messages and add complete context. */
/* FIXME Reformat to 80 char lines. */
-#define ME_IGNORE 0
-#define ME_INSYNC 1
-#define ME_FAILURE 2
+/*
+ * An event may contain more than one error type. The following are bit
+ * flags that indicate each error type.
+ */
+#define ME_IGNORE 0x01U
+#define ME_INSYNC 0x02U
+#define ME_READ_FAILURE 0x04U
+#define ME_SYNC_FAILURE 0x08U
+#define ME_LOG_FAILURE 0x10U
+#define ME_SECONDARY_WRITE_FAILURE 0x20U
+#define ME_PRIMARY_WRITE_FAILURE 0x40U
/*
* register_device() is called first and performs initialisation.
@@ -53,45 +61,68 @@ static void *_lvm_handle = NULL;
*/
static pthread_mutex_t _event_mutex = PTHREAD_MUTEX_INITIALIZER;
-static int _process_status_code(const char status_code, const char *dev_name,
- const char *dev_type, int r)
+static int _process_status_code(const char *mirror, const char *device,
+ const char status_code, int dev_index)
{
- /*
- * A => Alive - No failures
- * D => Dead - A write failure occurred leaving mirror out-of-sync
- * F => Flush failed.
- * S => Sync - A sychronization failure occurred, mirror out-of-sync
- * R => Read - A read failure occurred, mirror data unaffected
- * U => Unclassified failure (bug)
- */
- if (status_code == 'F') {
- syslog(LOG_ERR, "%s device %s flush failed.\n",
- dev_type, dev_name);
- r = ME_FAILURE;
- } else if (status_code == 'S')
- syslog(LOG_ERR, "%s device %s sync failed.\n",
- dev_type, dev_name);
- else if (status_code == 'R')
- syslog(LOG_ERR, "%s device %s read failed.\n",
- dev_type, dev_name);
- else if (status_code != 'A') {
- syslog(LOG_ERR, "%s device %s has failed (%c).\n",
- dev_type, dev_name, status_code);
- r = ME_FAILURE;
+ int r;
+ const char *error_string;
+ const char *dev_type = dev_index ? "Secondary" : "Primary";
+
+ switch (status_code) {
+ default:
+ case 'U': /* Unclassified failure (bug) */
+ case 'D': /* Write failure occurred leaving mirror out-of-sync */
+ case 'F': /* Flush failure, handled as write failure */
+ if (status_code == 'D')
+ error_string = "write";
+ else if (status_code == 'F')
+ error_string = "flush";
+ else if (status_code == 'U')
+ error_string = "unclassified";
+ else {
+ syslog(LOG_ERR, "Unknown device status code (%c).\n",
+ status_code);
+ error_string = "unknown";
+ }
+
+ syslog(LOG_ERR, "Mirror device: %s, %s leg: %s had a %s "
+ "failure.\n", mirror, dev_type, device,
+ error_string);
+
+ r = dev_index? ME_SECONDARY_WRITE_FAILURE :
+ ME_PRIMARY_WRITE_FAILURE;
+ break;
+
+ case 'S': /* Sychronization failure occurred, mirror out-of-sync */
+ syslog(LOG_ERR, "Mirror device: %s, %s leg: %s had "
+ "sync failure.\n", mirror, dev_type, device);
+ r = ME_SYNC_FAILURE;
+ break;
+
+ case 'R': /* Read failure occurred, mirror data unaffected */
+ syslog(LOG_ERR, "Mirror device: %s, %s leg: %s had a "
+ "read failure.\n", mirror, dev_type, device);
+ r = ME_READ_FAILURE;
+ break;
+
+ case 'A': /* Active, a good status */
+ r = 0;
+ break;
}
return r;
}
-static int _get_mirror_event(char *params)
+static int _get_mirror_event(const char *mirror, char *params)
{
- int i, r = ME_INSYNC;
+ int i;
char **args = NULL;
char *dev_status_str;
char *log_status_str;
char *sync_str;
char *p = NULL;
int log_argc, num_devs;
+ int retval = 0;
/*
* dm core parms: 0 409600 mirror
@@ -122,23 +153,25 @@ static int _get_mirror_event(char *param
/* Check for bad mirror devices */
for (i = 0; i < num_devs; i++)
- r = _process_status_code(dev_status_str[i], args[i],
- i ? "Secondary mirror" : "Primary mirror", r);
+ retval |= _process_status_code(mirror, args[i],
+ dev_status_str[i], i);
/* Check for bad disk log device */
- if (log_argc > 1)
- r = _process_status_code(log_status_str[0],
- args[2 + num_devs + log_argc],
- "Log", r);
+ if (log_argc > 1 && log_status_str[0] == 'D') {
+ syslog(LOG_ERR, "Mirror device: %s, log device: %s failed.\n",
+ mirror, args[2 + num_devs + log_argc]);
+ retval = retval | ME_LOG_FAILURE;
+ }
- if (r == ME_FAILURE)
+ if (retval) /* A failure occurred */
goto out;
+ retval = ME_INSYNC; /* assume INSYNC event */
p = strstr(sync_str, "/");
if (p) {
p[0] = '\0';
if (strcmp(sync_str, p+1))
- r = ME_IGNORE;
+ retval = ME_IGNORE;
p[0] = '/';
} else
goto out_parse;
@@ -146,7 +179,7 @@ static int _get_mirror_event(char *param
out:
if (args)
dm_free(args);
- return r;
+ return retval;
out_parse:
if (args)
@@ -212,6 +245,7 @@ void process_event(struct dm_task *dmt,
char *target_type = NULL;
char *params;
const char *device = dm_task_get_name(dmt);
+ int error;
if (pthread_mutex_trylock(&_event_mutex)) {
syslog(LOG_NOTICE, "Another thread is handling an event. Waiting...");
@@ -231,17 +265,11 @@ void process_event(struct dm_task *dmt,
continue;
}
- switch(_get_mirror_event(params)) {
- case ME_INSYNC:
- /* FIXME: all we really know is that this
- _part_ of the device is in sync
- Also, this is not an error
- */
- syslog(LOG_NOTICE, "%s is now in-sync\n", device);
- break;
- case ME_FAILURE:
- syslog(LOG_ERR, "Device failure in %s\n", device);
- if (_remove_failed_devices(device))
+ error = _get_mirror_event(device, params);
+ if (error & ME_LOG_FAILURE ||
+ error & ME_PRIMARY_WRITE_FAILURE ||
+ error & ME_SECONDARY_WRITE_FAILURE) {
+ if (_remove_failed_devices(device)) {
/* FIXME Why are all the error return codes unused? Get rid of them? */
syslog(LOG_ERR, "Failed to remove faulty devices in %s\n",
device);
@@ -250,13 +278,18 @@ void process_event(struct dm_task *dmt,
syslog(LOG_NOTICE, "%s is now a linear device.\n",
device);
*/
- break;
- case ME_IGNORE:
- break;
- default:
- /* FIXME Provide value then! */
- syslog(LOG_INFO, "Unknown event received.\n");
- }
+ }
+ } else if (error & ME_INSYNC) {
+ /* FIXME: all we really know is that this
+ _part_ of the device is in sync
+ Also, this is not an error
+ */
+ syslog(LOG_NOTICE, "%s is now in-sync\n", device);
+ } else if (error & ME_READ_FAILURE ||
+ error & ME_SYNC_FAILURE) {
+ /* Ignore these for now */
+ } else
+ syslog(LOG_INFO, "Unknown event:%u received.\n", error);
} while (next);
pthread_mutex_unlock(&_event_mutex);
^ permalink raw reply [flat|nested] 17+ messages in thread* [PATCH 1 of 4] Add more error codes in mirror DSO
2009-12-13 9:18 ` [PATCH 1 of 4] Add more error codes in mirror DSO Malahal Naineni
@ 2009-12-18 16:28 ` Jonathan Brassow
2009-12-18 17:01 ` malahal
2009-12-22 2:07 ` malahal
0 siblings, 2 replies; 17+ messages in thread
From: Jonathan Brassow @ 2009-12-18 16:28 UTC (permalink / raw)
To: lvm-devel
Is there a reason you bumped ME_IGNORE from 0 to 1?
Did you forget about ME_IGNORE in 'process_event' (causing it to fall
through to the final else)?
brassow
On Dec 13, 2009, at 3:18 AM, Malahal Naineni wrote:
> The mirror DSO (daemons/dmeventd/plugins/mirror/dmeventd_mirror.c)
> logs
> various device failure codes but only handles one failure,
> 'ME_FAILURE'.
> This patchs adds more error codes to handle multiple types of device
> failures.
>
> Signed-off-by: Malahal Naineni <malahal@us.ibm.com>
>
> diff -r a48a5f0eea85 -r a74600c6163e daemons/dmeventd/plugins/mirror/
> dmeventd_mirror.c
> --- a/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c Sun Dec 06
> 23:04:13 2009 -0800
> +++ b/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c Sun Dec 13
> 01:16:51 2009 -0800
> @@ -30,9 +30,17 @@
> /* FIXME Replace most syslogs with log_error() style messages and
> add complete context. */
> /* FIXME Reformat to 80 char lines. */
>
> -#define ME_IGNORE 0
> -#define ME_INSYNC 1
> -#define ME_FAILURE 2
> +/*
> + * An event may contain more than one error type. The following are
> bit
> + * flags that indicate each error type.
> + */
> +#define ME_IGNORE 0x01U
> +#define ME_INSYNC 0x02U
> +#define ME_READ_FAILURE 0x04U
> +#define ME_SYNC_FAILURE 0x08U
> +#define ME_LOG_FAILURE 0x10U
> +#define ME_SECONDARY_WRITE_FAILURE 0x20U
> +#define ME_PRIMARY_WRITE_FAILURE 0x40U
>
> /*
> * register_device() is called first and performs initialisation.
> @@ -53,45 +61,68 @@ static void *_lvm_handle = NULL;
> */
> static pthread_mutex_t _event_mutex = PTHREAD_MUTEX_INITIALIZER;
>
> -static int _process_status_code(const char status_code, const char
> *dev_name,
> - const char *dev_type, int r)
> +static int _process_status_code(const char *mirror, const char
> *device,
> + const char status_code, int dev_index)
> {
> - /*
> - * A => Alive - No failures
> - * D => Dead - A write failure occurred leaving mirror out-of-
> sync
> - * F => Flush failed.
> - * S => Sync - A sychronization failure occurred, mirror out-of-
> sync
> - * R => Read - A read failure occurred, mirror data unaffected
> - * U => Unclassified failure (bug)
> - */
> - if (status_code == 'F') {
> - syslog(LOG_ERR, "%s device %s flush failed.\n",
> - dev_type, dev_name);
> - r = ME_FAILURE;
> - } else if (status_code == 'S')
> - syslog(LOG_ERR, "%s device %s sync failed.\n",
> - dev_type, dev_name);
> - else if (status_code == 'R')
> - syslog(LOG_ERR, "%s device %s read failed.\n",
> - dev_type, dev_name);
> - else if (status_code != 'A') {
> - syslog(LOG_ERR, "%s device %s has failed (%c).\n",
> - dev_type, dev_name, status_code);
> - r = ME_FAILURE;
> + int r;
> + const char *error_string;
> + const char *dev_type = dev_index ? "Secondary" : "Primary";
> +
> + switch (status_code) {
> + default:
> + case 'U': /* Unclassified failure (bug) */
> + case 'D': /* Write failure occurred leaving mirror out-of-sync */
> + case 'F': /* Flush failure, handled as write failure */
> + if (status_code == 'D')
> + error_string = "write";
> + else if (status_code == 'F')
> + error_string = "flush";
> + else if (status_code == 'U')
> + error_string = "unclassified";
> + else {
> + syslog(LOG_ERR, "Unknown device status code (%c).\n",
> + status_code);
> + error_string = "unknown";
> + }
> +
> + syslog(LOG_ERR, "Mirror device: %s, %s leg: %s had a %s "
> + "failure.\n", mirror, dev_type, device,
> + error_string);
> +
> + r = dev_index? ME_SECONDARY_WRITE_FAILURE :
> + ME_PRIMARY_WRITE_FAILURE;
> + break;
> +
> + case 'S': /* Sychronization failure occurred, mirror out-of-sync */
> + syslog(LOG_ERR, "Mirror device: %s, %s leg: %s had "
> + "sync failure.\n", mirror, dev_type, device);
> + r = ME_SYNC_FAILURE;
> + break;
> +
> + case 'R': /* Read failure occurred, mirror data unaffected */
> + syslog(LOG_ERR, "Mirror device: %s, %s leg: %s had a "
> + "read failure.\n", mirror, dev_type, device);
> + r = ME_READ_FAILURE;
> + break;
> +
> + case 'A': /* Active, a good status */
> + r = 0;
> + break;
> }
>
> return r;
> }
>
> -static int _get_mirror_event(char *params)
> +static int _get_mirror_event(const char *mirror, char *params)
> {
> - int i, r = ME_INSYNC;
> + int i;
> char **args = NULL;
> char *dev_status_str;
> char *log_status_str;
> char *sync_str;
> char *p = NULL;
> int log_argc, num_devs;
> + int retval = 0;
>
> /*
> * dm core parms: 0 409600 mirror
> @@ -122,23 +153,25 @@ static int _get_mirror_event(char *param
>
> /* Check for bad mirror devices */
> for (i = 0; i < num_devs; i++)
> - r = _process_status_code(dev_status_str[i], args[i],
> - i ? "Secondary mirror" : "Primary mirror", r);
> + retval |= _process_status_code(mirror, args[i],
> + dev_status_str[i], i);
>
> /* Check for bad disk log device */
> - if (log_argc > 1)
> - r = _process_status_code(log_status_str[0],
> - args[2 + num_devs + log_argc],
> - "Log", r);
> + if (log_argc > 1 && log_status_str[0] == 'D') {
> + syslog(LOG_ERR, "Mirror device: %s, log device: %s failed.\n",
> + mirror, args[2 + num_devs + log_argc]);
> + retval = retval | ME_LOG_FAILURE;
> + }
>
> - if (r == ME_FAILURE)
> + if (retval) /* A failure occurred */
> goto out;
>
> + retval = ME_INSYNC; /* assume INSYNC event */
> p = strstr(sync_str, "/");
> if (p) {
> p[0] = '\0';
> if (strcmp(sync_str, p+1))
> - r = ME_IGNORE;
> + retval = ME_IGNORE;
> p[0] = '/';
> } else
> goto out_parse;
> @@ -146,7 +179,7 @@ static int _get_mirror_event(char *param
> out:
> if (args)
> dm_free(args);
> - return r;
> + return retval;
>
> out_parse:
> if (args)
> @@ -212,6 +245,7 @@ void process_event(struct dm_task *dmt,
> char *target_type = NULL;
> char *params;
> const char *device = dm_task_get_name(dmt);
> + int error;
>
> if (pthread_mutex_trylock(&_event_mutex)) {
> syslog(LOG_NOTICE, "Another thread is handling an event.
> Waiting...");
> @@ -231,17 +265,11 @@ void process_event(struct dm_task *dmt,
> continue;
> }
>
> - switch(_get_mirror_event(params)) {
> - case ME_INSYNC:
> - /* FIXME: all we really know is that this
> - _part_ of the device is in sync
> - Also, this is not an error
> - */
> - syslog(LOG_NOTICE, "%s is now in-sync\n", device);
> - break;
> - case ME_FAILURE:
> - syslog(LOG_ERR, "Device failure in %s\n", device);
> - if (_remove_failed_devices(device))
> + error = _get_mirror_event(device, params);
> + if (error & ME_LOG_FAILURE ||
> + error & ME_PRIMARY_WRITE_FAILURE ||
> + error & ME_SECONDARY_WRITE_FAILURE) {
> + if (_remove_failed_devices(device)) {
> /* FIXME Why are all the error return codes unused? Get rid of
> them? */
> syslog(LOG_ERR, "Failed to remove faulty devices in %s\n",
> device);
> @@ -250,13 +278,18 @@ void process_event(struct dm_task *dmt,
> syslog(LOG_NOTICE, "%s is now a linear device.\n",
> device);
> */
> - break;
> - case ME_IGNORE:
> - break;
> - default:
> - /* FIXME Provide value then! */
> - syslog(LOG_INFO, "Unknown event received.\n");
> - }
> + }
> + } else if (error & ME_INSYNC) {
> + /* FIXME: all we really know is that this
> + _part_ of the device is in sync
> + Also, this is not an error
> + */
> + syslog(LOG_NOTICE, "%s is now in-sync\n", device);
> + } else if (error & ME_READ_FAILURE ||
> + error & ME_SYNC_FAILURE) {
> + /* Ignore these for now */
> + } else
> + syslog(LOG_INFO, "Unknown event:%u received.\n", error);
> } while (next);
>
> pthread_mutex_unlock(&_event_mutex);
>
> --
> lvm-devel mailing list
> lvm-devel at redhat.com
> https://www.redhat.com/mailman/listinfo/lvm-devel
^ permalink raw reply [flat|nested] 17+ messages in thread* [PATCH 1 of 4] Add more error codes in mirror DSO
2009-12-18 16:28 ` Jonathan Brassow
@ 2009-12-18 17:01 ` malahal
2009-12-22 2:07 ` malahal
1 sibling, 0 replies; 17+ messages in thread
From: malahal @ 2009-12-18 17:01 UTC (permalink / raw)
To: lvm-devel
Jonathan Brassow [jbrassow at redhat.com] wrote:
> Is there a reason you bumped ME_IGNORE from 0 to 1?
It is actually a bit-field value rather than an absolute value, so I
couldn't have anything with zero.
> Did you forget about ME_IGNORE in 'process_event' (causing it to fall
> through to the final else)?
Yes! Thank you. Unknown errors are handled as 'fatal' errors in the
existing code, so process_event will never get an unknown error code (it
has to be ME_IGNORE). I will fix it.
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 1 of 4] Add more error codes in mirror DSO
2009-12-18 16:28 ` Jonathan Brassow
2009-12-18 17:01 ` malahal
@ 2009-12-22 2:07 ` malahal
1 sibling, 0 replies; 17+ messages in thread
From: malahal @ 2009-12-22 2:07 UTC (permalink / raw)
To: lvm-devel
Jonathan Brassow [jbrassow at redhat.com] wrote:
> Is there a reason you bumped ME_IGNORE from 0 to 1?
>
> Did you forget about ME_IGNORE in 'process_event' (causing it to fall
> through to the final else)?
>
> brassow
The following fixes the ME_IGNORE in 'process_event' [complete patch]
======================================================================
Add more error codes in mirror DSO.
The mirror DSO (daemons/dmeventd/plugins/mirror/dmeventd_mirror.c) logs
various device failure codes but only handles one failure, 'ME_FAILURE'.
This patchs adds more error codes to handle multiple types of device
failures.
Signed-off-by: Malahal Naineni <malahal@us.ibm.com>
diff -r a48a5f0eea85 -r db70a48aef1b daemons/dmeventd/plugins/mirror/dmeventd_mirror.c
--- a/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c Sun Dec 06 23:04:13 2009 -0800
+++ b/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c Mon Dec 21 17:38:25 2009 -0800
@@ -30,9 +30,17 @@
/* FIXME Replace most syslogs with log_error() style messages and add complete context. */
/* FIXME Reformat to 80 char lines. */
-#define ME_IGNORE 0
-#define ME_INSYNC 1
-#define ME_FAILURE 2
+/*
+ * An event may contain more than one error type. The following are bit
+ * flags that indicate each error type.
+ */
+#define ME_IGNORE 0x01U
+#define ME_INSYNC 0x02U
+#define ME_READ_FAILURE 0x04U
+#define ME_SYNC_FAILURE 0x08U
+#define ME_LOG_FAILURE 0x10U
+#define ME_SECONDARY_WRITE_FAILURE 0x20U
+#define ME_PRIMARY_WRITE_FAILURE 0x40U
/*
* register_device() is called first and performs initialisation.
@@ -53,45 +61,68 @@ static void *_lvm_handle = NULL;
*/
static pthread_mutex_t _event_mutex = PTHREAD_MUTEX_INITIALIZER;
-static int _process_status_code(const char status_code, const char *dev_name,
- const char *dev_type, int r)
+static int _process_status_code(const char *mirror, const char *device,
+ const char status_code, int dev_index)
{
- /*
- * A => Alive - No failures
- * D => Dead - A write failure occurred leaving mirror out-of-sync
- * F => Flush failed.
- * S => Sync - A sychronization failure occurred, mirror out-of-sync
- * R => Read - A read failure occurred, mirror data unaffected
- * U => Unclassified failure (bug)
- */
- if (status_code == 'F') {
- syslog(LOG_ERR, "%s device %s flush failed.\n",
- dev_type, dev_name);
- r = ME_FAILURE;
- } else if (status_code == 'S')
- syslog(LOG_ERR, "%s device %s sync failed.\n",
- dev_type, dev_name);
- else if (status_code == 'R')
- syslog(LOG_ERR, "%s device %s read failed.\n",
- dev_type, dev_name);
- else if (status_code != 'A') {
- syslog(LOG_ERR, "%s device %s has failed (%c).\n",
- dev_type, dev_name, status_code);
- r = ME_FAILURE;
+ int r;
+ const char *error_string;
+ const char *dev_type = dev_index ? "Secondary" : "Primary";
+
+ switch (status_code) {
+ default:
+ case 'U': /* Unclassified failure (bug) */
+ case 'D': /* Write failure occurred leaving mirror out-of-sync */
+ case 'F': /* Flush failure, handled as write failure */
+ if (status_code == 'D')
+ error_string = "write";
+ else if (status_code == 'F')
+ error_string = "flush";
+ else if (status_code == 'U')
+ error_string = "unclassified";
+ else {
+ syslog(LOG_ERR, "Unknown device status code (%c).\n",
+ status_code);
+ error_string = "unknown";
+ }
+
+ syslog(LOG_ERR, "Mirror device: %s, %s leg: %s had a %s "
+ "failure.\n", mirror, dev_type, device,
+ error_string);
+
+ r = dev_index? ME_SECONDARY_WRITE_FAILURE :
+ ME_PRIMARY_WRITE_FAILURE;
+ break;
+
+ case 'S': /* Sychronization failure occurred, mirror out-of-sync */
+ syslog(LOG_ERR, "Mirror device: %s, %s leg: %s had "
+ "sync failure.\n", mirror, dev_type, device);
+ r = ME_SYNC_FAILURE;
+ break;
+
+ case 'R': /* Read failure occurred, mirror data unaffected */
+ syslog(LOG_ERR, "Mirror device: %s, %s leg: %s had a "
+ "read failure.\n", mirror, dev_type, device);
+ r = ME_READ_FAILURE;
+ break;
+
+ case 'A': /* Active, a good status */
+ r = 0;
+ break;
}
return r;
}
-static int _get_mirror_event(char *params)
+static int _get_mirror_event(const char *mirror, char *params)
{
- int i, r = ME_INSYNC;
+ int i;
char **args = NULL;
char *dev_status_str;
char *log_status_str;
char *sync_str;
char *p = NULL;
int log_argc, num_devs;
+ int retval = 0;
/*
* dm core parms: 0 409600 mirror
@@ -122,23 +153,25 @@ static int _get_mirror_event(char *param
/* Check for bad mirror devices */
for (i = 0; i < num_devs; i++)
- r = _process_status_code(dev_status_str[i], args[i],
- i ? "Secondary mirror" : "Primary mirror", r);
+ retval |= _process_status_code(mirror, args[i],
+ dev_status_str[i], i);
/* Check for bad disk log device */
- if (log_argc > 1)
- r = _process_status_code(log_status_str[0],
- args[2 + num_devs + log_argc],
- "Log", r);
+ if (log_argc > 1 && log_status_str[0] == 'D') {
+ syslog(LOG_ERR, "Mirror device: %s, log device: %s failed.\n",
+ mirror, args[2 + num_devs + log_argc]);
+ retval = retval | ME_LOG_FAILURE;
+ }
- if (r == ME_FAILURE)
+ if (retval) /* A failure occurred */
goto out;
+ retval = ME_INSYNC; /* assume INSYNC event */
p = strstr(sync_str, "/");
if (p) {
p[0] = '\0';
if (strcmp(sync_str, p+1))
- r = ME_IGNORE;
+ retval = ME_IGNORE;
p[0] = '/';
} else
goto out_parse;
@@ -146,7 +179,7 @@ static int _get_mirror_event(char *param
out:
if (args)
dm_free(args);
- return r;
+ return retval;
out_parse:
if (args)
@@ -212,6 +245,7 @@ void process_event(struct dm_task *dmt,
char *target_type = NULL;
char *params;
const char *device = dm_task_get_name(dmt);
+ int error;
if (pthread_mutex_trylock(&_event_mutex)) {
syslog(LOG_NOTICE, "Another thread is handling an event. Waiting...");
@@ -231,17 +265,11 @@ void process_event(struct dm_task *dmt,
continue;
}
- switch(_get_mirror_event(params)) {
- case ME_INSYNC:
- /* FIXME: all we really know is that this
- _part_ of the device is in sync
- Also, this is not an error
- */
- syslog(LOG_NOTICE, "%s is now in-sync\n", device);
- break;
- case ME_FAILURE:
- syslog(LOG_ERR, "Device failure in %s\n", device);
- if (_remove_failed_devices(device))
+ error = _get_mirror_event(device, params);
+ if (error & ME_LOG_FAILURE ||
+ error & ME_PRIMARY_WRITE_FAILURE ||
+ error & ME_SECONDARY_WRITE_FAILURE) {
+ if (_remove_failed_devices(device)) {
/* FIXME Why are all the error return codes unused? Get rid of them? */
syslog(LOG_ERR, "Failed to remove faulty devices in %s\n",
device);
@@ -250,13 +278,19 @@ void process_event(struct dm_task *dmt,
syslog(LOG_NOTICE, "%s is now a linear device.\n",
device);
*/
- break;
- case ME_IGNORE:
- break;
- default:
- /* FIXME Provide value then! */
- syslog(LOG_INFO, "Unknown event received.\n");
- }
+ }
+ } else if (error & ME_INSYNC) {
+ /* FIXME: all we really know is that this
+ _part_ of the device is in sync
+ Also, this is not an error
+ */
+ syslog(LOG_NOTICE, "%s is now in-sync\n", device);
+ } else if (error & ME_READ_FAILURE ||
+ error & ME_SYNC_FAILURE ||
+ error & ME_IGNORE) {
+ /* Ignore these for now */
+ } else
+ syslog(LOG_INFO, "Unknown event:%u received.\n", error);
} while (next);
pthread_mutex_unlock(&_event_mutex);
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 2 of 4] Handle transient secondary mirror leg failures
2009-12-13 9:18 [PATCH 0 of 4] Re-integrate a failed secondary mirror leg Malahal Naineni
2009-12-13 9:18 ` [PATCH 1 of 4] Add more error codes in mirror DSO Malahal Naineni
@ 2009-12-13 9:18 ` Malahal Naineni
2009-12-18 17:10 ` Jonathan Brassow
2009-12-13 9:18 ` [PATCH 3 of 4] Add dm_event_set_timeout/dm_event_unset_timeout interface Malahal Naineni
2009-12-13 9:18 ` [PATCH 4 of 4] Attempt to resync a failed secondary leg few times before giving up Malahal Naineni
3 siblings, 1 reply; 17+ messages in thread
From: Malahal Naineni @ 2009-12-13 9:18 UTC (permalink / raw)
To: lvm-devel
A new mirror_device_fault_policy, "retry", is added to handle transient
device failures. When this policy is selected, the mirror DSO will try
to resync the mirror upon a secondary leg failure. This will be tried
and tried until the mirror goes to in_sync state. Later patches make this a
configurable number spaced at some configurable timeout.
The patch uses dmsetup suspend and resume commands to attempt a resync.
It uses "lvm dumpconfig" to find out the mirror_device_fault_policy.
Signed-off-by: Malahal Naineni (malahal at us.ibm.com)
diff -r a74600c6163e -r 1e369d480df0 daemons/dmeventd/plugins/mirror/dmeventd_mirror.c
--- a/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c Sun Dec 13 01:16:51 2009 -0800
+++ b/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c Sun Dec 13 01:17:52 2009 -0800
@@ -14,6 +14,7 @@
#include "lvm2cmd.h"
#include "errors.h"
+#include "defaults.h"
#include <libdevmapper.h>
#include <libdevmapper-event.h>
@@ -24,6 +25,7 @@
#include <stdlib.h>
#include <pthread.h>
#include <unistd.h>
+#include <ctype.h>
#include <syslog.h> /* FIXME Replace syslog with multilog */
/* FIXME Missing openlog? */
@@ -56,6 +58,109 @@ static int _register_count = 0;
static struct dm_pool *_mem_pool = NULL;
static void *_lvm_handle = NULL;
+enum fault_policy {
+ FAULT_POLICY_INVALID,
+ FAULT_POLICY_REMOVE,
+ FAULT_POLICY_ALLOCATE,
+ FAULT_POLICY_ALLOCATE_ANYWHERE,
+ FAULT_POLICY_RETRY,
+};
+
+struct mirror_device_info {
+ enum fault_policy fault_policy;
+};
+
+#define CMD_SIZE 256 /* FIXME Use system restriction */
+#define LINE_SIZE 1024 /* FIXME Use system restriction */
+static int fill_config_str(char buf[], int bufsize, const char *path)
+{
+ char cmd_str[CMD_SIZE];
+ char data[LINE_SIZE], *ptr;
+ int ret = 0;
+ FILE *fp;
+
+ snprintf(cmd_str, sizeof(cmd_str), "lvm dumpconfig %s", path);
+ if ((fp = popen(cmd_str, "r")) == NULL) {
+ syslog(LOG_ERR, "fopen() failed.\n");
+ goto out;
+ }
+
+ if ((ptr = fgets(data, sizeof(data), fp)) == NULL) {
+ syslog(LOG_ERR, "fgets() failed.\n");
+ pclose(fp);
+ goto out;
+ }
+
+ /* Remove white space or quotes at the end */
+ for (ptr = &data[strlen(data)-1]; ptr >= data; --ptr)
+ if (isspace(*ptr) || *ptr == '\"' || *ptr == '\'')
+ *ptr = '\0';
+ else
+ break;
+ ptr = strchr(data, '=');
+ if (ptr) {
+ ptr++; /* After the '=' sign */
+ /* skip quotes or white space */
+ if (isspace(*ptr) || *ptr == '\"' || *ptr == '\'')
+ ptr++;
+ strncpy(buf, ptr, bufsize);
+ buf[bufsize-1] = '\0';
+ ret = 1; /* buf is valid */
+ }
+ if (pclose(fp) != 0)
+ syslog(LOG_ERR, "pclose() failed.\n");
+
+out:
+ return ret;
+}
+
+static enum fault_policy fault_policy_str2enum(const char *str)
+{
+ enum fault_policy ret = FAULT_POLICY_INVALID;
+
+ /* At most the loop is ran twice */
+ while (ret == FAULT_POLICY_INVALID) {
+ if (!strcmp(str, "remove"))
+ ret = FAULT_POLICY_REMOVE;
+ else if (!strcmp(str, "allocate"))
+ ret = FAULT_POLICY_ALLOCATE;
+ else if (!strcmp(str, "allocate_anywhere"))
+ ret = FAULT_POLICY_ALLOCATE_ANYWHERE;
+ else if (!strcmp(str, "retry"))
+ ret = FAULT_POLICY_RETRY;
+ else {
+ syslog(LOG_ERR, "Bad activation/"
+ "mirror_device_fault_policy: %s\n",
+ str);
+ str = DEFAULT_MIRROR_DEV_FAULT_POLICY;
+ }
+ }
+
+ return ret;
+}
+
+
+static enum fault_policy get_mirror_fault_policy()
+{
+ enum fault_policy ret;
+ char policy[LINE_SIZE];
+ const char *ptr;
+
+ ret = fill_config_str(policy, sizeof(policy),
+ "activation/mirror_device_fault_policy");
+ if (ret) {
+ if (!strcmp(policy, ""))
+ ptr = DEFAULT_MIRROR_DEV_FAULT_POLICY;
+ else
+ ptr = policy;
+ } else {
+ ptr = DEFAULT_MIRROR_DEV_FAULT_POLICY;
+ }
+ ret = fault_policy_str2enum(ptr);
+
+ return ret;
+}
+
/*
* Currently only one event can be processed at a time.
*/
@@ -200,10 +305,52 @@ static void _temporary_log_fn(int level,
syslog(LOG_DEBUG, "%s", format);
}
+
+static int retry_failed_devices(const char *device)
+{
+ int r;
+ char cmd_str[CMD_SIZE];
+ char *vg = NULL, *lv = NULL, *layer = NULL;
+
+ if (strlen(device) > 200) /* FIXME Use real restriction */
+ /* FIXME These return code distinctions are not used so
+ * remove them! */
+ return -ENAMETOOLONG;
+
+ if (!dm_split_lvm_name(_mem_pool, device, &vg, &lv, &layer)) {
+ syslog(LOG_ERR, "Unable to determine VG name from %s",
+ device);
+ /* FIXME Replace with generic error return - reason for
+ * failure has already got logged */
+ return -ENOMEM;
+ }
+
+ /* FIXME: should be running an LVM command that is pinned.
+ * dmsetup command may not be pinned in memory all the time.
+ * "lvchange --refresh vg/lv" only works if there are no device
+ * failures while it is running. Otherwise, the failed device
+ * is replaced with "error" target which is not what we want.
+ */
+ snprintf(cmd_str, CMD_SIZE, "dmsetup suspend --noflush %s-%s", vg, lv);
+ syslog(LOG_NOTICE, "Running command: %s", cmd_str);
+ r = system(cmd_str);
+
+ snprintf(cmd_str, CMD_SIZE, "dmsetup table %s-%s | dmsetup load "
+ "%s-%s", vg, lv, vg, lv);
+ syslog(LOG_NOTICE, "Running command: %s", cmd_str);
+ r |= system(cmd_str);
+
+ snprintf(cmd_str, CMD_SIZE, "dmsetup resume %s-%s", vg, lv);
+ syslog(LOG_NOTICE, "Running command: %s", cmd_str);
+ r |= system(cmd_str);
+
+ dm_pool_empty(_mem_pool); /* FIXME: not safe with multiple threads */
+ return r;
+}
+
static int _remove_failed_devices(const char *device)
{
int r;
-#define CMD_SIZE 256 /* FIXME Use system restriction */
char cmd_str[CMD_SIZE];
char *vg = NULL, *lv = NULL, *layer = NULL;
@@ -238,7 +385,7 @@ static int _remove_failed_devices(const
void process_event(struct dm_task *dmt,
enum dm_event_mask event __attribute((unused)),
- void **unused __attribute((unused)))
+ void **private)
{
void *next = NULL;
uint64_t start, length;
@@ -246,6 +393,7 @@ void process_event(struct dm_task *dmt,
char *params;
const char *device = dm_task_get_name(dmt);
int error;
+ struct mirror_device_info *mirror_info = *private;
if (pthread_mutex_trylock(&_event_mutex)) {
syslog(LOG_NOTICE, "Another thread is handling an event. Waiting...");
@@ -268,8 +416,17 @@ void process_event(struct dm_task *dmt,
error = _get_mirror_event(device, params);
if (error & ME_LOG_FAILURE ||
error & ME_PRIMARY_WRITE_FAILURE ||
- error & ME_SECONDARY_WRITE_FAILURE) {
- if (_remove_failed_devices(device)) {
+ error & ME_SECONDARY_WRITE_FAILURE ||
+ error & ME_SYNC_FAILURE) {
+ if (mirror_info->fault_policy == FAULT_POLICY_RETRY &&
+ (error & ME_SECONDARY_WRITE_FAILURE ||
+ error & ME_SYNC_FAILURE)) {
+ syslog(LOG_ERR, "Retrying the failed mirror "
+ "device.\n");
+ if (retry_failed_devices(device))
+ syslog(LOG_ERR, "Failed to reload the "
+ "mirror: %s\n", device);
+ } else if (_remove_failed_devices(device)) {
/* FIXME Why are all the error return codes unused? Get rid of them? */
syslog(LOG_ERR, "Failed to remove faulty devices in %s\n",
device);
@@ -285,9 +442,8 @@ void process_event(struct dm_task *dmt,
Also, this is not an error
*/
syslog(LOG_NOTICE, "%s is now in-sync\n", device);
- } else if (error & ME_READ_FAILURE ||
- error & ME_SYNC_FAILURE) {
- /* Ignore these for now */
+ } else if (error & ME_READ_FAILURE) {
+ /* Ignore it for now */
} else
syslog(LOG_INFO, "Unknown event:%u received.\n", error);
} while (next);
@@ -299,9 +455,10 @@ int register_device(const char *device,
const char *uuid __attribute((unused)),
int major __attribute((unused)),
int minor __attribute((unused)),
- void **unused __attribute((unused)))
+ void **private)
{
int r = 0;
+ struct mirror_device_info *mirror_info;
pthread_mutex_lock(&_register_mutex);
@@ -312,9 +469,19 @@ int register_device(const char *device,
if (!_mem_pool && !(_mem_pool = dm_pool_create("mirror_dso", 1024)))
goto out;
+ mirror_info = dm_malloc(sizeof(struct mirror_device_info));
+ if (!mirror_info) {
+ dm_pool_destroy(_mem_pool);
+ _mem_pool = NULL;
+ goto out;
+ }
+ mirror_info->fault_policy = get_mirror_fault_policy();
+ *private = mirror_info;
+
if (!_lvm_handle) {
lvm2_log_fn(_temporary_log_fn);
if (!(_lvm_handle = lvm2_init())) {
+ dm_free(mirror_info);
dm_pool_destroy(_mem_pool);
_mem_pool = NULL;
goto out;
@@ -339,8 +506,11 @@ int unregister_device(const char *device
const char *uuid __attribute((unused)),
int major __attribute((unused)),
int minor __attribute((unused)),
- void **unused __attribute((unused)))
+ void **private)
{
+ struct mirror_device_info *mirror_info = *private;
+
+ dm_free(mirror_info);
pthread_mutex_lock(&_register_mutex);
syslog(LOG_INFO, "No longer monitoring mirror device %s for events\n",
diff -r a74600c6163e -r 1e369d480df0 doc/example.conf
--- a/doc/example.conf Sun Dec 13 01:16:51 2009 -0800
+++ b/doc/example.conf Sun Dec 13 01:17:52 2009 -0800
@@ -403,6 +403,9 @@ activation {
# since it would break the redundant nature of the mirror. This
# policy acts like "remove" if no suitable device and space can
# be allocated for the replacement.
+ #
+ # "retry" - Try to re-integrate the failed mirror leg assuming that the
+ # failure is transient. Not implemented yet, so don't use it.
mirror_log_fault_policy = "allocate"
mirror_device_fault_policy = "remove"
diff -r a74600c6163e -r 1e369d480df0 lib/metadata/mirror.c
--- a/lib/metadata/mirror.c Sun Dec 13 01:16:51 2009 -0800
+++ b/lib/metadata/mirror.c Sun Dec 13 01:17:52 2009 -0800
@@ -37,6 +37,7 @@
#define MIRROR_REMOVE 0
#define MIRROR_ALLOCATE 1
#define MIRROR_ALLOCATE_ANYWHERE 2
+#define MIRROR_RETRY 3
/*
* Returns true if the lv is temporary mirror layer for resync
@@ -787,6 +788,8 @@ static int get_mirror_fault_policy(struc
return MIRROR_ALLOCATE;
else if (!strcmp(policy, "allocate_anywhere"))
return MIRROR_ALLOCATE_ANYWHERE;
+ else if (!strcmp(policy, "retry"))
+ return MIRROR_RETRY;
if (log_policy)
log_error("Bad activation/mirror_log_fault_policy");
^ permalink raw reply [flat|nested] 17+ messages in thread* [PATCH 2 of 4] Handle transient secondary mirror leg failures
2009-12-13 9:18 ` [PATCH 2 of 4] Handle transient secondary mirror leg failures Malahal Naineni
@ 2009-12-18 17:10 ` Jonathan Brassow
2009-12-18 18:25 ` Takahiro Yasui
2009-12-18 18:35 ` malahal
0 siblings, 2 replies; 17+ messages in thread
From: Jonathan Brassow @ 2009-12-18 17:10 UTC (permalink / raw)
To: lvm-devel
hmmm, I wonder how well this is going to work.
1) I've seen code ('dm_task_suppress_identical_reload') that
suppresses table reloads if the tables are identical - will that
obstruct what you are trying to do here?
2) If you don't get a new table loaded, it will behave as a suspend/
resume only. Recent code changes in dm-raid1.c are causing
'log_failure' and 'leg_failure' to not be reset in those cases. IOW,
all these steps could be for nothing. :(
3) How does this work in a cluster? dmsetup is not cluster-aware. If
you don't go through the LVM commands (I recognize the additional
pain), none of this will be coordinated.
brassow
On Dec 13, 2009, at 3:18 AM, Malahal Naineni wrote:
> + /* FIXME: should be running an LVM command that is pinned.
> + * dmsetup command may not be pinned in memory all the time.
> + * "lvchange --refresh vg/lv" only works if there are no device
> + * failures while it is running. Otherwise, the failed device
> + * is replaced with "error" target which is not what we want.
> + */
> + snprintf(cmd_str, CMD_SIZE, "dmsetup suspend --noflush %s-%s", vg,
> lv);
> + syslog(LOG_NOTICE, "Running command: %s", cmd_str);
> + r = system(cmd_str);
> +
> + snprintf(cmd_str, CMD_SIZE, "dmsetup table %s-%s | dmsetup load "
> + "%s-%s", vg, lv, vg, lv);
> + syslog(LOG_NOTICE, "Running command: %s", cmd_str);
> + r |= system(cmd_str);
> +
> + snprintf(cmd_str, CMD_SIZE, "dmsetup resume %s-%s", vg, lv);
> + syslog(LOG_NOTICE, "Running command: %s", cmd_str);
> + r |= system(cmd_str);
^ permalink raw reply [flat|nested] 17+ messages in thread* [PATCH 2 of 4] Handle transient secondary mirror leg failures
2009-12-18 17:10 ` Jonathan Brassow
@ 2009-12-18 18:25 ` Takahiro Yasui
2009-12-18 18:49 ` malahal
2009-12-18 18:35 ` malahal
1 sibling, 1 reply; 17+ messages in thread
From: Takahiro Yasui @ 2009-12-18 18:25 UTC (permalink / raw)
To: lvm-devel
On 12/18/09 12:10, Jonathan Brassow wrote:
> 2) If you don't get a new table loaded, it will behave as a suspend/
> resume only. Recent code changes in dm-raid1.c are causing
> 'log_failure' and 'leg_failure' to not be reset in those cases. IOW,
> all these steps could be for nothing. :(
I would like to know how effective the retry is. As Jon explained
above, recent upstream kernel blocks all write I/Os on NOSYNC regions.
This means that those write I/Os are kept blocked for a long time.
For example, mirror retry interval in your patch #4 is 30 seconds and
application or filesystem will be waited for 30 seconds (330 seconds
if retry count is 10). Can your application wait for more than 5 minutes?
This behaviour will not been solved even if kernel is fixed so that
log_failure and leg_failure are reset. The write I/Os blocked will
be re-queued in the kernel when suspend/resume are done, but they
will be put in the hold queue again if the device failure is not
transient but permanent.
I would like to know the use case of this patch set.
Thanks,
Taka
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 2 of 4] Handle transient secondary mirror leg failures
2009-12-18 18:25 ` Takahiro Yasui
@ 2009-12-18 18:49 ` malahal
2009-12-18 20:21 ` Takahiro Yasui
0 siblings, 1 reply; 17+ messages in thread
From: malahal @ 2009-12-18 18:49 UTC (permalink / raw)
To: lvm-devel
Takahiro Yasui [tyasui at redhat.com] wrote:
> On 12/18/09 12:10, Jonathan Brassow wrote:
> > 2) If you don't get a new table loaded, it will behave as a suspend/
> > resume only. Recent code changes in dm-raid1.c are causing
> > 'log_failure' and 'leg_failure' to not be reset in those cases. IOW,
> > all these steps could be for nothing. :(
>
> I would like to know how effective the retry is. As Jon explained
> above, recent upstream kernel blocks all write I/Os on NOSYNC regions.
> This means that those write I/Os are kept blocked for a long time.
> For example, mirror retry interval in your patch #4 is 30 seconds and
> application or filesystem will be waited for 30 seconds (330 seconds
> if retry count is 10). Can your application wait for more than 5 minutes?
>
> This behaviour will not been solved even if kernel is fixed so that
> log_failure and leg_failure are reset. The write I/Os blocked will
> be re-queued in the kernel when suspend/resume are done, but they
> will be put in the hold queue again if the device failure is not
> transient but permanent.
>
> I would like to know the use case of this patch set.
I have tested with RHEL5.4 kernel that doesn't block on device failure.
IMHO, suspend/resume is doing two things if the kernel code blocks on
failure -- 1) letting the kernel module unblock 2) start resync. We
should separate those two actions. Maybe, we should do something here???
Have an ioctl to start resync or have a message to unblock....
Also, blocking the kernel module introduced a regression -- the
mirror will block forever on a transient failure!
Thanks, Malahal.
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 2 of 4] Handle transient secondary mirror leg failures
2009-12-18 18:49 ` malahal
@ 2009-12-18 20:21 ` Takahiro Yasui
2009-12-18 20:54 ` malahal
0 siblings, 1 reply; 17+ messages in thread
From: Takahiro Yasui @ 2009-12-18 20:21 UTC (permalink / raw)
To: lvm-devel
On 12/18/09 13:49, malahal at us.ibm.com wrote:
> Takahiro Yasui [tyasui at redhat.com] wrote:
>> On 12/18/09 12:10, Jonathan Brassow wrote:
>>> 2) If you don't get a new table loaded, it will behave as a suspend/
>>> resume only. Recent code changes in dm-raid1.c are causing
>>> 'log_failure' and 'leg_failure' to not be reset in those cases. IOW,
>>> all these steps could be for nothing. :(
>>
>> I would like to know how effective the retry is. As Jon explained
>> above, recent upstream kernel blocks all write I/Os on NOSYNC regions.
>> This means that those write I/Os are kept blocked for a long time.
>> For example, mirror retry interval in your patch #4 is 30 seconds and
>> application or filesystem will be waited for 30 seconds (330 seconds
>> if retry count is 10). Can your application wait for more than 5 minutes?
>>
>> This behaviour will not been solved even if kernel is fixed so that
>> log_failure and leg_failure are reset. The write I/Os blocked will
>> be re-queued in the kernel when suspend/resume are done, but they
>> will be put in the hold queue again if the device failure is not
>> transient but permanent.
>>
>> I would like to know the use case of this patch set.
>
> I have tested with RHEL5.4 kernel that doesn't block on device failure.
I see. Your retry approach looks good for the kernels on RHEL5.4 kernel
and 2.6.32 kernel since write I/Os aren't blocked even after the error
is detected on a mirror leg.
> IMHO, suspend/resume is doing two things if the kernel code blocks on
> failure -- 1) letting the kernel module unblock 2) start resync. We
> should separate those two actions. Maybe, we should do something here???
> Have an ioctl to start resync or have a message to unblock....
I'm sorry I don't get your point to separate unblock and resync.
Currently "unblock" is done by "suspend" and resync is done by "resume"
We need to reset log_failure/leg_failre, but anything else?
> Also, blocking the kernel module introduced a regression -- the
> mirror will block forever on a transient failure!
You are right. Refreshing block I/Os is necessary when dmeventd noticed
an error event and lvconvert command found nothing to be done for repair.
dmeventd should kick an action to refresh the mirror device.
Thanks,
Taka
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 2 of 4] Handle transient secondary mirror leg failures
2009-12-18 20:21 ` Takahiro Yasui
@ 2009-12-18 20:54 ` malahal
0 siblings, 0 replies; 17+ messages in thread
From: malahal @ 2009-12-18 20:54 UTC (permalink / raw)
To: lvm-devel
Takahiro Yasui [tyasui at redhat.com] wrote:
> > IMHO, suspend/resume is doing two things if the kernel code blocks on
> > failure -- 1) letting the kernel module unblock 2) start resync. We
> > should separate those two actions. Maybe, we should do something here???
> > Have an ioctl to start resync or have a message to unblock....
>
> I'm sorry I don't get your point to separate unblock and resync.
> Currently "unblock" is done by "suspend" and resync is done by "resume"
> We need to reset log_failure/leg_failre, but anything else?
Let us talk about suspend with noflush as the other case would fail the
I/O. The suspend just REQUEUE's, so it really needs resume to send the
I/O down to a surviving leg. As far as the application goes, the I/O
gets blocked until the resume, anyway.
Also in do_writes(), we block any I/O that is going to a nosync region.
If I understand, The recent block on error patch just fails to do any
I/O on a "nosync" region. In other words, we don't entertain any mirror
with leg failures and still continue to function as a mirror!
We can reset log_failure/leg_failure in resume to make them work for now
as you say.
--Malahal.
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 2 of 4] Handle transient secondary mirror leg failures
2009-12-18 17:10 ` Jonathan Brassow
2009-12-18 18:25 ` Takahiro Yasui
@ 2009-12-18 18:35 ` malahal
1 sibling, 0 replies; 17+ messages in thread
From: malahal @ 2009-12-18 18:35 UTC (permalink / raw)
To: lvm-devel
Jonathan Brassow [jbrassow at redhat.com] wrote:
> hmmm, I wonder how well this is going to work.
>
> 1) I've seen code ('dm_task_suppress_identical_reload') that
> suppresses table reloads if the tables are identical - will that
> obstruct what you are trying to do here?
>
> 2) If you don't get a new table loaded, it will behave as a suspend/
> resume only. Recent code changes in dm-raid1.c are causing
> 'log_failure' and 'leg_failure' to not be reset in those cases. IOW,
> all these steps could be for nothing. :(
Initially I did test with a modified dm-raid1.c that reset the device
status (error count too) in resume() function. I wasn't using "dmsetup
reload" at that time. Later, I did add "dmsetup reload" and removed my
patch in dm-raid1.c.
It worked fine, so either I am not hitting
dm_task_suppress_identical_reload or some how I failed to revert my
patch in the kernel.
> 3) How does this work in a cluster? dmsetup is not cluster-aware. If
> you don't go through the LVM commands (I recognize the additional
> pain), none of this will be coordinated.
Tried to use "lvchange --refresh" and that didn't work too well. In the
last call, Alasdair indicated I should add something like
"--top-layer-only" option to "lvchange --refresh" command.
Thank you for the review.
--Malahal.
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 3 of 4] Add dm_event_set_timeout/dm_event_unset_timeout interface
2009-12-13 9:18 [PATCH 0 of 4] Re-integrate a failed secondary mirror leg Malahal Naineni
2009-12-13 9:18 ` [PATCH 1 of 4] Add more error codes in mirror DSO Malahal Naineni
2009-12-13 9:18 ` [PATCH 2 of 4] Handle transient secondary mirror leg failures Malahal Naineni
@ 2009-12-13 9:18 ` Malahal Naineni
2009-12-22 2:12 ` malahal
2009-12-13 9:18 ` [PATCH 4 of 4] Attempt to resync a failed secondary leg few times before giving up Malahal Naineni
3 siblings, 1 reply; 17+ messages in thread
From: Malahal Naineni @ 2009-12-13 9:18 UTC (permalink / raw)
To: lvm-devel
Will be used in mirror DSO to handle transient device failures.
Signed-off-by: Malahal Naineni <malahal@us.ibm.com>
diff -r 1e369d480df0 -r 1c364c686b5e daemons/dmeventd/.exported_symbols
--- a/daemons/dmeventd/.exported_symbols Sun Dec 13 01:17:52 2009 -0800
+++ b/daemons/dmeventd/.exported_symbols Sun Dec 13 01:17:57 2009 -0800
@@ -17,3 +17,5 @@ dm_event_unregister_handler
dm_event_get_registered_device
dm_event_handler_set_timeout
dm_event_handler_get_timeout
+dm_event_set_timeout
+dm_event_unset_timeout
diff -r 1e369d480df0 -r 1c364c686b5e daemons/dmeventd/dmeventd.c
--- a/daemons/dmeventd/dmeventd.c Sun Dec 13 01:17:52 2009 -0800
+++ b/daemons/dmeventd/dmeventd.c Sun Dec 13 01:17:57 2009 -0800
@@ -1164,13 +1164,25 @@ static int _get_next_registered_device(s
static int _set_timeout(struct message_data *message_data)
{
struct thread_status *thread;
+ int ret = 0;
_lock_mutex();
- if ((thread = _lookup_thread_status(message_data)))
- thread->timeout = message_data->timeout.secs;
+ if ((thread = _lookup_thread_status(message_data))) {
+ /* INT_MAX timeout is used to unregister timeout,
+ * register timeout for any other value. */
+ thread->events &= ~DM_EVENT_TIMEOUT;
+ _unregister_for_timeout(thread);
+ if (message_data->timeout.secs == INT_MAX) {
+ thread->timeout = 0;
+ } else {
+ thread->events |= DM_EVENT_TIMEOUT;
+ thread->timeout = message_data->timeout.secs;
+ ret = _register_for_timeout(thread);
+ }
+ }
_unlock_mutex();
- return thread ? 0 : -ENODEV;
+ return (thread && ret == 0) ? 0 : -ENODEV;
}
static int _get_timeout(struct message_data *message_data)
diff -r 1e369d480df0 -r 1c364c686b5e daemons/dmeventd/libdevmapper-event.c
--- a/daemons/dmeventd/libdevmapper-event.c Sun Dec 13 01:17:52 2009 -0800
+++ b/daemons/dmeventd/libdevmapper-event.c Sun Dec 13 01:17:57 2009 -0800
@@ -770,27 +770,90 @@ int dm_event_get_registered_device(struc
return ret;
}
-#if 0 /* left out for now */
+/* Get uuid of a device */
+static struct dm_task *_get_device_info_by_name(const char *dev_name)
+{
+ struct dm_task *dmt;
+ struct dm_info info;
+
+ if (!(dmt = dm_task_create(DM_DEVICE_INFO))) {
+ log_error("_get_device_info: dm_task creation for info failed");
+ return NULL;
+ }
+
+ dm_task_set_name(dmt, dev_name);
+
+ /* FIXME Add name or uuid or devno to messages */
+ if (!dm_task_run(dmt)) {
+ log_error("_get_device_info: dm_task_run() failed");
+ goto failed;
+ }
+
+ if (!dm_task_get_info(dmt, &info)) {
+ log_error("_get_device_info: failed to get info for device");
+ goto failed;
+ }
+
+ if (!info.exists) {
+ log_error("_get_device_info: device not found");
+ goto failed;
+ }
+
+ return dmt;
+
+failed:
+ dm_task_destroy(dmt);
+ return NULL;
+}
+
+int dm_event_unset_timeout(const char *dev_name)
+{
+ /* timeout of INT_MAX is used as no timeout that removes the
+ * timer. INT_MAX is chosen instead of -1 as atoi() is used in
+ * dmeventd.c */
+ return dm_event_set_timeout(dev_name, INT_MAX);
+}
+
+int dm_event_set_timeout(const char *dev_name, uint32_t timeout)
+{
+ int ret = 1, err;
+ const char *uuid;
+ struct dm_task *dmt;
+ struct dm_event_daemon_message msg = { 0, 0, NULL };
+
+ if (!(dmt = _get_device_info_by_name(dev_name))) {
+ stack;
+ return 0;
+ }
+
+ uuid = dm_task_get_uuid(dmt);
+
+ if ((err = _do_event(DM_EVENT_CMD_SET_TIMEOUT, &msg,
+ NULL, uuid, 0, timeout)) < 0) {
+ log_error("%s: event set timeout failed: %s",
+ dm_task_get_name(dmt),
+ msg.data ? msg.data : strerror(-err));
+ ret = 0;
+ }
+
+ if (msg.data)
+ dm_free(msg.data);
+
+ dm_task_destroy(dmt);
+
+ return ret;
+}
+
+#if 0
static char *_skip_string(char *src, const int delimiter)
{
- src = srtchr(src, delimiter);
+ src = strchr(src, delimiter);
if (src && *(src + 1))
return src + 1;
return NULL;
}
-int dm_event_set_timeout(const char *device_path, uint32_t timeout)
-{
- struct dm_event_daemon_message msg = { 0, 0, NULL };
-
- if (!device_exists(device_path))
- return -ENODEV;
-
- return _do_event(DM_EVENT_CMD_SET_TIMEOUT, &msg,
- NULL, device_path, 0, timeout);
-}
-
int dm_event_get_timeout(const char *device_path, uint32_t *timeout)
{
int ret;
diff -r 1e369d480df0 -r 1c364c686b5e daemons/dmeventd/libdevmapper-event.h
--- a/daemons/dmeventd/libdevmapper-event.h Sun Dec 13 01:17:52 2009 -0800
+++ b/daemons/dmeventd/libdevmapper-event.h Sun Dec 13 01:17:57 2009 -0800
@@ -102,5 +102,7 @@ void process_event(struct dm_task *dmt,
int register_device(const char *device_name, const char *uuid, int major, int minor, void **user);
int unregister_device(const char *device_name, const char *uuid, int major,
int minor, void **user);
+int dm_event_set_timeout(const char *dev_name, uint32_t timeout);
+int dm_event_unset_timeout(const char *dev_name);
#endif
^ permalink raw reply [flat|nested] 17+ messages in thread* [PATCH 3 of 4] Add dm_event_set_timeout/dm_event_unset_timeout interface
2009-12-13 9:18 ` [PATCH 3 of 4] Add dm_event_set_timeout/dm_event_unset_timeout interface Malahal Naineni
@ 2009-12-22 2:12 ` malahal
2009-12-22 10:51 ` Alasdair G Kergon
0 siblings, 1 reply; 17+ messages in thread
From: malahal @ 2009-12-22 2:12 UTC (permalink / raw)
To: lvm-devel
Malahal Naineni [malahal at us.ibm.com] wrote:
> Will be used in mirror DSO to handle transient device failures.
Alasdair suggested to use -1 instead instead of INT_MAX to unregister a
timeout. This patch supersedes the previous patch.
==========================================================
Add dm_event_set_timeout/dm_event_unset_timeout interface
Will be used in mirror DSO to handle transient device failures.
Signed-off-by: Malahal Naineni <malahal@us.ibm.com>
diff -r c11c9f0a04f8 -r b3c48f241dfd daemons/dmeventd/.exported_symbols
--- a/daemons/dmeventd/.exported_symbols Mon Dec 21 17:40:47 2009 -0800
+++ b/daemons/dmeventd/.exported_symbols Mon Dec 21 17:57:37 2009 -0800
@@ -17,3 +17,5 @@ dm_event_unregister_handler
dm_event_get_registered_device
dm_event_handler_set_timeout
dm_event_handler_get_timeout
+dm_event_set_timeout
+dm_event_unset_timeout
diff -r c11c9f0a04f8 -r b3c48f241dfd daemons/dmeventd/dmeventd.c
--- a/daemons/dmeventd/dmeventd.c Mon Dec 21 17:40:47 2009 -0800
+++ b/daemons/dmeventd/dmeventd.c Mon Dec 21 17:57:37 2009 -0800
@@ -155,7 +155,7 @@ struct message_data {
} events;
union {
char *str;
- uint32_t secs;
+ int32_t secs;
} timeout;
struct dm_event_daemon_message *msg; /* Pointer to message buffer. */
};
@@ -187,7 +187,7 @@ struct thread_status {
enum dm_event_mask current_events; /* bitfield for occured events. */
struct dm_task *current_task;
time_t next_time;
- uint32_t timeout;
+ int32_t timeout;
struct dm_list timeout_list;
void *dso_private; /* dso per-thread status variable */
};
@@ -363,7 +363,7 @@ static int _parse_message(struct message
message_data->events.field = i;
}
if (message_data->timeout.str) {
- uint32_t secs = atoi(message_data->timeout.str);
+ int32_t secs = atoi(message_data->timeout.str);
dm_free(message_data->timeout.str);
message_data->timeout.secs = secs ? secs :
DM_EVENT_DEFAULT_TIMEOUT;
@@ -1164,13 +1164,26 @@ static int _get_next_registered_device(s
static int _set_timeout(struct message_data *message_data)
{
struct thread_status *thread;
+ int ret = 0;
_lock_mutex();
- if ((thread = _lookup_thread_status(message_data)))
- thread->timeout = message_data->timeout.secs;
+ if ((thread = _lookup_thread_status(message_data))) {
+ /* A timeout of -1 (actually any negative timeout) is
+ * used to unregister the timeout, register a timeout
+ * for any other value. */
+ thread->events &= ~DM_EVENT_TIMEOUT;
+ _unregister_for_timeout(thread);
+ if (message_data->timeout.secs >= 0) {
+ thread->events |= DM_EVENT_TIMEOUT;
+ thread->timeout = message_data->timeout.secs;
+ ret = _register_for_timeout(thread);
+ } else {
+ thread->timeout = 0;
+ }
+ }
_unlock_mutex();
- return thread ? 0 : -ENODEV;
+ return (thread && ret == 0) ? 0 : -ENODEV;
}
static int _get_timeout(struct message_data *message_data)
diff -r c11c9f0a04f8 -r b3c48f241dfd daemons/dmeventd/libdevmapper-event.c
--- a/daemons/dmeventd/libdevmapper-event.c Mon Dec 21 17:40:47 2009 -0800
+++ b/daemons/dmeventd/libdevmapper-event.c Mon Dec 21 17:57:37 2009 -0800
@@ -40,7 +40,7 @@ struct dm_event_handler {
char *uuid;
int major;
int minor;
- uint32_t timeout;
+ int32_t timeout;
enum dm_event_mask mask;
};
@@ -331,11 +331,11 @@ static int _daemon_write(struct dm_event
static int _daemon_talk(struct dm_event_fifos *fifos,
struct dm_event_daemon_message *msg, int cmd,
const char *dso_name, const char *dev_name,
- enum dm_event_mask evmask, uint32_t timeout)
+ enum dm_event_mask evmask, int32_t timeout)
{
const char *dso = dso_name ? dso_name : "";
const char *dev = dev_name ? dev_name : "";
- const char *fmt = "%d:%d %s %s %u %" PRIu32;
+ const char *fmt = "%d:%d %s %s %u %" PRId32;
int msg_size;
memset(msg, 0, sizeof(*msg));
@@ -550,7 +550,7 @@ failed:
/* Handle the event (de)registration call and return negative error codes. */
static int _do_event(int cmd, struct dm_event_daemon_message *msg,
const char *dso_name, const char *dev_name,
- enum dm_event_mask evmask, uint32_t timeout)
+ enum dm_event_mask evmask, int32_t timeout)
{
int ret;
struct dm_event_fifos fifos;
@@ -770,27 +770,89 @@ int dm_event_get_registered_device(struc
return ret;
}
-#if 0 /* left out for now */
+/* Get uuid of a device */
+static struct dm_task *_get_device_info_by_name(const char *dev_name)
+{
+ struct dm_task *dmt;
+ struct dm_info info;
+
+ if (!(dmt = dm_task_create(DM_DEVICE_INFO))) {
+ log_error("_get_device_info: dm_task creation for info failed");
+ return NULL;
+ }
+
+ dm_task_set_name(dmt, dev_name);
+
+ /* FIXME Add name or uuid or devno to messages */
+ if (!dm_task_run(dmt)) {
+ log_error("_get_device_info: dm_task_run() failed");
+ goto failed;
+ }
+
+ if (!dm_task_get_info(dmt, &info)) {
+ log_error("_get_device_info: failed to get info for device");
+ goto failed;
+ }
+
+ if (!info.exists) {
+ log_error("_get_device_info: device not found");
+ goto failed;
+ }
+
+ return dmt;
+
+failed:
+ dm_task_destroy(dmt);
+ return NULL;
+}
+
+int dm_event_unset_timeout(const char *dev_name)
+{
+ /* timeout of -1 is used as no timeout and removes the
+ * timer. */
+ return dm_event_set_timeout(dev_name, -1);
+}
+
+int dm_event_set_timeout(const char *dev_name, int32_t timeout)
+{
+ int ret = 1, err;
+ const char *uuid;
+ struct dm_task *dmt;
+ struct dm_event_daemon_message msg = { 0, 0, NULL };
+
+ if (!(dmt = _get_device_info_by_name(dev_name))) {
+ stack;
+ return 0;
+ }
+
+ uuid = dm_task_get_uuid(dmt);
+
+ if ((err = _do_event(DM_EVENT_CMD_SET_TIMEOUT, &msg,
+ NULL, uuid, 0, timeout)) < 0) {
+ log_error("%s: event set timeout failed: %s",
+ dm_task_get_name(dmt),
+ msg.data ? msg.data : strerror(-err));
+ ret = 0;
+ }
+
+ if (msg.data)
+ dm_free(msg.data);
+
+ dm_task_destroy(dmt);
+
+ return ret;
+}
+
+#if 0
static char *_skip_string(char *src, const int delimiter)
{
- src = srtchr(src, delimiter);
+ src = strchr(src, delimiter);
if (src && *(src + 1))
return src + 1;
return NULL;
}
-int dm_event_set_timeout(const char *device_path, uint32_t timeout)
-{
- struct dm_event_daemon_message msg = { 0, 0, NULL };
-
- if (!device_exists(device_path))
- return -ENODEV;
-
- return _do_event(DM_EVENT_CMD_SET_TIMEOUT, &msg,
- NULL, device_path, 0, timeout);
-}
-
int dm_event_get_timeout(const char *device_path, uint32_t *timeout)
{
int ret;
diff -r c11c9f0a04f8 -r b3c48f241dfd daemons/dmeventd/libdevmapper-event.h
--- a/daemons/dmeventd/libdevmapper-event.h Mon Dec 21 17:40:47 2009 -0800
+++ b/daemons/dmeventd/libdevmapper-event.h Mon Dec 21 17:57:37 2009 -0800
@@ -102,5 +102,7 @@ void process_event(struct dm_task *dmt,
int register_device(const char *device_name, const char *uuid, int major, int minor, void **user);
int unregister_device(const char *device_name, const char *uuid, int major,
int minor, void **user);
+int dm_event_set_timeout(const char *dev_name, int32_t timeout);
+int dm_event_unset_timeout(const char *dev_name);
#endif
^ permalink raw reply [flat|nested] 17+ messages in thread* [PATCH 3 of 4] Add dm_event_set_timeout/dm_event_unset_timeout interface
2009-12-22 2:12 ` malahal
@ 2009-12-22 10:51 ` Alasdair G Kergon
2009-12-23 1:58 ` malahal
0 siblings, 1 reply; 17+ messages in thread
From: Alasdair G Kergon @ 2009-12-22 10:51 UTC (permalink / raw)
To: lvm-devel
On Mon, Dec 21, 2009 at 06:12:55PM -0800, malahal at us.ibm.com wrote:
> Alasdair suggested to use -1 instead instead of INT_MAX to unregister a
> timeout. This patch supersedes the previous patch.
Using a #define, please, rather than hard-coding -1 into the source.
Alasdair
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 3 of 4] Add dm_event_set_timeout/dm_event_unset_timeout interface
2009-12-22 10:51 ` Alasdair G Kergon
@ 2009-12-23 1:58 ` malahal
0 siblings, 0 replies; 17+ messages in thread
From: malahal @ 2009-12-23 1:58 UTC (permalink / raw)
To: lvm-devel
Alasdair G Kergon [agk at redhat.com] wrote:
> Using a #define, please, rather than hard-coding -1 into the source.
Thank you. The following patch introduces DM_EVENT_REMOVE_TIMEOUT for
that purpose. This patch supersedes the previous patch (both patches are
against same source, so interdiff can be used to find just the
difference)
=============================================================================
Add dm_event_set_timeout/dm_event_unset_timeout interface
Will be used in mirror DSO to handle transient device failures.
Signed-off-by: Malahal Naineni <malahal@us.ibm.com>
diff -r c11c9f0a04f8 -r 0b6ad3b52702 daemons/dmeventd/.exported_symbols
--- a/daemons/dmeventd/.exported_symbols Mon Dec 21 17:40:47 2009 -0800
+++ b/daemons/dmeventd/.exported_symbols Tue Dec 22 15:53:11 2009 -0800
@@ -17,3 +17,5 @@ dm_event_unregister_handler
dm_event_get_registered_device
dm_event_handler_set_timeout
dm_event_handler_get_timeout
+dm_event_set_timeout
+dm_event_unset_timeout
diff -r c11c9f0a04f8 -r 0b6ad3b52702 daemons/dmeventd/dmeventd.c
--- a/daemons/dmeventd/dmeventd.c Mon Dec 21 17:40:47 2009 -0800
+++ b/daemons/dmeventd/dmeventd.c Tue Dec 22 15:53:11 2009 -0800
@@ -155,7 +155,7 @@ struct message_data {
} events;
union {
char *str;
- uint32_t secs;
+ int32_t secs;
} timeout;
struct dm_event_daemon_message *msg; /* Pointer to message buffer. */
};
@@ -187,7 +187,7 @@ struct thread_status {
enum dm_event_mask current_events; /* bitfield for occured events. */
struct dm_task *current_task;
time_t next_time;
- uint32_t timeout;
+ int32_t timeout;
struct dm_list timeout_list;
void *dso_private; /* dso per-thread status variable */
};
@@ -363,7 +363,7 @@ static int _parse_message(struct message
message_data->events.field = i;
}
if (message_data->timeout.str) {
- uint32_t secs = atoi(message_data->timeout.str);
+ int32_t secs = atoi(message_data->timeout.str);
dm_free(message_data->timeout.str);
message_data->timeout.secs = secs ? secs :
DM_EVENT_DEFAULT_TIMEOUT;
@@ -1164,13 +1164,26 @@ static int _get_next_registered_device(s
static int _set_timeout(struct message_data *message_data)
{
struct thread_status *thread;
+ int ret = 0;
_lock_mutex();
- if ((thread = _lookup_thread_status(message_data)))
- thread->timeout = message_data->timeout.secs;
+ if ((thread = _lookup_thread_status(message_data))) {
+ /* A timeout of DM_EVENT_REMOVE_TIMEOUT is used to
+ * unregister the timeout, register a timeout for any
+ * other value. */
+ thread->events &= ~DM_EVENT_TIMEOUT;
+ _unregister_for_timeout(thread);
+ if (message_data->timeout.secs != DM_EVENT_REMOVE_TIMEOUT) {
+ thread->events |= DM_EVENT_TIMEOUT;
+ thread->timeout = message_data->timeout.secs;
+ ret = _register_for_timeout(thread);
+ } else {
+ thread->timeout = 0;
+ }
+ }
_unlock_mutex();
- return thread ? 0 : -ENODEV;
+ return (thread && ret == 0) ? 0 : -ENODEV;
}
static int _get_timeout(struct message_data *message_data)
diff -r c11c9f0a04f8 -r 0b6ad3b52702 daemons/dmeventd/dmeventd.h
--- a/daemons/dmeventd/dmeventd.h Mon Dec 21 17:40:47 2009 -0800
+++ b/daemons/dmeventd/dmeventd.h Tue Dec 22 15:53:11 2009 -0800
@@ -24,6 +24,7 @@
#define DM_EVENT_PIDFILE "/var/run/dmeventd.pid"
#define DM_EVENT_DEFAULT_TIMEOUT 10
+#define DM_EVENT_REMOVE_TIMEOUT -1 /* used to unschedule a timeout */
/* Commands for the daemon passed in the message below. */
enum dm_event_command {
diff -r c11c9f0a04f8 -r 0b6ad3b52702 daemons/dmeventd/libdevmapper-event.c
--- a/daemons/dmeventd/libdevmapper-event.c Mon Dec 21 17:40:47 2009 -0800
+++ b/daemons/dmeventd/libdevmapper-event.c Tue Dec 22 15:53:11 2009 -0800
@@ -40,7 +40,7 @@ struct dm_event_handler {
char *uuid;
int major;
int minor;
- uint32_t timeout;
+ int32_t timeout;
enum dm_event_mask mask;
};
@@ -331,11 +331,11 @@ static int _daemon_write(struct dm_event
static int _daemon_talk(struct dm_event_fifos *fifos,
struct dm_event_daemon_message *msg, int cmd,
const char *dso_name, const char *dev_name,
- enum dm_event_mask evmask, uint32_t timeout)
+ enum dm_event_mask evmask, int32_t timeout)
{
const char *dso = dso_name ? dso_name : "";
const char *dev = dev_name ? dev_name : "";
- const char *fmt = "%d:%d %s %s %u %" PRIu32;
+ const char *fmt = "%d:%d %s %s %u %" PRId32;
int msg_size;
memset(msg, 0, sizeof(*msg));
@@ -550,7 +550,7 @@ failed:
/* Handle the event (de)registration call and return negative error codes. */
static int _do_event(int cmd, struct dm_event_daemon_message *msg,
const char *dso_name, const char *dev_name,
- enum dm_event_mask evmask, uint32_t timeout)
+ enum dm_event_mask evmask, int32_t timeout)
{
int ret;
struct dm_event_fifos fifos;
@@ -770,27 +770,89 @@ int dm_event_get_registered_device(struc
return ret;
}
-#if 0 /* left out for now */
+/* Get uuid of a device */
+static struct dm_task *_get_device_info_by_name(const char *dev_name)
+{
+ struct dm_task *dmt;
+ struct dm_info info;
+
+ if (!(dmt = dm_task_create(DM_DEVICE_INFO))) {
+ log_error("_get_device_info: dm_task creation for info failed");
+ return NULL;
+ }
+
+ dm_task_set_name(dmt, dev_name);
+
+ /* FIXME Add name or uuid or devno to messages */
+ if (!dm_task_run(dmt)) {
+ log_error("_get_device_info: dm_task_run() failed");
+ goto failed;
+ }
+
+ if (!dm_task_get_info(dmt, &info)) {
+ log_error("_get_device_info: failed to get info for device");
+ goto failed;
+ }
+
+ if (!info.exists) {
+ log_error("_get_device_info: device not found");
+ goto failed;
+ }
+
+ return dmt;
+
+failed:
+ dm_task_destroy(dmt);
+ return NULL;
+}
+
+int dm_event_unset_timeout(const char *dev_name)
+{
+ /* timeout value of DM_EVENT_REMOVE_TIMEOUT is used to remove
+ * the timer. */
+ return dm_event_set_timeout(dev_name, DM_EVENT_REMOVE_TIMEOUT);
+}
+
+int dm_event_set_timeout(const char *dev_name, int32_t timeout)
+{
+ int ret = 1, err;
+ const char *uuid;
+ struct dm_task *dmt;
+ struct dm_event_daemon_message msg = { 0, 0, NULL };
+
+ if (!(dmt = _get_device_info_by_name(dev_name))) {
+ stack;
+ return 0;
+ }
+
+ uuid = dm_task_get_uuid(dmt);
+
+ if ((err = _do_event(DM_EVENT_CMD_SET_TIMEOUT, &msg,
+ NULL, uuid, 0, timeout)) < 0) {
+ log_error("%s: event set timeout failed: %s",
+ dm_task_get_name(dmt),
+ msg.data ? msg.data : strerror(-err));
+ ret = 0;
+ }
+
+ if (msg.data)
+ dm_free(msg.data);
+
+ dm_task_destroy(dmt);
+
+ return ret;
+}
+
+#if 0
static char *_skip_string(char *src, const int delimiter)
{
- src = srtchr(src, delimiter);
+ src = strchr(src, delimiter);
if (src && *(src + 1))
return src + 1;
return NULL;
}
-int dm_event_set_timeout(const char *device_path, uint32_t timeout)
-{
- struct dm_event_daemon_message msg = { 0, 0, NULL };
-
- if (!device_exists(device_path))
- return -ENODEV;
-
- return _do_event(DM_EVENT_CMD_SET_TIMEOUT, &msg,
- NULL, device_path, 0, timeout);
-}
-
int dm_event_get_timeout(const char *device_path, uint32_t *timeout)
{
int ret;
diff -r c11c9f0a04f8 -r 0b6ad3b52702 daemons/dmeventd/libdevmapper-event.h
--- a/daemons/dmeventd/libdevmapper-event.h Mon Dec 21 17:40:47 2009 -0800
+++ b/daemons/dmeventd/libdevmapper-event.h Tue Dec 22 15:53:11 2009 -0800
@@ -102,5 +102,7 @@ void process_event(struct dm_task *dmt,
int register_device(const char *device_name, const char *uuid, int major, int minor, void **user);
int unregister_device(const char *device_name, const char *uuid, int major,
int minor, void **user);
+int dm_event_set_timeout(const char *dev_name, int32_t timeout);
+int dm_event_unset_timeout(const char *dev_name);
#endif
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 4 of 4] Attempt to resync a failed secondary leg few times before giving up
2009-12-13 9:18 [PATCH 0 of 4] Re-integrate a failed secondary mirror leg Malahal Naineni
` (2 preceding siblings ...)
2009-12-13 9:18 ` [PATCH 3 of 4] Add dm_event_set_timeout/dm_event_unset_timeout interface Malahal Naineni
@ 2009-12-13 9:18 ` Malahal Naineni
3 siblings, 0 replies; 17+ messages in thread
From: Malahal Naineni @ 2009-12-13 9:18 UTC (permalink / raw)
To: lvm-devel
This patch adds the capability to attempt resync of a failed mirror
device at a given timeout interval and given number of attempts.
Signed-off-by: Malahal Naineni (malahal at us.ibm.com)
diff -r 1c364c686b5e -r bebd880d7476 daemons/dmeventd/plugins/mirror/Makefile.in
--- a/daemons/dmeventd/plugins/mirror/Makefile.in Sun Dec 13 01:17:57 2009 -0800
+++ b/daemons/dmeventd/plugins/mirror/Makefile.in Sun Dec 13 01:17:57 2009 -0800
@@ -32,7 +32,7 @@ LIB_VERSION = $(LIB_VERSION_LVM)
include $(top_builddir)/make.tmpl
-LIBS += -ldevmapper @LIB_PTHREAD@ @LVM2CMD_LIB@
+LIBS += -ldevmapper -ldevmapper-event @LIB_PTHREAD@ @LVM2CMD_LIB@
install_lvm2: libdevmapper-event-lvm2mirror.$(LIB_SUFFIX)
$(INSTALL) -D $(OWNER) $(GROUP) -m 555 $(STRIP) $< \
diff -r 1c364c686b5e -r bebd880d7476 daemons/dmeventd/plugins/mirror/dmeventd_mirror.c
--- a/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c Sun Dec 13 01:17:57 2009 -0800
+++ b/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c Sun Dec 13 01:17:57 2009 -0800
@@ -68,6 +68,9 @@ enum fault_policy {
struct mirror_device_info {
enum fault_policy fault_policy;
+ int retry_total; /* number of retries before giving up */
+ int retry_current; /* number of retries already tried */
+ int retry_timeout; /* timeout between retry attepts, in seconds */
};
#define CMD_SIZE 256 /* FIXME Use system restriction */
@@ -161,6 +164,16 @@ static enum fault_policy get_mirror_faul
return ret;
}
+static int get_mirror_retry_num()
+{
+ return 10; /* FIXME: make it configurable */
+}
+
+static int get_mirror_retry_timeout()
+{
+ return 30; /* 30 seconds. FIXME: make it configurable */
+}
+
/*
* Currently only one event can be processed at a time.
*/
@@ -305,8 +318,22 @@ static void _temporary_log_fn(int level,
syslog(LOG_DEBUG, "%s", format);
}
+static int start_retry_failed_devices(const char *device,
+ struct mirror_device_info *mirror_info)
+{
+ /* Schedule a timeout for retrying failed devices. Note that
+ * our process_event gets called@every retry_timeout interval
+ * until we remove it by calling dm_event_unset_timeout */
+ return dm_event_set_timeout(device, mirror_info->retry_timeout);
+}
-static int retry_failed_devices(const char *device)
+static int stop_retry_failed_devices(const char *device)
+{
+ return dm_event_unset_timeout(device);
+}
+
+static int retry_failed_devices(const char *device,
+ struct mirror_device_info *mirror_info)
{
int r;
char cmd_str[CMD_SIZE];
@@ -348,6 +375,32 @@ static int retry_failed_devices(const ch
return r;
}
+static int process_timeout(const char *device,
+ struct mirror_device_info *mirror_info)
+{
+ int ret;
+
+ if (mirror_info->retry_current > mirror_info->retry_total) {
+ syslog(LOG_ERR, "Unable to resync the mirror: %s after %d "
+ "attempts. Giving up.\n", device,
+ mirror_info->retry_total);
+ stop_retry_failed_devices(device);
+ ret = -ENOMEM;
+ } else {
+ mirror_info->retry_current++;
+ syslog(LOG_ERR, "Trying to resync the failed mirror: %s "
+ "attepmt number: %d\n", device,
+ mirror_info->retry_current);
+ ret = retry_failed_devices(device, mirror_info);
+
+ /* If we successfully retried failed device, stop the timer */
+ if (!ret)
+ stop_retry_failed_devices(device);
+ }
+
+ return ret;
+}
+
static int _remove_failed_devices(const char *device)
{
int r;
@@ -384,7 +437,7 @@ static int _remove_failed_devices(const
}
void process_event(struct dm_task *dmt,
- enum dm_event_mask event __attribute((unused)),
+ enum dm_event_mask event,
void **private)
{
void *next = NULL;
@@ -399,6 +452,12 @@ void process_event(struct dm_task *dmt,
syslog(LOG_NOTICE, "Another thread is handling an event. Waiting...");
pthread_mutex_lock(&_event_mutex);
}
+
+ if (event & DM_EVENT_TIMEOUT) {
+ process_timeout(device, mirror_info);
+ goto out;
+ }
+
do {
next = dm_get_next_target(dmt, next, &start, &length,
&target_type, ¶ms);
@@ -421,11 +480,14 @@ void process_event(struct dm_task *dmt,
if (mirror_info->fault_policy == FAULT_POLICY_RETRY &&
(error & ME_SECONDARY_WRITE_FAILURE ||
error & ME_SYNC_FAILURE)) {
- syslog(LOG_ERR, "Retrying the failed mirror "
- "device.\n");
- if (retry_failed_devices(device))
- syslog(LOG_ERR, "Failed to reload the "
- "mirror: %s\n", device);
+ syslog(LOG_ERR, "Start recovery for the "
+ "failed mirror device: %s.\n",
+ device);
+ if (!start_retry_failed_devices(device,
+ mirror_info))
+ syslog(LOG_ERR, "Failed to start retry "
+ "for the failed mirror "
+ "device: %s\n", device);
} else if (_remove_failed_devices(device)) {
/* FIXME Why are all the error return codes unused? Get rid of them? */
syslog(LOG_ERR, "Failed to remove faulty devices in %s\n",
@@ -441,6 +503,11 @@ void process_event(struct dm_task *dmt,
_part_ of the device is in sync
Also, this is not an error
*/
+ if (mirror_info->fault_policy == FAULT_POLICY_RETRY) {
+ /* stop if we scheduled any timeouts for retry */
+ mirror_info->retry_current = 0;
+ stop_retry_failed_devices(device);
+ }
syslog(LOG_NOTICE, "%s is now in-sync\n", device);
} else if (error & ME_READ_FAILURE) {
/* Ignore it for now */
@@ -448,6 +515,7 @@ void process_event(struct dm_task *dmt,
syslog(LOG_INFO, "Unknown event:%u received.\n", error);
} while (next);
+out:
pthread_mutex_unlock(&_event_mutex);
}
@@ -476,6 +544,11 @@ int register_device(const char *device,
goto out;
}
mirror_info->fault_policy = get_mirror_fault_policy();
+ if (mirror_info->fault_policy == FAULT_POLICY_RETRY) {
+ mirror_info->retry_total = get_mirror_retry_num();
+ mirror_info->retry_current = 0;
+ mirror_info->retry_timeout = get_mirror_retry_timeout();
+ }
*private = mirror_info;
if (!_lvm_handle) {
@@ -511,8 +584,12 @@ int unregister_device(const char *device
struct mirror_device_info *mirror_info = *private;
dm_free(mirror_info);
+
pthread_mutex_lock(&_register_mutex);
+ /* Stop the retry timer, if any */
+ stop_retry_failed_devices(device);
+
syslog(LOG_INFO, "No longer monitoring mirror device %s for events\n",
device);
^ permalink raw reply [flat|nested] 17+ messages in thread