cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
From: David Windsor <dwindsor@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH] dlm_controld: trigger network interface failover if a communications error is detected
Date: Tue, 11 Jun 2019 09:21:45 -0400	[thread overview]
Message-ID: <20190611132145.6840-1-dwindsor@redhat.com> (raw)

Support for automatic failover in the face of network interruptions
is being added to the DLM kernel component [1].  This patch aids in that
effort by adding a mechanism whereby userspace can request the DLM
kernel component switch to the next usable network interface.

When --failover is set, dlm_controld will write to a configfs node that
alerts the DLM kernel component to the fact that a communications error
has occurred in userspace.  The kernel then reinitializes the DLM
communications stack, binding to the next usable network interface.
The kernel implements a round-robin mechanism for selecting the next
network interface.  If necessary, other interface selection heuristics
may be added later.

[1] https://www.redhat.com/archives/cluster-devel/2019-January/msg00009.html

Signed-off-by: David Windsor <dwindsor@redhat.com>

diff --git a/dlm_controld/action.c b/dlm_controld/action.c
index ecd0d022..c107092d 100644
--- a/dlm_controld/action.c
+++ b/dlm_controld/action.c
@@ -639,6 +639,28 @@ int add_configfs_node(int nodeid, char *addr, int addrlen, int local)
 	}
 	close(fd);
 
+	/*
+	 * set failover policy
+	 */
+	if opt(failover_ind) {
+		memset(path, 0, PATH_MAX);
+		snprintf(path, PATH_MAX, "%s/failover", COMMS_DIR);
+
+		fd = open(path, O_WRONLY);
+		if (fd < 0) {
+			log_error("%s: open failed: %d", path, errno);
+			return -1;
+		}
+
+		rv = do_write(fd, (void *)"1", strlen("1"));
+		if (rv < 0) {
+			log_error("%s: write failed: %d", path, errno);
+			close(fd);
+			return -1;
+		}
+		close(fd);
+	}
+
 	/*
 	 * set local
 	 */
@@ -681,6 +703,7 @@ int add_configfs_node(int nodeid, char *addr, int addrlen, int local)
 		}
 		close(fd);
 	}
+
  out:
 	return 0;
 }
@@ -907,6 +930,34 @@ int setup_configfs_members(void)
 	return 0;
 }
 
+/*
+ * Write to the configfs node triggering a switch to the next DLM
+ * failover network interface.
+ */
+int configfs_next_addr(void)
+{
+	int fd, rv;
+	char path[PATH_MAX];
+
+        memset(path, 0, PATH_MAX);
+        snprintf(path, PATH_MAX, "%s/error", COMMS_DIR);
+
+        fd = open(path, O_WRONLY);
+        if (fd < 0) {
+                log_error("%s: open failed: %d", path, errno);
+                return -1;
+        }
+
+        rv = do_write(fd, (void *)"1", strlen("1"));
+        if (rv < 0) {
+                log_error("%s: write failed: %d", path, errno);
+                close(fd);
+                return -1;
+        }
+        close(fd);
+	return 0;
+}
+
 static void find_minors(void)
 {
 	FILE *fl;
diff --git a/dlm_controld/dlm.conf.5 b/dlm_controld/dlm.conf.5
index 09492176..f086dfb1 100644
--- a/dlm_controld/dlm.conf.5
+++ b/dlm_controld/dlm.conf.5
@@ -40,6 +40,8 @@ protocol
 .br
 bind_all
 .br
+failover
+.br
 debug_logfile
 .br
 enable_plock
diff --git a/dlm_controld/dlm_daemon.h b/dlm_controld/dlm_daemon.h
index 3221e19c..9f244fd0 100644
--- a/dlm_controld/dlm_daemon.h
+++ b/dlm_controld/dlm_daemon.h
@@ -96,6 +96,7 @@ enum {
         protocol_ind,
         debug_logfile_ind,
 	bind_all_ind,
+	failover_ind,
         enable_fscontrol_ind,
         enable_plock_ind,
         plock_debug_ind,
@@ -363,6 +364,7 @@ void del_configfs_node(int nodeid);
 void clear_configfs(void);
 int setup_configfs_options(void);
 int setup_configfs_members(void);
+int configfs_next_addr(void);
 int check_uncontrolled_lockspaces(void);
 int setup_misc_devices(void);
 int path_exists(const char *path);
diff --git a/dlm_controld/main.c b/dlm_controld/main.c
index 8be6a4bc..ca19eac9 100644
--- a/dlm_controld/main.c
+++ b/dlm_controld/main.c
@@ -1501,6 +1501,7 @@ static int loop(void)
 			if (pollfd[i].revents & (POLLERR | POLLHUP | POLLNVAL)) {
 				deadfn = client[i].deadfn;
 				deadfn(i);
+				configfs_next_addr();
 			}
 		}
 		query_unlock();
@@ -1732,6 +1733,11 @@ static void set_opt_defaults(void)
 			0, NULL,
 			""); /* do not advertise */
 
+	set_opt_default(failover_ind,
+			"failover", '\0', req_arg_int,
+			0, NULL,
+			""); /* do not advertise */
+
 	set_opt_default(debug_logfile_ind,
 			"debug_logfile", 'L', no_arg,
 			0, NULL,
@@ -2096,4 +2102,3 @@ int main(int argc, char **argv)
 	unlink_lockfile(fd, RUNDIR, RUN_FILE_NAME);
 	return rv < 0 ? 1 : 0;
 }
-
-- 
2.21.0



                 reply	other threads:[~2019-06-11 13:21 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190611132145.6840-1-dwindsor@redhat.com \
    --to=dwindsor@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).