* [Cluster-devel] [PATCH] dlm_controld: trigger network interface failover if a communications error is detected
@ 2019-06-11 13:21 David Windsor
0 siblings, 0 replies; only message in thread
From: David Windsor @ 2019-06-11 13:21 UTC (permalink / raw)
To: cluster-devel.redhat.com
Support for automatic failover in the face of network interruptions
is being added to the DLM kernel component [1]. This patch aids in that
effort by adding a mechanism whereby userspace can request that the DLM
kernel component switch to the next usable network interface.
When --failover is set, dlm_controld will write to a configfs node that
alerts the DLM kernel component to the fact that a communications error
has occurred in userspace. The kernel then reinitializes the DLM
communications stack, binding to the next usable network interface.
The kernel implements a round-robin mechanism for selecting the next
network interface. If necessary, other interface selection heuristics
may be added later.
[1] https://www.redhat.com/archives/cluster-devel/2019-January/msg00009.html
Signed-off-by: David Windsor <dwindsor@redhat.com>
diff --git a/dlm_controld/action.c b/dlm_controld/action.c
index ecd0d022..c107092d 100644
--- a/dlm_controld/action.c
+++ b/dlm_controld/action.c
@@ -639,6 +639,28 @@ int add_configfs_node(int nodeid, char *addr, int addrlen, int local)
}
close(fd);
+ /*
+ * set failover policy: if the failover option is set, write "1" to COMMS_DIR/failover
+ */
+ if (opt(failover_ind)) {
+ memset(path, 0, PATH_MAX);
+ snprintf(path, PATH_MAX, "%s/failover", COMMS_DIR);
+
+ fd = open(path, O_WRONLY);
+ if (fd < 0) {
+ log_error("%s: open failed: %d", path, errno);
+ return -1;
+ }
+
+ rv = do_write(fd, (void *)"1", strlen("1"));
+ if (rv < 0) {
+ log_error("%s: write failed: %d", path, errno);
+ close(fd);
+ return -1;
+ }
+ close(fd);
+ }
+
/*
* set local
*/
@@ -681,6 +703,7 @@ int add_configfs_node(int nodeid, char *addr, int addrlen, int local)
}
close(fd);
}
+
out:
return 0;
}
@@ -907,6 +930,34 @@ int setup_configfs_members(void)
return 0;
}
+/*
+ * Write "1" to the "error" node under COMMS_DIR to request a switch to the next DLM
+ * failover network interface. Returns 0 on success, -1 (after log_error) on open/write failure.
+ */
+int configfs_next_addr(void)
+{
+ int fd, rv;
+ char path[PATH_MAX];
+
+ memset(path, 0, PATH_MAX);
+ snprintf(path, PATH_MAX, "%s/error", COMMS_DIR);
+
+ fd = open(path, O_WRONLY);
+ if (fd < 0) {
+ log_error("%s: open failed: %d", path, errno);
+ return -1;
+ }
+
+ rv = do_write(fd, (void *)"1", strlen("1"));
+ if (rv < 0) {
+ log_error("%s: write failed: %d", path, errno);
+ close(fd);
+ return -1;
+ }
+ close(fd);
+ return 0;
+}
+
static void find_minors(void)
{
FILE *fl;
diff --git a/dlm_controld/dlm.conf.5 b/dlm_controld/dlm.conf.5
index 09492176..f086dfb1 100644
--- a/dlm_controld/dlm.conf.5
+++ b/dlm_controld/dlm.conf.5
@@ -40,6 +40,8 @@ protocol
.br
bind_all
.br
+failover
+.br
debug_logfile
.br
enable_plock
diff --git a/dlm_controld/dlm_daemon.h b/dlm_controld/dlm_daemon.h
index 3221e19c..9f244fd0 100644
--- a/dlm_controld/dlm_daemon.h
+++ b/dlm_controld/dlm_daemon.h
@@ -96,6 +96,7 @@ enum {
protocol_ind,
debug_logfile_ind,
bind_all_ind,
+ failover_ind,
enable_fscontrol_ind,
enable_plock_ind,
plock_debug_ind,
@@ -363,6 +364,7 @@ void del_configfs_node(int nodeid);
void clear_configfs(void);
int setup_configfs_options(void);
int setup_configfs_members(void);
+int configfs_next_addr(void);
int check_uncontrolled_lockspaces(void);
int setup_misc_devices(void);
int path_exists(const char *path);
diff --git a/dlm_controld/main.c b/dlm_controld/main.c
index 8be6a4bc..ca19eac9 100644
--- a/dlm_controld/main.c
+++ b/dlm_controld/main.c
@@ -1501,6 +1501,7 @@ static int loop(void)
if (pollfd[i].revents & (POLLERR | POLLHUP | POLLNVAL)) {
deadfn = client[i].deadfn;
deadfn(i);
+ configfs_next_addr();
}
}
query_unlock();
@@ -1732,6 +1733,11 @@ static void set_opt_defaults(void)
0, NULL,
""); /* do not advertise */
+ set_opt_default(failover_ind,
+ "failover", '\0', req_arg_int,
+ 0, NULL,
+ ""); /* do not advertise */
+
set_opt_default(debug_logfile_ind,
"debug_logfile", 'L', no_arg,
0, NULL,
@@ -2096,4 +2102,3 @@ int main(int argc, char **argv)
unlink_lockfile(fd, RUNDIR, RUN_FILE_NAME);
return rv < 0 ? 1 : 0;
}
-
--
2.21.0
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2019-06-11 13:21 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2019-06-11 13:21 [Cluster-devel] [PATCH] dlm_controld: trigger network interface failover if a communications error is detected David Windsor
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).